Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Left Patch Set: Rebase with minor changes Created July 20, 2017, 3:45 p.m.
Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 55
56 // If this looks like Node.js, call process.nextTick, otherwise call 56 // If this looks like Node.js, call process.nextTick, otherwise call
57 // setTimeout. 57 // setTimeout.
58 if (typeof process != "undefined") 58 if (typeof process != "undefined")
59 process.nextTick(call); 59 process.nextTick(call);
60 else 60 else
61 setTimeout(call, 0); 61 setTimeout(call, 0);
62 }); 62 });
63 } 63 }
64 64
65 function async(funcs) 65 function async(callees, mapFunction)
66 { 66 {
67 if (!Array.isArray(funcs)) 67 if (!(Symbol.iterator in callees))
68 funcs = Array.from(arguments); 68 callees = [callees];
69 69
70 let lastPause = Date.now(); 70 let lastPause = Date.now();
71 71 let index = 0;
72 return funcs.reduce((promise, next) => promise.then(() => 72
73 { 73 let promise = Promise.resolve();
74 // If it has been 100ms or longer since the last call, take a pause. This 74
75 // keeps the browser from freezing up. 75 for (let next of callees)
76 let now = Date.now(); 76 {
77 if (now - lastPause >= 100) 77 let currentIndex = index;
78 { 78
79 lastPause = now; 79 promise = promise.then(() =>
80 return callLater(next); 80 {
81 } 81 if (mapFunction)
82 82 next = mapFunction(next, currentIndex);
83 return next(); 83
84 }), 84 // If it has been 100ms or longer since the last call, take a pause. This
85 Promise.resolve()); 85 // keeps the browser from freezing up.
86 let now = Date.now();
87 if (now - lastPause >= 100)
88 {
89 lastPause = now;
90 return callLater(next);
91 }
92
93 return next();
94 });
95
96 index++;
97 }
98
99 return promise;
86 } 100 }
87 101
88 function parseDomains(domains, included, excluded) 102 function parseDomains(domains, included, excluded)
89 { 103 {
90 for (let domain in domains) 104 for (let domain in domains)
91 { 105 {
92 if (domain != "") 106 if (domain != "")
93 { 107 {
94 let enabled = domains[domain]; 108 let enabled = domains[domain];
95 domain = punycode.toASCII(domain.toLowerCase()); 109 domain = punycode.toASCII(domain.toLowerCase());
96 110
97 if (!enabled) 111 if (!enabled)
98 excluded.push(domain); 112 excluded.push(domain);
99 else if (!domains[""]) 113 else if (!domains[""])
100 included.push(domain); 114 included.push(domain);
101 } 115 }
102 } 116 }
103 } 117 }
104 118
105 function escapeRegExp(s) 119 function escapeRegExp(s)
106 { 120 {
107 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 121 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
108 } 122 }
109 123
110 function matchDomain(domain) 124 function matchDomain(domain)
111 { 125 {
126 if (!domain)
127 return "^https?://";
128
112 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; 129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
113 } 130 }
114 131
115 function getURLSchemes(contentType) 132 function getURLSchemes(contentType)
116 { 133 {
117 // If the given content type includes all supported URL schemes, simply 134 // If the given content type includes all supported URL schemes, simply
118 // return a single generic URL scheme pattern. This minimizes the size of the 135 // return a single generic URL scheme pattern. This minimizes the size of the
119 // generated rule set. The downside to this is that it will also match 136 // generated rule set. The downside to this is that it will also match
120 // schemes that we do not want to match (e.g. "ftp://"), but this can be 137 // schemes that we do not want to match (e.g. "ftp://"), but this can be
121 // mitigated by adding exceptions for those schemes. 138 // mitigated by adding exceptions for those schemes.
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
160 if (parsed.justHostname) 177 if (parsed.justHostname)
161 domains.add(parsed.hostname); 178 domains.add(parsed.hostname);
162 } 179 }
163 return domains; 180 return domains;
164 } 181 }
165 182
166 function convertElemHideFilter(filter, elemhideSelectorExceptions) 183 function convertElemHideFilter(filter, elemhideSelectorExceptions)
167 { 184 {
168 let included = []; 185 let included = [];
169 let excluded = []; 186 let excluded = [];
170 let rules = [];
171 187
172 parseDomains(filter.domains, included, excluded); 188 parseDomains(filter.domains, included, excluded);
173 189
174 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
175 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 191 return {matchDomains: included, selector: filter.selector};
176 } 192 }
177 193
178 /** 194 /**
179 * Parse the given filter "regexpSource" string. Producing a regular expression, 195 * Parse the given filter "regexpSource" string. Producing a regular expression,
180 * extracting the hostname (if any), deciding if the regular expression is safe 196 * extracting the hostname (if any), deciding if the regular expression is safe
181 * to be converted + matched as lower case and noting if the source contains 197 * to be converted + matched as lower case and noting if the source contains
182 * anything after the hostname.) 198 * anything after the hostname.)
183 * 199 *
184 * @param {string} text regexpSource property of a filter 200 * @param {string} text regexpSource property of a filter
185 * @param {string} urlScheme The URL scheme to use in the regular expression 201 * @param {string} urlScheme The URL scheme to use in the regular expression
(...skipping 431 matching lines...) Expand 10 before | Expand all | Expand 10 after
617 { 633 {
618 newSelector.push(selector.substring(i, pos.start)); 634 newSelector.push(selector.substring(i, pos.start));
619 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
620 i = pos.end; 636 i = pos.end;
621 } 637 }
622 newSelector.push(selector.substring(i)); 638 newSelector.push(selector.substring(i));
623 639
624 return newSelector.join(""); 640 return newSelector.join("");
625 } 641 }
626 642
627 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) 643 function addCSSRules(rules, selectors, domain, exceptionDomains)
628 { 644 {
629 let unlessDomain = exceptionDomains.size > 0 ? [] : null; 645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;
630 646
631 exceptionDomains.forEach(name => unlessDomain.push("*" + name)); 647 exceptionDomains.forEach(name =>
648 {
649 // For domain-specific filters, include the exception domains only if
650 // they're subdomains of the given domain.
651 if (!domain || name.substr(-domain.length - 1) == "." + domain)
652 unlessDomain.push("*" + name);
653 });
632 654
633 while (selectors.length) 655 while (selectors.length)
634 { 656 {
635 let selector = selectors.splice(0, selectorLimit).join(", "); 657 let selector = selectors.splice(0, selectorLimit).join(", ");
636 658
637 // As of Safari 9.0 element IDs are matched as lowercase. We work around 659 // As of Safari 9.0 element IDs are matched as lowercase. We work around
638 // this by converting to the attribute format [id="elementID"] 660 // this by converting to the attribute format [id="elementID"]
639 selector = convertIDSelectorsToAttributeSelectors(selector); 661 selector = convertIDSelectorsToAttributeSelectors(selector);
640 662
641 let rule = { 663 let rule = {
642 trigger: {"url-filter": matchDomain, 664 trigger: {"url-filter": matchDomain(domain),
643 "url-filter-is-case-sensitive": true}, 665 "url-filter-is-case-sensitive": true},
644 action: {type: "css-display-none", 666 action: {type: "css-display-none",
645 selector: selector} 667 selector: selector}
646 }; 668 };
647 669
648 if (unlessDomain) 670 if (unlessDomain)
649 rule.trigger["unless-domain"] = unlessDomain; 671 rule.trigger["unless-domain"] = unlessDomain;
650 672
651 rules.push(rule); 673 rules.push(rule);
652 } 674 }
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
775 return edit; 797 return edit;
776 } 798 }
777 799
778 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive) 800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)
779 { 801 {
780 const heuristicRange = 1000; 802 const heuristicRange = 1000;
781 803
782 let ol = rulesInfo.length; 804 let ol = rulesInfo.length;
783 805
784 // Throw out obviously redundant rules. 806 // Throw out obviously redundant rules.
785 return async(rulesInfo.map((ruleInfo, index) => () => 807 return async(rulesInfo, (ruleInfo, index) => () =>
786 { 808 {
787 // If this rule is already marked as redundant, don't bother comparing it 809 // If this rule is already marked as redundant, don't bother comparing it
788 // with other rules. 810 // with other rules.
789 if (rulesInfo[index].redundant) 811 if (rulesInfo[index].redundant)
790 return; 812 return;
791 813
792 let limit = exhaustive ? rulesInfo.length : 814 let limit = exhaustive ? rulesInfo.length :
793 Math.min(index + heuristicRange, rulesInfo.length); 815 Math.min(index + heuristicRange, rulesInfo.length);
794 816
795 for (let i = index, j = i + 1; j < limit; j++) 817 for (let i = index, j = i + 1; j < limit; j++)
(...skipping 12 matching lines...) Expand all
808 { 830 {
809 rulesInfo[i].redundant = true; 831 rulesInfo[i].redundant = true;
810 break; 832 break;
811 } 833 }
812 } 834 }
813 else if (target.substring(0, source.length) == source) 835 else if (target.substring(0, source.length) == source)
814 { 836 {
815 rulesInfo[j].redundant = true; 837 rulesInfo[j].redundant = true;
816 } 838 }
817 } 839 }
818 })) 840 })
819 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant)); 841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));
820 } 842 }
821 843
822 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive) 844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
823 { 845 {
824 // Closely matching rules are likely to be within a certain range. We only 846 // Closely matching rules are likely to be within a certain range. We only
825 // look for matches within this range by default. If we increase this value, 847 // look for matches within this range by default. If we increase this value,
826 // it can give us more matches and a smaller resulting rule set, but possibly 848 // it can give us more matches and a smaller resulting rule set, but possibly
827 // at a significant performance cost. 849 // at a significant performance cost.
828 // 850 //
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after
1029 rule.trigger["url-filter"] = urlFilter; 1051 rule.trigger["url-filter"] = urlFilter;
1030 1052
1031 // Mark this rule as one that has had other rules merged into it. 1053 // Mark this rule as one that has had other rules merged into it.
1032 ruleInfo.mergedInto = true; 1054 ruleInfo.mergedInto = true;
1033 } 1055 }
1034 } 1056 }
1035 } 1057 }
1036 1058
1037 function mergeRulesByURLFilter(rulesInfo, exhaustive) 1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)
1038 { 1060 {
1039 return async(rulesInfo.map((ruleInfo, index) => () => 1061 return async(rulesInfo, (ruleInfo, index) => () =>
1040 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive) 1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
1041 )) 1063 )
1042 .then(() => mergeCandidateRulesByURLFilter(rulesInfo)); 1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));
1043 } 1065 }
1044 1066
1045 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) 1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)
1046 { 1068 {
1047 if (rulesInfo.length <= 1) 1069 if (rulesInfo.length <= 1)
1048 return; 1070 return;
1049 1071
1050 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]); 1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);
1051 1073
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1091 1113
1092 function mergeRules(rules, exhaustive) 1114 function mergeRules(rules, exhaustive)
1093 { 1115 {
1094 let rulesInfo = rules.map(rule => ({rule})); 1116 let rulesInfo = rules.map(rule => ({rule}));
1095 1117
1096 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"]; 1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];
1097 1119
1098 return async(() => 1120 return async(() =>
1099 { 1121 {
1100 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter"); 1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");
1101 return async(Array.from(map.values()).map(mergeableRulesInfo => () => 1123 return async(map.values(), mergeableRulesInfo => () =>
kzar 2017/07/25 12:18:53 If async always took a sequence as the first argum… (comment truncated in collapsed view)
Manish Jethani 2017/07/28 09:17:36 That's a good suggestion. If the async function t… (comment truncated in collapsed view)
1102 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive) 1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)
1103 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive)) 1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))
1104 )) 1126 )
1105 .then(() => 1127 .then(() =>
1106 { 1128 {
1107 // Filter out rules that are redundant or have been merged into other 1129 // Filter out rules that are redundant or have been merged into other
1108 // rules. 1130 // rules.
1109 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant && 1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&
1110 !ruleInfo.merged); 1132 !ruleInfo.merged);
1111 }); 1133 });
1112 }) 1134 })
1113 .then(() => async(arrayPropertiesToMergeBy.map(arrayProperty => () => 1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>
1114 { 1136 {
1115 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", 1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",
1116 arrayProperty); 1138 arrayProperty);
1117 return async(Array.from(map.values()).map(mergeableRulesInfo => () => 1139 return async(map.values(), mergeableRulesInfo => () =>
1118 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty) 1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)
1119 )) 1141 )
1120 .then(() => 1142 .then(() =>
1121 { 1143 {
1122 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); 1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);
1123 }); 1145 });
1124 }))) 1146 }))
1125 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule)); 1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));
1126 } 1148 }
1127 1149
1128 let ContentBlockerList = 1150 let ContentBlockerList =
1129 /** 1151 /**
1130 * Create a new Adblock Plus filter to content blocker list converter 1152 * Create a new Adblock Plus filter to content blocker list converter
1131 * 1153 *
1132 * @param {object} options Options for content blocker list generation 1154 * @param {object} options Options for content blocker list generation
1133 * 1155 *
1134 * @constructor 1156 * @constructor
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
1244 // --max_old_space_size=4096 1266 // --max_old_space_size=4096
1245 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); 1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);
1246 1268
1247 let genericSelectorExceptionDomains = 1269 let genericSelectorExceptionDomains =
1248 extractFilterDomains(this.generichideExceptions); 1270 extractFilterDomains(this.generichideExceptions);
1249 elemhideExceptionDomains.forEach(name => 1271 elemhideExceptionDomains.forEach(name =>
1250 { 1272 {
1251 genericSelectorExceptionDomains.add(name); 1273 genericSelectorExceptionDomains.add(name);
1252 }); 1274 });
1253 1275
1254 addCSSRules(cssRules, genericSelectors, "^https?://", 1276 addCSSRules(cssRules, genericSelectors, null,
1255 genericSelectorExceptionDomains); 1277 genericSelectorExceptionDomains);
1278
1279 // Filter out whitelisted domains.
1280 elemhideExceptionDomains.forEach(domain =>
1281 groupedElemhideFilters.delete(domain));
1256 1282
1257 groupedElemhideFilters.forEach((selectors, matchDomain) => 1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>
1258 { 1284 {
1259 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains); 1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);
1260 }); 1286 });
1261 1287
1262 let requestFilterExceptionDomains = []; 1288 let requestFilterExceptionDomains = [];
1263 for (let filter of this.genericblockExceptions) 1289 for (let filter of this.genericblockExceptions)
1264 { 1290 {
1265 let parsed = parseFilterRegexpSource(filter.regexpSource); 1291 let parsed = parseFilterRegexpSource(filter.regexpSource);
1266 if (parsed.hostname) 1292 if (parsed.hostname)
1267 requestFilterExceptionDomains.push(parsed.hostname); 1293 requestFilterExceptionDomains.push(parsed.hostname);
1268 } 1294 }
1269 1295
1270 for (let filter of this.requestFilters) 1296 for (let filter of this.requestFilters)
1271 { 1297 {
1272 convertFilterAddRules(blockingRules, filter, "block", true, 1298 convertFilterAddRules(blockingRules, filter, "block", true,
1273 requestFilterExceptionDomains); 1299 requestFilterExceptionDomains);
1274 } 1300 }
1275 1301
1276 for (let filter of this.requestExceptions) 1302 for (let filter of this.requestExceptions)
1303 {
1277 convertFilterAddRules(blockingExceptionRules, filter, 1304 convertFilterAddRules(blockingExceptionRules, filter,
kzar 2017/07/25 12:18:53 Nit: Please use braces for this for loop since it… (comment truncated in collapsed view)
Manish Jethani 2017/07/28 09:17:36 Done.
1278 "ignore-previous-rules", true); 1305 "ignore-previous-rules", true);
1279 1306 }
1280 return async(ruleGroups.map((group, index) => () => 1307
1308 return async(ruleGroups, (group, index) => () =>
1281 { 1309 {
1282 let next = () => 1310 let next = () =>
1283 { 1311 {
1284 if (index == ruleGroups.length - 1) 1312 if (index == ruleGroups.length - 1)
1285 return ruleGroups.reduce((all, rules) => all.concat(rules), []); 1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);
1286 }; 1314 };
1287 1315
1288 if (this.options.merge == "all" || 1316 if (this.options.merge == "all" ||
1289 (this.options.merge == "auto" && 1317 (this.options.merge == "auto" &&
1290 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000)) 1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))
1291 { 1319 {
1292 return mergeRules(ruleGroups[index], this.options.merge == "all") 1320 return mergeRules(ruleGroups[index], this.options.merge == "all")
1293 .then(rules => 1321 .then(rules =>
1294 { 1322 {
1295 ruleGroups[index] = rules; 1323 ruleGroups[index] = rules;
1296 return next(); 1324 return next();
1297 }); 1325 });
1298 } 1326 }
1299 1327
1300 return next(); 1328 return next();
1301 })); 1329 });
1302 }; 1330 };
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld