Rietveld Code Review Tool

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Left Patch Set: Make generateRules asynchronous Created May 23, 2017, 4:22 p.m.
Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 /** @module abp2blocklist */ 18 /** @module abp2blocklist */
19 19
20 "use strict"; 20 "use strict";
21 21
22 let filterClasses = require("filterClasses"); 22 let filterClasses = require("filterClasses");
23 let tldjs = require("tldjs");
24 let punycode = require("punycode"); 23 let punycode = require("punycode");
25 24
26 const selectorLimit = 5000; 25 const selectorLimit = 5000;
27 const typeMap = filterClasses.RegExpFilter.typeMap; 26 const typeMap = filterClasses.RegExpFilter.typeMap;
28 const whitelistableRequestTypes = (typeMap.IMAGE 27
29 | typeMap.STYLESHEET 28 const httpRequestTypes = typeMap.IMAGE |
30 | typeMap.SCRIPT 29 typeMap.STYLESHEET |
31 | typeMap.FONT 30 typeMap.SCRIPT |
32 | typeMap.MEDIA 31 typeMap.FONT |
33 | typeMap.POPUP 32 typeMap.MEDIA |
34 | typeMap.OBJECT 33 typeMap.POPUP |
35 | typeMap.OBJECT_SUBREQUEST 34 typeMap.OBJECT |
36 | typeMap.XMLHTTPREQUEST 35 typeMap.OBJECT_SUBREQUEST |
37 | typeMap.PING 36 typeMap.XMLHTTPREQUEST |
38 | typeMap.SUBDOCUMENT 37 typeMap.PING |
39 | typeMap.OTHER); 38 typeMap.SUBDOCUMENT |
39 typeMap.OTHER;
40 const rawRequestTypes = typeMap.XMLHTTPREQUEST |
41 typeMap.WEBSOCKET |
42 typeMap.WEBRTC |
43 typeMap.OBJECT_SUBREQUEST |
44 typeMap.PING |
45 typeMap.OTHER;
46 const whitelistableRequestTypes = httpRequestTypes |
47 typeMap.WEBSOCKET |
48 typeMap.WEBRTC;
40 49
41 function callLater(func) 50 function callLater(func)
42 { 51 {
43 return new Promise(resolve => 52 return new Promise(resolve =>
44 { 53 {
45 let call = () => resolve(func()); 54 let call = () => resolve(func());
46 55
47 // If this looks like Node.js, call process.nextTick, otherwise call 56 // If this looks like Node.js, call process.nextTick, otherwise call
48 // setTimeout. 57 // setTimeout.
49 if (typeof process != "undefined") 58 if (typeof process != "undefined")
50 process.nextTick(call); 59 process.nextTick(call);
51 else 60 else
52 setTimeout(call, 0); 61 setTimeout(call, 0);
53 }); 62 });
54 } 63 }
55 64
56 function async(funcs) 65 function async(callees, mapFunction)
57 { 66 {
58 if (!Array.isArray(funcs)) 67 if (!(Symbol.iterator in callees))
59 funcs = Array.from(arguments); 68 callees = [callees];
60 69
61 let lastPause = Date.now(); 70 let lastPause = Date.now();
62 71 let index = 0;
63 return funcs.reduce((promise, next) => promise.then(() => 72
64 { 73 let promise = Promise.resolve();
65 // If it has been 100ms or longer since the last call, take a pause. This 74
66 // keeps the browser from freezing up. 75 for (let next of callees)
67 let now = Date.now(); 76 {
68 if (now - lastPause >= 100) 77 let currentIndex = index;
69 { 78
70 lastPause = now; 79 promise = promise.then(() =>
71 return callLater(next); 80 {
72 } 81 if (mapFunction)
73 82 next = mapFunction(next, currentIndex);
74 return next(); 83
75 }), 84 // If it has been 100ms or longer since the last call, take a pause. This
76 Promise.resolve()); 85 // keeps the browser from freezing up.
86 let now = Date.now();
87 if (now - lastPause >= 100)
88 {
89 lastPause = now;
90 return callLater(next);
91 }
92
93 return next();
94 });
95
96 index++;
97 }
98
99 return promise;
77 } 100 }
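
A minimal usage sketch of the reworked async() helper above (illustrative only; the step names and doStep are hypothetical): it accepts any iterable plus an optional mapFunction that turns each element into a zero-argument function, chains the calls on a promise, and yields via callLater() whenever 100ms have passed since the last pause.

  // Hypothetical usage inside this module.
  let steps = ["parse", "convert", "merge"];
  let doStep = (name, index) => { /* some synchronous work for this step */ };

  async(steps, (name, index) => () => doStep(name, index))
    .then(() => { /* all steps done, event loop was never blocked for long */ });
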
78 101
79 function parseDomains(domains, included, excluded) 102 function parseDomains(domains, included, excluded)
80 { 103 {
81 for (let domain in domains) 104 for (let domain in domains)
82 { 105 {
83 if (domain != "") 106 if (domain != "")
84 { 107 {
85 let enabled = domains[domain]; 108 let enabled = domains[domain];
86 domain = punycode.toASCII(domain.toLowerCase()); 109 domain = punycode.toASCII(domain.toLowerCase());
87 110
88 if (!enabled) 111 if (!enabled)
89 excluded.push(domain); 112 excluded.push(domain);
90 else if (!domains[""]) 113 else if (!domains[""])
91 included.push(domain); 114 included.push(domain);
92 } 115 }
93 } 116 }
94 } 117 }
95 118
96 function escapeRegExp(s) 119 function escapeRegExp(s)
97 { 120 {
98 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 121 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
99 } 122 }
100 123
101 function matchDomain(domain) 124 function matchDomain(domain)
102 { 125 {
126 if (!domain)
127 return "^https?://";
128
103 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; 129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
130 }
131
132 function getURLSchemes(contentType)
133 {
134 // If the given content type includes all supported URL schemes, simply
135 // return a single generic URL scheme pattern. This minimizes the size of the
136 // generated rule set. The downside to this is that it will also match
137 // schemes that we do not want to match (e.g. "ftp://"), but this can be
138 // mitigated by adding exceptions for those schemes.
139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&
140 contentType & httpRequestTypes)
141 return ["[^:]+:(//)?"];
142
143 let urlSchemes = [];
144
145 if (contentType & typeMap.WEBSOCKET)
146 urlSchemes.push("wss?://");
147
148 if (contentType & typeMap.WEBRTC)
149 urlSchemes.push("stuns?:", "turns?:");
150
151 if (contentType & httpRequestTypes)
152 urlSchemes.push("https?://");
153
154 return urlSchemes;
155 }
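
For illustration, the values getURLSchemes() returns for a few content-type masks, derived from the branches above (assuming typeMap from this module):

  // getURLSchemes(typeMap.WEBSOCKET)                 -> ["wss?://"]
  // getURLSchemes(typeMap.WEBRTC)                    -> ["stuns?:", "turns?:"]
  // getURLSchemes(typeMap.IMAGE)                     -> ["https?://"]
  // getURLSchemes(typeMap.IMAGE | typeMap.WEBSOCKET |
  //               typeMap.WEBRTC)                    -> ["[^:]+:(//)?"]
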
156
157 function findSubdomainsInList(domain, list)
158 {
159 let subdomains = [];
160 let suffixLength = domain.length + 1;
161
162 for (let name of list)
163 {
164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
165 subdomains.push(name.slice(0, -suffixLength));
166 }
167
168 return subdomains;
169 }
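
For example (hypothetical inputs), findSubdomainsInList() returns the subdomain labels of the list entries that fall under the given domain:

  findSubdomainsInList("example.com",
                       ["www.example.com", "ads.example.com", "example.org"]);
  // -> ["www", "ads"]
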
170
171 function extractFilterDomains(filters)
172 {
173 let domains = new Set();
174 for (let filter of filters)
175 {
176 let parsed = parseFilterRegexpSource(filter.regexpSource);
177 if (parsed.justHostname)
178 domains.add(parsed.hostname);
179 }
180 return domains;
104 } 181 }
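
A sketch of what extractFilterDomains() yields for hostname-only filters (hypothetical filter objects carrying only the regexpSource property the function reads):

  extractFilterDomains([{regexpSource: "||example.com"},
                        {regexpSource: "||example.org/path"}]);
  // -> Set {"example.com"}   (the second filter is not hostname-only)
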
105 182
106 function convertElemHideFilter(filter, elemhideSelectorExceptions) 183 function convertElemHideFilter(filter, elemhideSelectorExceptions)
107 { 184 {
108 let included = []; 185 let included = [];
109 let excluded = []; 186 let excluded = [];
110 let rules = [];
111 187
112 parseDomains(filter.domains, included, excluded); 188 parseDomains(filter.domains, included, excluded);
113 189
114 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
115 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 191 return {matchDomains: included, selector: filter.selector};
116 } 192 }
117 193
118 /** 194 /**
119 * Parse the given filter "regexpSource" string. Producing a regular expression, 195 * Parse the given filter "regexpSource" string. Producing a regular expression,
120 * extracting the hostname (if any), deciding if the regular expression is safe 196 * extracting the hostname (if any), deciding if the regular expression is safe
121 * to be converted + matched as lower case and noting if the source contains 197 * to be converted + matched as lower case and noting if the source contains
122 * anything after the hostname.) 198 * anything after the hostname.)
123 * 199 *
124 * @param {string} text regexpSource property of a filter 200 * @param {string} text regexpSource property of a filter
201 * @param {string} urlScheme The URL scheme to use in the regular expression
125 * @returns {object} An object containing a regular expression string, a bool 202 * @returns {object} An object containing a regular expression string, a bool
126 * indicating if the filter can be safely matched as lower 203 * indicating if the filter can be safely matched as lower
127 * case, a hostname string (or undefined) and a bool 204 * case, a hostname string (or undefined) and a bool
128 * indicating if the source only contains a hostname or not: 205 * indicating if the source only contains a hostname or not:
129 * {regexp: "...", 206 * {regexp: "...",
130 * canSafelyMatchAsLowercase: true/false, 207 * canSafelyMatchAsLowercase: true/false,
131 * hostname: "...", 208 * hostname: "...",
132 * justHostname: true/false} 209 * justHostname: true/false}
133 */ 210 */
134 function parseFilterRegexpSource(text) 211 function parseFilterRegexpSource(text, urlScheme)
135 { 212 {
136 let regexp = []; 213 let regexp = [];
137 let lastIndex = text.length - 1; 214
215 // Convert the text into an array of Unicode characters.
216 //
217 // In the case of surrogate pairs (the smiley emoji, for example), one
218 // Unicode code point is represented by two JavaScript characters together.
219 // We want to iterate over Unicode code points rather than JavaScript
220 // characters.
221 let characters = Array.from(text);
222
223 let lastIndex = characters.length - 1;
138 let hostname; 224 let hostname;
139 let hostnameStart = null; 225 let hostnameStart = null;
140 let hostnameFinished = false; 226 let hostnameFinished = false;
141 let justHostname = false; 227 let justHostname = false;
142 let canSafelyMatchAsLowercase = false; 228 let canSafelyMatchAsLowercase = false;
143 229
144 for (let i = 0; i < text.length; i++) 230 if (!urlScheme)
145 { 231 urlScheme = getURLSchemes()[0];
146 let c = text[i]; 232
233 for (let i = 0; i < characters.length; i++)
234 {
235 let c = characters[i];
147 236
148 if (hostnameFinished) 237 if (hostnameFinished)
149 justHostname = false; 238 justHostname = false;
150 239
151 // If we're currently inside the hostname we have to be careful not to 240 // If we're currently inside the hostname we have to be careful not to
152 // escape any characters until after we have converted it to punycode. 241 // escape any characters until after we have converted it to punycode.
153 if (hostnameStart != null && !hostnameFinished) 242 if (hostnameStart != null && !hostnameFinished)
154 { 243 {
155 let endingChar = (c == "*" || c == "^" || 244 let endingChar = (c == "*" || c == "^" ||
156 c == "?" || c == "/" || c == "|"); 245 c == "?" || c == "/" || c == "|");
157 if (!endingChar && i != lastIndex) 246 if (!endingChar && i != lastIndex)
158 continue; 247 continue;
159 248
160 hostname = punycode.toASCII( 249 hostname = punycode.toASCII(
161 text.substring(hostnameStart, endingChar ? i : i + 1) 250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
251 .toLowerCase()
162 ); 252 );
163 hostnameFinished = justHostname = true; 253 hostnameFinished = justHostname = true;
164 regexp.push(escapeRegExp(hostname)); 254 regexp.push(escapeRegExp(hostname));
165 if (!endingChar) 255 if (!endingChar)
166 break; 256 break;
167 } 257 }
168 258
169 switch (c) 259 switch (c)
170 { 260 {
171 case "*": 261 case "*":
172 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") 262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
173 regexp.push(".*"); 263 regexp.push(".*");
174 break; 264 break;
175 case "^": 265 case "^":
176 if (i < lastIndex) 266 let alphabet = "a-z";
177 regexp.push("."); 267 // If justHostname is true and we've encountered a "^", it means we're
268 // still in the hostname part of the URL. Since hostnames are always
269 // lower case (Punycode), there's no need to include "A-Z" in the
270 // pattern. Further, subsequent code may lower-case the entire regular
271 // expression (if the URL contains only the hostname part), leaving us
272 // with "a-za-z", which would be redundant.
273 if (!justHostname)
274 alphabet = "A-Z" + alphabet;
275 let digits = "0-9";
276 // Note that the "-" must appear first here in order to retain its
277 // literal meaning within the brackets.
278 let specialCharacters = "-_.%";
279 let separator = "[^" + specialCharacters + alphabet + digits + "]";
280 if (i == 0)
281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");
282 else if (i == lastIndex)
283 regexp.push("(" + separator + ".*)?$");
284 else
285 regexp.push(separator);
178 break; 286 break;
179 case "|": 287 case "|":
180 if (i == 0) 288 if (i == 0)
181 { 289 {
182 regexp.push("^"); 290 regexp.push("^");
183 break; 291 break;
184 } 292 }
185 if (i == lastIndex) 293 if (i == lastIndex)
186 { 294 {
187 regexp.push("$"); 295 regexp.push("$");
188 break; 296 break;
189 } 297 }
190 if (i == 1 && text[0] == "|") 298 if (i == 1 && characters[0] == "|")
191 { 299 {
192 hostnameStart = i + 1; 300 hostnameStart = i + 1;
193 canSafelyMatchAsLowercase = true; 301 canSafelyMatchAsLowercase = true;
194 regexp.push("https?://([^/]+\\.)?"); 302 regexp.push(urlScheme + "([^/]+\\.)?");
195 break; 303 break;
196 } 304 }
197 regexp.push("\\|"); 305 regexp.push("\\|");
198 break; 306 break;
199 case "/": 307 case "/":
200 if (!hostnameFinished && 308 if (!hostnameFinished &&
201 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 309 characters[i - 2] == ":" && characters[i - 1] == "/")
202 { 310 {
203 hostnameStart = i + 1; 311 hostnameStart = i + 1;
204 canSafelyMatchAsLowercase = true; 312 canSafelyMatchAsLowercase = true;
205 } 313 }
206 regexp.push("/"); 314 regexp.push("/");
207 break; 315 break;
208 case ".": case "+": case "$": case "?": 316 case ".": case "+": case "$": case "?":
209 case "{": case "}": case "(": case ")": 317 case "{": case "}": case "(": case ")":
210 case "[": case "]": case "\\": 318 case "[": case "]": case "\\":
211 regexp.push("\\", c); 319 regexp.push("\\", c);
212 break; 320 break;
213 default: 321 default:
214 if (hostnameFinished && (c >= "a" && c <= "z" || 322 if (hostnameFinished && (c >= "a" && c <= "z" ||
215 c >= "A" && c <= "Z")) 323 c >= "A" && c <= "Z"))
216 canSafelyMatchAsLowercase = false; 324 canSafelyMatchAsLowercase = false;
217 regexp.push(c); 325 regexp.push(c == "%" ? c : encodeURI(c));
218 } 326 }
219 } 327 }
220 328
221 return { 329 return {
222 regexp: regexp.join(""), 330 regexp: regexp.join(""),
223 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
224 hostname: hostname, 332 hostname: hostname,
225 justHostname: justHostname 333 justHostname: justHostname
226 }; 334 };
227 } 335 }
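
To make the parse result concrete, a rough example of what the updated function returns for a hostname-anchored pattern (illustrative, with the HTTP scheme pattern passed explicitly):

  parseFilterRegexpSource("||example.com", "https?://");
  // -> {
  //      regexp: "^https?://([^/]+\\.)?example\\.com",
  //      canSafelyMatchAsLowercase: true,
  //      hostname: "example.com",
  //      justHostname: true
  //    }
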
228 336
229 function getResourceTypes(filter) 337 function getResourceTypes(contentType)
230 { 338 {
231 let types = []; 339 let types = [];
232 340
233 if (filter.contentType & typeMap.IMAGE) 341 if (contentType & typeMap.IMAGE)
234 types.push("image"); 342 types.push("image");
235 if (filter.contentType & typeMap.STYLESHEET) 343 if (contentType & typeMap.STYLESHEET)
236 types.push("style-sheet"); 344 types.push("style-sheet");
237 if (filter.contentType & typeMap.SCRIPT) 345 if (contentType & typeMap.SCRIPT)
238 types.push("script"); 346 types.push("script");
239 if (filter.contentType & typeMap.FONT) 347 if (contentType & typeMap.FONT)
240 types.push("font"); 348 types.push("font");
241 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) 349 if (contentType & (typeMap.MEDIA | typeMap.OBJECT))
242 types.push("media"); 350 types.push("media");
243 if (filter.contentType & typeMap.POPUP) 351 if (contentType & typeMap.POPUP)
244 types.push("popup"); 352 types.push("popup");
245 if (filter.contentType & (typeMap.XMLHTTPREQUEST | 353 if (contentType & rawRequestTypes)
246 typeMap.OBJECT_SUBREQUEST |
247 typeMap.PING |
248 typeMap.OTHER))
249 types.push("raw"); 354 types.push("raw");
250 if (filter.contentType & typeMap.SUBDOCUMENT) 355 if (contentType & typeMap.SUBDOCUMENT)
251 types.push("document"); 356 types.push("document");
252 357
253 return types; 358 return types;
254 } 359 }
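
For instance (illustrative), with the refactored signature that takes a content-type mask directly:

  getResourceTypes(typeMap.IMAGE | typeMap.XMLHTTPREQUEST);
  // -> ["image", "raw"]   (XMLHTTPREQUEST is part of rawRequestTypes)
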
255 360
256 function addDomainPrefix(domains) 361 function makeRuleCopies(trigger, action, urlSchemes)
257 { 362 {
258 let result = []; 363 let copies = [];
259 364
260 for (let domain of domains) 365 // Always make a deep copy of the rule, since rules may have to be
261 { 366 // manipulated individually at a later stage.
262 result.push(domain); 367 let stringifiedTrigger = JSON.stringify(trigger);
263 368
264 if (tldjs.getDomain(domain) == domain) 369 let filterPattern = trigger["url-filter"].substring(1);
265 result.push("www." + domain); 370 let startIndex = 0;
266 } 371
267 372 // If the URL filter already begins with the first URL scheme pattern, skip
268 return result; 373 // it.
269 } 374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
270 375 {
271 function convertFilterAddRules(rules, filter, action, withResourceTypes) 376 filterPattern = filterPattern.substring(urlSchemes[0].length);
272 { 377 startIndex = 1;
273 let parsed = parseFilterRegexpSource(filter.regexpSource); 378 }
379 else
380 {
381 filterPattern = ".*" + filterPattern;
382 }
383
384 for (let i = startIndex; i < urlSchemes.length; i++)
385 {
386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
387 "url-filter": "^" + urlSchemes[i] + filterPattern
388 });
389 copies.push({trigger: copyTrigger, action});
390 }
391
392 return copies;
393 }
394
395 function excludeTopURLFromTrigger(trigger)
396 {
397 trigger["unless-top-url"] = [trigger["url-filter"]];
398 if (trigger["url-filter-is-case-sensitive"])
399 trigger["top-url-filter-is-case-sensitive"] = true;
400 }
401
402 function convertFilterAddRules(rules, filter, action, withResourceTypes,
403 exceptionDomains, contentType)
404 {
405 if (!contentType)
406 contentType = filter.contentType;
407
408 // If WebSocket or WebRTC are given along with other options but not
409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,
410 // we must generate multiple rules. For example, for the filter
411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"
412 // and another rule with "^https?://" and "image". If we merge the two, we
413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)
414 // inadvertently.
415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&
416 !(contentType & typeMap.WEBRTC &&
417 contentType & rawRequestTypes & httpRequestTypes)) ||
418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&
419 !(contentType & typeMap.WEBSOCKET &&
420 contentType & rawRequestTypes & httpRequestTypes)))
421 {
422 if (contentType & typeMap.WEBSOCKET)
423 {
424 convertFilterAddRules(rules, filter, action, withResourceTypes,
425 exceptionDomains, typeMap.WEBSOCKET);
426 }
427
428 if (contentType & typeMap.WEBRTC)
429 {
430 convertFilterAddRules(rules, filter, action, withResourceTypes,
431 exceptionDomains, typeMap.WEBRTC);
432 }
433
434 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);
435
436 if (!contentType)
437 return;
438 }
439
440 let urlSchemes = getURLSchemes(contentType);
441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);
274 442
275 // For the special case of $document whitelisting filters with just a domain 443 // For the special case of $document whitelisting filters with just a domain
276 // we can generate an equivalent blocking rule exception using if-domain. 444 // we can generate an equivalent blocking rule exception using if-domain.
277 if (filter instanceof filterClasses.WhitelistFilter && 445 if (filter instanceof filterClasses.WhitelistFilter &&
278 filter.contentType & typeMap.DOCUMENT && 446 contentType & typeMap.DOCUMENT &&
279 parsed.justHostname) 447 parsed.justHostname)
280 { 448 {
281 rules.push({ 449 rules.push({
282 trigger: { 450 trigger: {
283 "url-filter": ".*", 451 "url-filter": ".*",
284 "if-domain": addDomainPrefix([parsed.hostname]) 452 "if-domain": ["*" + parsed.hostname]
285 }, 453 },
286 action: {type: "ignore-previous-rules"} 454 action: {type: "ignore-previous-rules"}
287 }); 455 });
288 // If the filter contains other supported options we'll need to generate 456 // If the filter contains other supported options we'll need to generate
289 // further rules for it, but if not we can simply return now. 457 // further rules for it, but if not we can simply return now.
290 if (!(filter.contentType & whitelistableRequestTypes)) 458 if (!(contentType & whitelistableRequestTypes))
291 return; 459 return;
292 } 460 }
293 461
294 let trigger = {"url-filter": parsed.regexp}; 462 let trigger = {"url-filter": parsed.regexp};
295 463
296 // Limit rules to HTTP(S) URLs 464 // If the URL filter begins with one of the URL schemes for this content
297 if (!/^(\^|http)/i.test(trigger["url-filter"])) 465 // type, we generate additional rules for all the URL scheme patterns;
298 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; 466 // otherwise, if the start of the URL filter literally matches the first URL
467 // scheme pattern, we just generate additional rules for the remaining URL
468 // scheme patterns.
469 //
470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"
471 // in front of this and generate two additional rules for
472 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,
473 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate
474 // "^turns?:([^/]+\\.)?foo" in addition.
475 //
476 // Note that the filter can be already anchored to the beginning
477 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional
478 // rules.
479 let needAltRules = trigger["url-filter"][0] != "^" ||
480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);
481
482 if (trigger["url-filter"][0] != "^")
483 {
484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)
485 .test(trigger["url-filter"])))
486 {
487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];
488 }
489
490 trigger["url-filter"] = "^" + trigger["url-filter"];
491 }
299 492
300 // For rules containing only a hostname we know that we're matching against 493 // For rules containing only a hostname we know that we're matching against
301 // a lowercase string unless the matchCase option was passed. 494 // a lowercase string unless the matchCase option was passed.
302 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) 495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
303 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
304 497
305 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) 498 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
306 trigger["url-filter-is-case-sensitive"] = true; 499 trigger["url-filter-is-case-sensitive"] = true;
307 500
308 let included = []; 501 let included = [];
309 let excluded = []; 502 let excluded = [];
310 503
311 parseDomains(filter.domains, included, excluded); 504 parseDomains(filter.domains, included, excluded);
312 505
506 if (exceptionDomains)
507 excluded = excluded.concat(exceptionDomains);
508
313 if (withResourceTypes) 509 if (withResourceTypes)
314 { 510 {
315 trigger["resource-type"] = getResourceTypes(filter); 511 let resourceTypes = getResourceTypes(contentType);
316 512
317 if (trigger["resource-type"].length == 0) 513 // Content blocker rules can't differentiate between sub-document requests
514 // (iframes) and top-level document requests. To avoid too many false
515 // positives, we prevent rules with no hostname part from blocking document
516 // requests.
517 //
518 // Once Safari 11 becomes our minimum supported version, we could change
519 // our approach here to use the new "unless-top-url" property instead.
520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)
521 resourceTypes = resourceTypes.filter(type => type != "document");
522
523 if (resourceTypes.length == 0)
318 return; 524 return;
525
526 trigger["resource-type"] = resourceTypes;
319 } 527 }
320 528
321 if (filter.thirdParty != null) 529 if (filter.thirdParty != null)
322 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
323 531
532 let addTopLevelException = false;
533
324 if (included.length > 0) 534 if (included.length > 0)
325 trigger["if-domain"] = addDomainPrefix(included); 535 {
536 trigger["if-domain"] = [];
537
538 for (let name of included)
539 {
540 // If this is a blocking filter or an element hiding filter, add the
541 // subdomain wildcard only if no subdomains have been excluded.
542 let notSubdomains = null;
543 if ((filter instanceof filterClasses.BlockingFilter ||
544 filter instanceof filterClasses.ElemHideFilter) &&
545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)
546 {
547 trigger["if-domain"].push(name);
548
549 // Add the "www" prefix but only if it hasn't been excluded.
550 if (!notSubdomains.includes("www"))
551 trigger["if-domain"].push("www." + name);
552 }
553 else
554 {
555 trigger["if-domain"].push("*" + name);
556 }
557 }
558 }
326 else if (excluded.length > 0) 559 else if (excluded.length > 0)
327 trigger["unless-domain"] = addDomainPrefix(excluded); 560 {
561 trigger["unless-domain"] = excluded.map(name => "*" + name);
562 }
563 else if (filter instanceof filterClasses.BlockingFilter &&
564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)
565 {
566 // Rules with a hostname part are still allowed to block document requests,
567 // but we add an exception for top-level documents.
568 //
569 // Note that we can only do this if there's no "unless-domain" property for
570 // now. This also only works in Safari 11 onwards, while older versions
571 // simply ignore this property. Once Safari 11 becomes our minimum
572 // supported version, we can merge "unless-domain" into "unless-top-url".
573 addTopLevelException = true;
574 excludeTopURLFromTrigger(trigger);
575 }
328 576
329 rules.push({trigger: trigger, action: {type: action}}); 577 rules.push({trigger: trigger, action: {type: action}});
330 } 578
331 579 if (needAltRules)
332 function hasNonASCI(obj) 580 {
333 { 581 // Generate additional rules for any alternative URL schemes.
334 if (typeof obj == "string") 582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))
335 { 583 {
336 if (/[^\x00-\x7F]/.test(obj)) 584 if (addTopLevelException)
337 return true; 585 excludeTopURLFromTrigger(altRule.trigger);
338 } 586
339 587 rules.push(altRule);
340 if (typeof obj == "object") 588 }
341 { 589 }
342 if (obj instanceof Array)
343 for (let item of obj)
344 if (hasNonASCI(item))
345 return true;
346
347 let names = Object.getOwnPropertyNames(obj);
348 for (let name of names)
349 if (hasNonASCI(obj[name]))
350 return true;
351 }
352
353 return false;
354 } 590 }
355 591
356 function convertIDSelectorsToAttributeSelectors(selector) 592 function convertIDSelectorsToAttributeSelectors(selector)
357 { 593 {
358 // First we figure out where all the IDs are 594 // First we figure out where all the IDs are
359 let sep = ""; 595 let sep = "";
360 let start = null; 596 let start = null;
361 let positions = []; 597 let positions = [];
362 for (let i = 0; i < selector.length; i++) 598 for (let i = 0; i < selector.length; i++)
363 { 599 {
(...skipping 33 matching lines...)
397 { 633 {
398 newSelector.push(selector.substring(i, pos.start)); 634 newSelector.push(selector.substring(i, pos.start));
399 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
400 i = pos.end; 636 i = pos.end;
401 } 637 }
402 newSelector.push(selector.substring(i)); 638 newSelector.push(selector.substring(i));
403 639
404 return newSelector.join(""); 640 return newSelector.join("");
405 } 641 }
406 642
643 function addCSSRules(rules, selectors, domain, exceptionDomains)
644 {
645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;
646
647 exceptionDomains.forEach(name =>
648 {
649 // For domain-specific filters, include the exception domains only if
650 // they're subdomains of the given domain.
651 if (!domain || name.substr(-domain.length - 1) == "." + domain)
652 unlessDomain.push("*" + name);
653 });
654
655 while (selectors.length)
656 {
657 let selector = selectors.splice(0, selectorLimit).join(", ");
658
659 // As of Safari 9.0 element IDs are matched as lowercase. We work around
660 // this by converting to the attribute format [id="elementID"]
661 selector = convertIDSelectorsToAttributeSelectors(selector);
662
663 let rule = {
664 trigger: {"url-filter": matchDomain(domain),
665 "url-filter-is-case-sensitive": true},
666 action: {type: "css-display-none",
667 selector: selector}
668 };
669
670 if (unlessDomain)
671 rule.trigger["unless-domain"] = unlessDomain;
672
673 rules.push(rule);
674 }
675 }
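
A minimal sketch of a rule produced by the new addCSSRules() helper, assuming a single class selector, a specific domain and no exception domains (hypothetical values):

  let rules = [];
  addCSSRules(rules, [".ad-banner"], "example.com", new Set());
  // rules[0] is now roughly:
  // {
  //   trigger: {"url-filter": "^https?://([^/:]*\\.)?example\\.com[/:]",
  //             "url-filter-is-case-sensitive": true},
  //   action: {type: "css-display-none", selector: ".ad-banner"}
  // }
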
676
407 /** 677 /**
408 * Check if two strings are a close match 678 * Check if two strings are a close match
409 * 679 *
410 * This function returns an edit operation, one of "substitute", "delete", and 680 * This function returns an edit operation, one of "substitute", "delete", and
411 * "insert", along with an index in the source string where the edit must occur 681 * "insert", along with an index in the source string where the edit must occur
412 * in order to arrive at the target string. If the strings are not a close 682 * in order to arrive at the target string. If the strings are not a close
413 * match, it returns null. 683 * match, it returns null.
414 * 684 *
415 * Two strings are considered to be a close match if they are one edit 685 * Two strings are considered to be a close match if they are one edit
416 * operation apart. 686 * operation apart.
(...skipping 38 matching lines...)
455 // calculation. 725 // calculation.
456 if (diff < 0) 726 if (diff < 0)
457 { 727 {
458 let tmp = s; 728 let tmp = s;
459 s = t; 729 s = t;
460 t = tmp; 730 t = tmp;
461 } 731 }
462 732
463 let edit = null; 733 let edit = null;
464 734
465 let i = 0, j = 0; 735 let i = 0;
736 let j = 0;
466 737
467 // Start from the beginning and keep going until we hit a character that 738 // Start from the beginning and keep going until we hit a character that
468 // doesn't match. 739 // doesn't match.
469 for (; i < s.length; i++) 740 for (; i < s.length; i++)
470 { 741 {
471 if (s[i] != t[i]) 742 if (s[i] != t[i])
472 break; 743 break;
473 } 744 }
474 745
475 // Now do exactly the same from the end, but also stop if we reach the 746 // Now do exactly the same from the end, but also stop if we reach the
(...skipping 43 matching lines...)
519 { 790 {
520 edit = {type: "insert", index: i}; 791 edit = {type: "insert", index: i};
521 792
522 if (diff < -1) 793 if (diff < -1)
523 edit.endIndex = s.length - j; 794 edit.endIndex = s.length - j;
524 } 795 }
525 796
526 return edit; 797 return edit;
527 } 798 }
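
Based on the documentation above (the function body is partly elided in this delta), the edits closeMatch() reports look roughly like this, illustrative values only:

  // closeMatch("ads", "adv")     -> {type: "substitute", index: 2}
  // closeMatch("ads", "ad")      -> {type: "delete", index: 2}
  // closeMatch("ad", "ads")      -> {type: "insert", index: 2}
  // closeMatch("ad", "adserver") -> an "insert" edit carrying an endIndex,
  //                                 i.e. a multiple character insertion
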
528 799
529 function eliminateRedundantRulesByURLFilter(rulesInfo) 800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)
530 { 801 {
531 for (let i = 0; i < rulesInfo.length; i++) 802 const heuristicRange = 1000;
803
804 let ol = rulesInfo.length;
805
806 // Throw out obviously redundant rules.
807 return async(rulesInfo, (ruleInfo, index) => () =>
532 { 808 {
533 // If this rule is already marked as redundant, don't bother comparing it 809 // If this rule is already marked as redundant, don't bother comparing it
534 // with other rules. 810 // with other rules.
535 if (rulesInfo[i].redundant) 811 if (rulesInfo[index].redundant)
536 continue; 812 return;
537 813
538 for (let j = i + 1; j < rulesInfo.length; j++) 814 let limit = exhaustive ? rulesInfo.length :
815 Math.min(index + heuristicRange, rulesInfo.length);
816
817 for (let i = index, j = i + 1; j < limit; j++)
539 { 818 {
540 if (rulesInfo[j].redundant) 819 if (rulesInfo[j].redundant)
541 continue; 820 continue;
542 821
543 let source = rulesInfo[i].rule.trigger["url-filter"]; 822 let source = rulesInfo[i].rule.trigger["url-filter"];
544 let target = rulesInfo[j].rule.trigger["url-filter"]; 823 let target = rulesInfo[j].rule.trigger["url-filter"];
545 824
546 if (source.length >= target.length) 825 if (source.length >= target.length)
547 { 826 {
548 // If one URL filter is a substring of the other starting at the 827 // If one URL filter is a substring of the other starting at the
549 // beginning, the other one is clearly redundant. 828 // beginning, the other one is clearly redundant.
550 if (source.substring(0, target.length) == target) 829 if (source.substring(0, target.length) == target)
551 { 830 {
552 rulesInfo[i].redundant = true; 831 rulesInfo[i].redundant = true;
553 break; 832 break;
554 } 833 }
555 } 834 }
556 else if (target.substring(0, source.length) == source) 835 else if (target.substring(0, source.length) == source)
557 { 836 {
558 rulesInfo[j].redundant = true; 837 rulesInfo[j].redundant = true;
559 } 838 }
560 } 839 }
561 } 840 })
562 841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));
563 return rulesInfo.filter(ruleInfo => !ruleInfo.redundant); 842 }
564 } 843
565 844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
566 function mergeRulesByURLFilter(rulesInfo, exhaustive)
567 { 845 {
568 // Closely matching rules are likely to be within a certain range. We only 846 // Closely matching rules are likely to be within a certain range. We only
569 // look for matches within this range by default. If we increase this value, 847 // look for matches within this range by default. If we increase this value,
570 // it can give us more matches and a smaller resulting rule set, but possibly 848 // it can give us more matches and a smaller resulting rule set, but possibly
571 // at a significant performance cost. 849 // at a significant performance cost.
572 // 850 //
573 // If the exhaustive option is true, we simply ignore this value and look for 851 // If the exhaustive option is true, we simply ignore this value and look for
574 // matches throughout the rule set. 852 // matches throughout the rule set.
575 const heuristicRange = 10; 853 const heuristicRange = 1000;
576 854
577 return async(() => 855 let limit = exhaustive ? rulesInfo.length :
578 { 856 Math.min(index + heuristicRange, rulesInfo.length);
579 if (exhaustive) 857
580 { 858 for (let i = index, j = i + 1; j < limit; j++)
581 // Throw out obviously redundant rules. 859 {
582 rulesInfo = eliminateRedundantRulesByURLFilter(rulesInfo); 860 let source = rulesInfo[i].rule.trigger["url-filter"];
583 } 861 let target = rulesInfo[j].rule.trigger["url-filter"];
584 }) 862
585 .then(() => 863 let edit = closeMatch(source, target);
586 { 864
587 if (rulesInfo.length <= 1) 865 if (edit)
588 return; 866 {
589 867 let urlFilter, ruleInfo, match = {edit};
590 return async(rulesInfo.map((_, i) => () => 868
591 { 869 if (edit.type == "insert")
592 let limit = exhaustive ? rulesInfo.length : 870 {
593 Math.min(i + heuristicRange, rulesInfo.length); 871 // Convert the insertion into a deletion and stick it on the target
594 872 // rule instead. We can only group deletions and substitutions;
595 for (let j = i + 1; j < limit; j++) 873 // therefore insertions must be treated as deletions on the target
596 { 874 // rule.
597 let source = rulesInfo[i].rule.trigger["url-filter"]; 875 urlFilter = target;
598 let target = rulesInfo[j].rule.trigger["url-filter"]; 876 ruleInfo = rulesInfo[j];
599 877 match.index = i;
600 let edit = closeMatch(source, target); 878 edit.type = "delete";
601 879 }
602 if (edit) 880 else
881 {
882 urlFilter = source;
883 ruleInfo = rulesInfo[i];
884 match.index = j;
885 }
886
887 // If the edit has an end index, it represents a multiple character
888 // edit.
889 let multiEdit = !!edit.endIndex;
890
891 if (multiEdit)
892 {
893 // We only care about a single multiple character edit because the
894 // number of characters for such a match doesn't matter, we can
895 // only merge with one other rule.
896 if (!ruleInfo.multiEditMatch)
897 ruleInfo.multiEditMatch = match;
898 }
899 else
900 {
901 // For single character edits, multiple rules can be merged into
902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".
903 if (!ruleInfo.matches)
904 ruleInfo.matches = new Array(urlFilter.length);
905
906 // Matches at a particular index. For example, for a source string
907 // "ads", both target strings "ad" (deletion) and "adv"
908 // (substitution) match at index 2, hence they are grouped together
909 // to possibly be merged later into "ad[sv]?".
910 let matchesForIndex = ruleInfo.matches[edit.index];
911
912 if (matchesForIndex)
603 { 913 {
604 let urlFilter, ruleInfo, match = {edit}; 914 matchesForIndex.push(match);
605 915 }
606 if (edit.type == "insert") 916 else
917 {
918 matchesForIndex = [match];
919 ruleInfo.matches[edit.index] = matchesForIndex;
920 }
921
922 // Keep track of the best set of matches. We later sort by this to
923 // get best results.
924 if (!ruleInfo.bestMatches ||
925 matchesForIndex.length > ruleInfo.bestMatches.length)
926 ruleInfo.bestMatches = matchesForIndex;
927 }
928 }
929 }
930 }
931
932 function mergeCandidateRulesByURLFilter(rulesInfo)
933 {
934 // Filter out rules that have no matches at all.
935 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>
936 {
937 return ruleInfo.bestMatches || ruleInfo.multiEditMatch
938 });
939
940 // For best results, we have to sort the candidates by the largest set of
941 // matches.
942 //
943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to
944 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and
945 // "[ab]dx" (3 rules).
946 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
947 {
948 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :
949 ruleInfo1.multiEditMatch ? 1 : 0;
950 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :
951 ruleInfo2.multiEditMatch ? 1 : 0;
952
953 return weight2 - weight1;
954 });
955
956 for (let ruleInfo of candidateRulesInfo)
957 {
958 let rule = ruleInfo.rule;
959
960 // If this rule has already been merged into another rule, we skip it.
961 if (ruleInfo.merged)
962 continue;
963
964 // Find the best set of rules to group, which is simply the largest set.
965 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
966 {
967 matchesForIndex = (matchesForIndex || []).filter(match =>
968 {
969 // Filter out rules that have either already been merged into other
970 // rules or have had other rules merged into them.
971 return !rulesInfo[match.index].merged &&
972 !rulesInfo[match.index].mergedInto;
973 });
974
975 return matchesForIndex.length > best.length ? matchesForIndex : best;
976 },
977 []);
978
979 let multiEdit = false;
980
981 // If we couldn't find a single rule to merge with, let's see if we have a
982 // multiple character edit. e.g. we could merge "ad" and "adserver" into
983 // "ad(server)?".
984 if (best.length == 0 && ruleInfo.multiEditMatch &&
985 !rulesInfo[ruleInfo.multiEditMatch.index].merged &&
986 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
987 {
988 best = [ruleInfo.multiEditMatch];
989 multiEdit = true;
990 }
991
992 if (best.length > 0)
993 {
994 let urlFilter = rule.trigger["url-filter"];
995
996 let editIndex = best[0].edit.index;
997
998 if (!multiEdit)
999 {
1000 // Merge all the matching rules into this one.
1001
1002 let characters = [urlFilter[editIndex]];
1003 let quantifier = "";
1004
1005 for (let match of best)
1006 {
1007 if (match.edit.type == "delete")
607 { 1008 {
608 // Convert the insertion into a deletion and stick it on the target 1009 quantifier = "?";
609 // rule instead. We can only group deletions and substitutions;
610 // therefore insertions must be treated as deletions on the target
611 // rule.
612 urlFilter = target;
613 ruleInfo = rulesInfo[j];
614 match.index = i;
615 edit.type = "delete";
616 } 1010 }
617 else 1011 else
618 { 1012 {
619 urlFilter = source; 1013 let character = rulesInfo[match.index].rule
620 ruleInfo = rulesInfo[i]; 1014 .trigger["url-filter"][editIndex];
621 match.index = j; 1015
1016 // Insert any hyphen at the beginning so it gets interpreted as a
1017 // literal hyphen.
1018 if (character == "-")
1019 characters.unshift(character);
1020 else
1021 characters.push(character);
622 } 1022 }
623 1023
624 // If the edit has an end index, it represents a multiple character 1024 // Mark the target rule as merged so other rules don't try to merge
625 // edit. 1025 // it again.
626 let multiEdit = !!edit.endIndex; 1026 rulesInfo[match.index].merged = true;
627 1027 }
628 if (multiEdit) 1028
629 { 1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +
630 // We only care about a single multiple character edit because the 1030 urlFilter.substring(editIndex + 1);
631 // number of characters for such a match doesn't matter, we can 1031 if (characters.length > 1)
632 // only merge with one other rule. 1032 {
633 if (!ruleInfo.multiEditMatch) 1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +
634 ruleInfo.multiEditMatch = match; 1034 characters.join("") + "]" +
635 } 1035 urlFilter.substring(editIndex + 1);
636 else
637 {
638 // For single character edits, multiple rules can be merged into
639 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".
640 if (!ruleInfo.matches)
641 ruleInfo.matches = new Array(urlFilter.length);
642
643 // Matches at a particular index. For example, for a source string
644 // "ads", both target strings "ad" (deletion) and "adv"
645 // (substitution) match at index 2, hence they are grouped together
646 // to possibly be merged later into "ad[sv]?".
647 let matchesForIndex = ruleInfo.matches[edit.index];
648
649 if (matchesForIndex)
650 {
651 matchesForIndex.push(match);
652 }
653 else
654 {
655 matchesForIndex = [match];
656 ruleInfo.matches[edit.index] = matchesForIndex;
657 }
658
659 // Keep track of the best set of matches. We later sort by this to
660 // get best results.
661 if (!ruleInfo.bestMatches ||
662 matchesForIndex.length > ruleInfo.bestMatches.length)
663 ruleInfo.bestMatches = matchesForIndex;
664 }
665 } 1036 }
666 } 1037 }
667 })); 1038 else
668 }) 1039 {
669 .then(() => 1040 let editEndIndex = best[0].edit.endIndex;
670 { 1041
671 // Filter out rules that have no matches at all. 1042 // Mark the target rule as merged so other rules don't try to merge it
672 let candidateRulesInfo = rulesInfo.filter(ruleInfo => 1043 // again.
673 { 1044 rulesInfo[best[0].index].merged = true;
674 return ruleInfo.bestMatches || ruleInfo.multiEditMatch 1045
675 }); 1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +
676 1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +
677 // For best results, we have to sort the candidates by the largest set of 1048 urlFilter.substring(editEndIndex);
678 // matches.
679 //
680 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to
681 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and
682 // "[ab]dx" (3 rules).
683 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
684 {
685 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :
686 ruleInfo1.multiEditMatch ? 1 : 0;
687 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :
688 ruleInfo2.multiEditMatch ? 1 : 0;
689
690 return weight2 - weight1;
691 });
692
693 for (let ruleInfo of candidateRulesInfo)
694 {
695 let rule = ruleInfo.rule;
696
697 // If this rule has already been merged into another rule, we skip it.
698 if (ruleInfo.merged)
699 continue;
700
701 // Find the best set of rules to group, which is simply the largest set.
702 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
703 {
704 matchesForIndex = (matchesForIndex || []).filter(match =>
705 {
706 // Filter out rules that have either already been merged into other
707 // rules or have had other rules merged into them.
708 return !rulesInfo[match.index].merged &&
709 !rulesInfo[match.index].mergedInto;
710 });
711
712 return matchesForIndex.length > best.length ? matchesForIndex : best;
713 },
714 []);
715
716 let multiEdit = false;
717
718 // If we couldn't find a single rule to merge with, let's see if we have a
719 // multiple character edit. e.g. we could merge "ad" and "adserver" into
720 // "ad(server)?".
721 if (best.length == 0 && ruleInfo.multiEditMatch &&
722 !rulesInfo[ruleInfo.multiEditMatch.index].merged &&
723 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
724 {
725 best = [ruleInfo.multiEditMatch];
726 multiEdit = true;
727 } 1049 }
728 1050
729 if (best.length > 0) 1051 rule.trigger["url-filter"] = urlFilter;
730 { 1052
731 let urlFilter = rule.trigger["url-filter"]; 1053 // Mark this rule as one that has had other rules merged into it.
732 1054 ruleInfo.mergedInto = true;
733 let editIndex = best[0].edit.index; 1055 }
734 1056 }
735 if (!multiEdit) 1057 }
736 { 1058
737 // Merge all the matching rules into this one. 1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)
738 1060 {
739 let characters = []; 1061 return async(rulesInfo, (ruleInfo, index) => () =>
740 let quantifier = ""; 1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
741 1063 )
742 for (let match of best) 1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));
743 {
744 if (match.edit.type == "delete")
745 {
746 quantifier = "?";
747 }
748 else
749 {
750 let character = rulesInfo[match.index].rule
751 .trigger["url-filter"][editIndex];
752 characters.push(character);
753 }
754
755 // Mark the target rule as merged so other rules don't try to merge
756 // it again.
757 rulesInfo[match.index].merged = true;
758 }
759
760 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +
761 urlFilter.substring(editIndex + 1);
762 if (characters.length > 0)
763 {
764 urlFilter = urlFilter.substring(0, editIndex) + "[" +
765 urlFilter[editIndex] + characters.join("") + "]" +
766 urlFilter.substring(editIndex + 1);
767 }
768 }
769 else
770 {
771 let editEndIndex = best[0].edit.endIndex;
772
773 // Mark the target rule as merged so other rules don't try to merge it
774 // again.
775 rulesInfo[best[0].index].merged = true;
776
777 urlFilter = urlFilter.substring(0, editIndex) + "(" +
778 urlFilter.substring(editIndex, editEndIndex) + ")?" +
779 urlFilter.substring(editEndIndex);
780 }
781
782 rule.trigger["url-filter"] = urlFilter;
783
784 // Mark this rule as one that has had other rules merged into it.
785 ruleInfo.mergedInto = true;
786 }
787 }
788 });
789 } 1065 }
790 1066
791 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) 1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)
792 { 1068 {
793 if (rulesInfo.length <= 1) 1069 if (rulesInfo.length <= 1)
794 return; 1070 return;
795 1071
796 let oneRuleInfo = rulesInfo.shift(); 1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);
797 let valueSet = new Set(oneRuleInfo.rule[propertyType][property]); 1073
798 1074 for (let i = 1; i < rulesInfo.length; i++)
799 for (let ruleInfo of rulesInfo) 1075 {
800 { 1076 for (let value of rulesInfo[i].rule[propertyType][property] || [])
801 if (ruleInfo.rule[propertyType][property]) 1077 valueSet.add(value);
802 { 1078
803 for (let value of ruleInfo.rule[propertyType][property]) 1079 rulesInfo[i].merged = true;
804 valueSet.add(value);
805 }
806
807 ruleInfo.merged = true;
808 } 1080 }
809 1081
810 if (valueSet.size > 0) 1082 if (valueSet.size > 0)
811 oneRuleInfo.rule[propertyType][property] = Array.from(valueSet); 1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);
812 1084
813 oneRuleInfo.mergedInto = true; 1085 rulesInfo[0].mergedInto = true;
814 } 1086 }
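
A sketch of the rewritten mergeRulesByArrayProperty() in action, merging the "resource-type" arrays of two otherwise identical rules (hypothetical rulesInfo entries):

  let rulesInfo = [
    {rule: {trigger: {"url-filter": "^https?://ads\\.",
                      "resource-type": ["image"]},
            action: {type: "block"}}},
    {rule: {trigger: {"url-filter": "^https?://ads\\.",
                      "resource-type": ["script"]},
            action: {type: "block"}}}
  ];

  mergeRulesByArrayProperty(rulesInfo, "trigger", "resource-type");
  // rulesInfo[0].rule.trigger["resource-type"] -> ["image", "script"]
  // rulesInfo[1].merged                        -> true
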
815 1087
816 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) 1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)
817 { 1089 {
818 let mergeableRulesInfoByGroup = new Map(); 1090 let mergeableRulesInfoByGroup = new Map();
819 1091
820 for (let ruleInfo of rulesInfo) 1092 for (let ruleInfo of rulesInfo)
821 { 1093 {
822 let copy = { 1094 let copy = {
823 trigger: Object.assign({}, ruleInfo.rule.trigger), 1095 trigger: Object.assign({}, ruleInfo.rule.trigger),
(...skipping 12 matching lines...)
836 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]); 1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);
837 } 1109 }
838 1110
839 return mergeableRulesInfoByGroup; 1111 return mergeableRulesInfoByGroup;
840 } 1112 }
841 1113
842 function mergeRules(rules, exhaustive) 1114 function mergeRules(rules, exhaustive)
843 { 1115 {
844 let rulesInfo = rules.map(rule => ({rule})); 1116 let rulesInfo = rules.map(rule => ({rule}));
845 1117
1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];
1119
846 return async(() => 1120 return async(() =>
847 { 1121 {
848 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter"); 1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");
849 return async(Array.from(map.values()).map(mergeableRulesInfo => () => 1123 return async(map.values(), mergeableRulesInfo => () =>
850 { 1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)
851 if (mergeableRulesInfo.length > 1) 1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))
852 return mergeRulesByURLFilter(mergeableRulesInfo, exhaustive); 1126 )
853 })); 1127 .then(() =>
1128 {
1129 // Filter out rules that are redundant or have been merged into other
1130 // rules.
1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&
1132 !ruleInfo.merged);
1133 });
854 }) 1134 })
855 .then(() => 1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>
856 {
857 // Filter out rules that are redundant or have been merged into other rules.
858 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&
859 !ruleInfo.merged);
860 })
861 .then(() => async(["resource-type", "if-domain"].map(arrayProperty => () =>
862 { 1136 {
863 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", 1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",
864 arrayProperty); 1138 arrayProperty);
865 return async(Array.from(map.values()).map(mergeableRulesInfo => () => 1139 return async(map.values(), mergeableRulesInfo => () =>
866 { 1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)
867 if (mergeableRulesInfo.length > 1) 1141 )
868 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty);
869 }))
870 .then(() => 1142 .then(() =>
871 { 1143 {
872 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); 1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);
873 }); 1145 });
874 }))) 1146 }))
875 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule)); 1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));
876 } 1148 }
877 1149
878 let ContentBlockerList = 1150 let ContentBlockerList =
879 /** 1151 /**
880 * Create a new Adblock Plus filter to content blocker list converter 1152 * Create a new Adblock Plus filter to content blocker list converter
881 * 1153 *
882 * @param {object} options Options for content blocker list generation 1154 * @param {object} options Options for content blocker list generation
883 * 1155 *
884 * @constructor 1156 * @constructor
885 */ 1157 */
886 exports.ContentBlockerList = function(options) 1158 exports.ContentBlockerList = function (options)
887 { 1159 {
888 const defaultOptions = { 1160 const defaultOptions = {
889 merge: false, 1161 merge: "auto"
890 exhaustiveMerge: false
891 }; 1162 };
892 1163
893 this.options = Object.assign({}, defaultOptions, options); 1164 this.options = Object.assign({}, defaultOptions, options);
894 1165
895 this.requestFilters = []; 1166 this.requestFilters = [];
896 this.requestExceptions = []; 1167 this.requestExceptions = [];
897 this.elemhideFilters = []; 1168 this.elemhideFilters = [];
898 this.elemhideExceptions = []; 1169 this.elemhideExceptions = [];
1170 this.genericblockExceptions = [];
1171 this.generichideExceptions = [];
899 this.elemhideSelectorExceptions = new Map(); 1172 this.elemhideSelectorExceptions = new Map();
900 }; 1173 };
901 1174
902 /** 1175 /**
903 * Add Adblock Plus filter to be converted 1176 * Add Adblock Plus filter to be converted
904 * 1177 *
905 * @param {Filter} filter Filter to convert 1178 * @param {Filter} filter Filter to convert
906 */ 1179 */
907 ContentBlockerList.prototype.addFilter = function(filter) 1180 ContentBlockerList.prototype.addFilter = function(filter)
908 { 1181 {
909 if (filter.sitekeys) 1182 if (filter.sitekeys)
910 return; 1183 return;
911 if (filter instanceof filterClasses.RegExpFilter && 1184 if (filter instanceof filterClasses.RegExpFilter &&
912 filter.regexpSource == null) 1185 filter.regexpSource == null)
913 return; 1186 return;
914 1187
915 if (filter instanceof filterClasses.BlockingFilter) 1188 if (filter instanceof filterClasses.BlockingFilter)
916 this.requestFilters.push(filter); 1189 this.requestFilters.push(filter);
917 1190
918 if (filter instanceof filterClasses.WhitelistFilter) 1191 if (filter instanceof filterClasses.WhitelistFilter)
919 { 1192 {
920 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes)) 1193 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))
921 this.requestExceptions.push(filter); 1194 this.requestExceptions.push(filter);
922 1195
923 if (filter.contentType & typeMap.ELEMHIDE) 1196 if (filter.contentType & typeMap.GENERICBLOCK)
924 this.elemhideExceptions.push(filter); 1197 this.genericblockExceptions.push(filter);
1198
1199 if (filter.contentType & typeMap.ELEMHIDE)
1200 this.elemhideExceptions.push(filter);
1201 else if (filter.contentType & typeMap.GENERICHIDE)
1202 this.generichideExceptions.push(filter);
925 } 1203 }
926 1204
927 if (filter instanceof filterClasses.ElemHideFilter) 1205 if (filter instanceof filterClasses.ElemHideFilter)
928 this.elemhideFilters.push(filter); 1206 this.elemhideFilters.push(filter);
929 1207
930 if (filter instanceof filterClasses.ElemHideException) 1208 if (filter instanceof filterClasses.ElemHideException)
931 { 1209 {
932 let domains = this.elemhideSelectorExceptions[filter.selector]; 1210 let domains = this.elemhideSelectorExceptions[filter.selector];
933 if (!domains) 1211 if (!domains)
934 domains = this.elemhideSelectorExceptions[filter.selector] = []; 1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];
935 1213
936 parseDomains(filter.domains, domains, []); 1214 parseDomains(filter.domains, domains, []);
937 } 1215 }
938 }; 1216 };
939 1217
940 /** 1218 /**
941 * Generate content blocker list for all filters that were added 1219 * Generate content blocker list for all filters that were added
942 */ 1220 */
943 ContentBlockerList.prototype.generateRules = function() 1221 ContentBlockerList.prototype.generateRules = function()
944 { 1222 {
945 let cssRules = []; 1223 let cssRules = [];
946 let cssExceptionRules = []; 1224 let cssExceptionRules = [];
947 let blockingRules = []; 1225 let blockingRules = [];
948 let blockingExceptionRules = []; 1226 let blockingExceptionRules = [];
949 1227
950 let ruleGroups = [cssRules, cssExceptionRules, 1228 let ruleGroups = [cssRules, cssExceptionRules,
951 blockingRules, blockingExceptionRules]; 1229 blockingRules, blockingExceptionRules];
952 1230
1231 let genericSelectors = [];
953 let groupedElemhideFilters = new Map(); 1232 let groupedElemhideFilters = new Map();
1233
954 for (let filter of this.elemhideFilters) 1234 for (let filter of this.elemhideFilters)
955 { 1235 {
956 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); 1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);
957 if (!result) 1237 if (!result)
958 continue; 1238 continue;
959 1239
960 if (result.matchDomains.length == 0) 1240 if (result.matchDomains.length == 0)
961 result.matchDomains = ["^https?://"]; 1241 {
962 1242 genericSelectors.push(result.selector);
963 for (let matchDomain of result.matchDomains) 1243 }
964 { 1244 else
965 let group = groupedElemhideFilters.get(matchDomain) || []; 1245 {
966 group.push(result.selector); 1246 for (let matchDomain of result.matchDomains)
967 groupedElemhideFilters.set(matchDomain, group); 1247 {
968 } 1248 let group = groupedElemhideFilters.get(matchDomain) || [];
969 } 1249 group.push(result.selector);
1250 groupedElemhideFilters.set(matchDomain, group);
1251 }
1252 }
1253 }
1254
1255 // Separate out the element hiding exceptions that have only a hostname part
1256 // from the rest. This allows us to implement a workaround for issue #5345
1257 // (WebKit bug #167423), but as a bonus it also reduces the number of
1258 // generated rules. The downside is that the exception will only apply to the
1259 // top-level document, not to iframes. We have to live with this until the
1260 // WebKit bug is fixed in all supported versions of Safari.
1261 // https://bugs.webkit.org/show_bug.cgi?id=167423
1262 //
1263 // Note that as a result of this workaround we end up with a huge rule set in
1264 // terms of the amount of memory used. This can cause Node.js to throw
1265 // "JavaScript heap out of memory". To avoid this, call Node.js with
1266 // --max_old_space_size=4096
1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);
1268
1269 let genericSelectorExceptionDomains =
1270 extractFilterDomains(this.generichideExceptions);
1271 elemhideExceptionDomains.forEach(name =>
1272 {
1273 genericSelectorExceptionDomains.add(name);
1274 });
1275
1276 addCSSRules(cssRules, genericSelectors, null,
1277 genericSelectorExceptionDomains);
1278
1279 // Filter out whitelisted domains.
1280 elemhideExceptionDomains.forEach(domain =>
1281 groupedElemhideFilters.delete(domain));
970 1282
971 groupedElemhideFilters.forEach((selectors, matchDomain) => 1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>
972 { 1284 {
973 while (selectors.length) 1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);
974 {
975 let selector = selectors.splice(0, selectorLimit).join(", ");
976
977 // As of Safari 9.0 element IDs are matched as lowercase. We work around
978 // this by converting to the attribute format [id="elementID"]
979 selector = convertIDSelectorsToAttributeSelectors(selector);
980
981 cssRules.push({
982 trigger: {"url-filter": matchDomain,
983 "url-filter-is-case-sensitive": true},
984 action: {type: "css-display-none",
985 selector: selector}
986 });
987 }
988 }); 1286 });
989 1287
990 for (let filter of this.elemhideExceptions) 1288 let requestFilterExceptionDomains = [];
991 { 1289 for (let filter of this.genericblockExceptions)
992 convertFilterAddRules(cssExceptionRules, filter, 1290 {
993 "ignore-previous-rules", false); 1291 let parsed = parseFilterRegexpSource(filter.regexpSource);
1292 if (parsed.hostname)
1293 requestFilterExceptionDomains.push(parsed.hostname);
994 } 1294 }
995 1295
996 for (let filter of this.requestFilters) 1296 for (let filter of this.requestFilters)
997 convertFilterAddRules(blockingRules, filter, "block", true); 1297 {
1298 convertFilterAddRules(blockingRules, filter, "block", true,
1299 requestFilterExceptionDomains);
1300 }
998 1301
999 for (let filter of this.requestExceptions) 1302 for (let filter of this.requestExceptions)
1000 { 1303 {
1001 convertFilterAddRules(blockingExceptionRules, filter, 1304 convertFilterAddRules(blockingExceptionRules, filter,
1002 "ignore-previous-rules", true); 1305 "ignore-previous-rules", true);
1003 } 1306 }
1004 1307
1005 return async(ruleGroups.map((group, index) => () => 1308 return async(ruleGroups, (group, index) => () =>
1006 { 1309 {
1007 let next = () => 1310 let next = () =>
1008 { 1311 {
1009 if (index == ruleGroups.length - 1) 1312 if (index == ruleGroups.length - 1)
1010 return ruleGroups.reduce((all, rules) => all.concat(rules), []); 1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);
1011 }; 1314 };
1012 1315
1013 ruleGroups[index] = ruleGroups[index].filter(rule => !hasNonASCI(rule)); 1316 if (this.options.merge == "all" ||
1014 1317 (this.options.merge == "auto" &&
1015 if (this.options.merge) 1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))
1016 { 1319 {
1017 return mergeRules(ruleGroups[index], this.options.exhaustiveMerge) 1320 return mergeRules(ruleGroups[index], this.options.merge == "all")
1018 .then(rules => 1321 .then(rules =>
1019 { 1322 {
1020 ruleGroups[index] = rules; 1323 ruleGroups[index] = rules;
1021 return next(); 1324 return next();
1022 }); 1325 });
1023 } 1326 }
1024 1327
1025 return next(); 1328 return next();
1026 })); 1329 });
1027 }; 1330 };