Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) | Base URL: https://hg.adblockplus.org/abp2blocklist
Left Patch Set: Created May 31, 2017, 2:42 a.m.
Right Patch Set: Rebase Created July 13, 2017, 11:41 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | node_modules/filterClasses.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 61 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
62 } 62 }
63 63
64 function matchDomain(domain) 64 function matchDomain(domain)
65 { 65 {
66 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; 66 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
67 } 67 }
68 68
69 function getURLSchemes(contentType) 69 function getURLSchemes(contentType)
70 { 70 {
71 if (contentType == typeMap.WEBSOCKET) 71 // If the given content type includes all supported URL schemes, simply
72 return ["wss?://"]; 72 // return a single generic URL scheme pattern. This minimizes the size of the
73 73 // generated rule set. The downside to this is that it will also match
74 if (contentType == typeMap.WEBRTC) 74 // schemes that we do not want to match (e.g. "ftp://"), but this can be
75 return ["stuns?:", "turns?:"]; 75 // mitigated by adding exceptions for those schemes.
76 76 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&
77 return ["https?://"]; 77 contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))
78 return ["[^:]+:(//)?"];
79
80 let urlSchemes = [];
81
82 if (contentType & typeMap.WEBSOCKET)
83 urlSchemes.push("wss?://");
84
85 if (contentType & typeMap.WEBRTC)
86 urlSchemes.push("stuns?:", "turns?:");
87
88 if (contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))
89 urlSchemes.push("https?://");
90
91 return urlSchemes;
78 } 92 }
79 93
80 function findSubdomainsInList(domain, list) 94 function findSubdomainsInList(domain, list)
81 { 95 {
82 let subdomains = []; 96 let subdomains = [];
83 let suffixLength = domain.length + 1; 97 let suffixLength = domain.length + 1;
84 98
85 for (let name of list) 99 for (let name of list)
86 { 100 {
87 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) 101 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
88 subdomains.push(name.slice(0, -suffixLength)); 102 subdomains.push(name.slice(0, -suffixLength));
89 } 103 }
90 104
91 return subdomains; 105 return subdomains;
106 }
107
108 function extractFilterDomains(filters)
109 {
110 let domains = new Set();
111 for (let filter of filters)
112 {
113 let parsed = parseFilterRegexpSource(filter.regexpSource);
114 if (parsed.justHostname)
115 domains.add(parsed.hostname);
116 }
117 return domains;
92 } 118 }
93 119
94 function convertElemHideFilter(filter, elemhideSelectorExceptions) 120 function convertElemHideFilter(filter, elemhideSelectorExceptions)
95 { 121 {
96 let included = []; 122 let included = [];
97 let excluded = []; 123 let excluded = [];
98 let rules = []; 124 let rules = [];
99 125
100 parseDomains(filter.domains, included, excluded); 126 parseDomains(filter.domains, included, excluded);
101 127
(...skipping 14 matching lines...) Expand all
116 * case, a hostname string (or undefined) and a bool 142 * case, a hostname string (or undefined) and a bool
117 * indicating if the source only contains a hostname or not: 143 * indicating if the source only contains a hostname or not:
118 * {regexp: "...", 144 * {regexp: "...",
119 * canSafelyMatchAsLowercase: true/false, 145 * canSafelyMatchAsLowercase: true/false,
120 * hostname: "...", 146 * hostname: "...",
121 * justHostname: true/false} 147 * justHostname: true/false}
122 */ 148 */
123 function parseFilterRegexpSource(text, urlScheme) 149 function parseFilterRegexpSource(text, urlScheme)
124 { 150 {
125 let regexp = []; 151 let regexp = [];
126 let lastIndex = text.length - 1; 152
153 // Convert the text into an array of Unicode characters.
154 //
155 // In the case of surrogate pairs (the smiley emoji, for example), one
156 // Unicode code point is represented by two JavaScript characters together.
157 // We want to iterate over Unicode code points rather than JavaScript
158 // characters.
159 let characters = Array.from(text);
160
161 let lastIndex = characters.length - 1;
127 let hostname; 162 let hostname;
128 let hostnameStart = null; 163 let hostnameStart = null;
129 let hostnameFinished = false; 164 let hostnameFinished = false;
130 let justHostname = false; 165 let justHostname = false;
131 let canSafelyMatchAsLowercase = false; 166 let canSafelyMatchAsLowercase = false;
132 167
133 if (!urlScheme) 168 if (!urlScheme)
134 urlScheme = getURLSchemes()[0]; 169 urlScheme = getURLSchemes()[0];
135 170
136 for (let i = 0; i < text.length; i++) 171 for (let i = 0; i < characters.length; i++)
137 { 172 {
138 let c = text[i]; 173 let c = characters[i];
139 174
140 if (hostnameFinished) 175 if (hostnameFinished)
141 justHostname = false; 176 justHostname = false;
142 177
143 // If we're currently inside the hostname we have to be careful not to 178 // If we're currently inside the hostname we have to be careful not to
144 // escape any characters until after we have converted it to punycode. 179 // escape any characters until after we have converted it to punycode.
145 if (hostnameStart != null && !hostnameFinished) 180 if (hostnameStart != null && !hostnameFinished)
146 { 181 {
147 let endingChar = (c == "*" || c == "^" || 182 let endingChar = (c == "*" || c == "^" ||
148 c == "?" || c == "/" || c == "|"); 183 c == "?" || c == "/" || c == "|");
149 if (!endingChar && i != lastIndex) 184 if (!endingChar && i != lastIndex)
150 continue; 185 continue;
151 186
152 hostname = punycode.toASCII( 187 hostname = punycode.toASCII(
153 text.substring(hostnameStart, endingChar ? i : i + 1) 188 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
189 .toLowerCase()
154 ); 190 );
155 hostnameFinished = justHostname = true; 191 hostnameFinished = justHostname = true;
156 regexp.push(escapeRegExp(hostname)); 192 regexp.push(escapeRegExp(hostname));
157 if (!endingChar) 193 if (!endingChar)
158 break; 194 break;
159 } 195 }
160 196
161 switch (c) 197 switch (c)
162 { 198 {
163 case "*": 199 case "*":
164 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") 200 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
165 regexp.push(".*"); 201 regexp.push(".*");
166 break; 202 break;
167 case "^": 203 case "^":
168 if (i < lastIndex) 204 let alphabet = "a-z";
169 regexp.push("."); 205 // If justHostname is true and we've encountered a "^", it means we're
206 // still in the hostname part of the URL. Since hostnames are always
207 // lower case (Punycode), there's no need to include "A-Z" in the
208 // pattern. Further, subsequent code may lower-case the entire regular
209 // expression (if the URL contains only the hostname part), leaving us
210 // with "a-za-z", which would be redundant.
211 if (!justHostname)
212 alphabet = "A-Z" + alphabet;
213 let digits = "0-9";
214 // Note that the "-" must appear first here in order to retain its
215 // literal meaning within the brackets.
216 let specialCharacters = "-_.%";
217 let separator = "[^" + specialCharacters + alphabet + digits + "]";
218 if (i == 0)
219 regexp.push("^" + urlScheme + "(.*" + separator + ")?");
220 else if (i == lastIndex)
221 regexp.push("(" + separator + ".*)?$");
222 else
223 regexp.push(separator);
170 break; 224 break;
171 case "|": 225 case "|":
172 if (i == 0) 226 if (i == 0)
173 { 227 {
174 regexp.push("^"); 228 regexp.push("^");
175 break; 229 break;
176 } 230 }
177 if (i == lastIndex) 231 if (i == lastIndex)
178 { 232 {
179 regexp.push("$"); 233 regexp.push("$");
180 break; 234 break;
181 } 235 }
182 if (i == 1 && text[0] == "|") 236 if (i == 1 && characters[0] == "|")
183 { 237 {
184 hostnameStart = i + 1; 238 hostnameStart = i + 1;
185 canSafelyMatchAsLowercase = true; 239 canSafelyMatchAsLowercase = true;
186 regexp.push(urlScheme + "([^/]+\\.)?"); 240 regexp.push(urlScheme + "([^/]+\\.)?");
187 break; 241 break;
188 } 242 }
189 regexp.push("\\|"); 243 regexp.push("\\|");
190 break; 244 break;
191 case "/": 245 case "/":
192 if (!hostnameFinished && 246 if (!hostnameFinished &&
193 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 247 characters[i - 2] == ":" && characters[i - 1] == "/")
194 { 248 {
195 hostnameStart = i + 1; 249 hostnameStart = i + 1;
196 canSafelyMatchAsLowercase = true; 250 canSafelyMatchAsLowercase = true;
197 } 251 }
198 regexp.push("/"); 252 regexp.push("/");
199 break; 253 break;
200 case ".": case "+": case "$": case "?": 254 case ".": case "+": case "$": case "?":
201 case "{": case "}": case "(": case ")": 255 case "{": case "}": case "(": case ")":
202 case "[": case "]": case "\\": 256 case "[": case "]": case "\\":
203 regexp.push("\\", c); 257 regexp.push("\\", c);
204 break; 258 break;
205 default: 259 default:
206 if (hostnameFinished && (c >= "a" && c <= "z" || 260 if (hostnameFinished && (c >= "a" && c <= "z" ||
207 c >= "A" && c <= "Z")) 261 c >= "A" && c <= "Z"))
208 canSafelyMatchAsLowercase = false; 262 canSafelyMatchAsLowercase = false;
209 regexp.push(c); 263 regexp.push(c == "%" ? c : encodeURI(c));
210 } 264 }
211 } 265 }
212
213 if (regexp.length == 0 || regexp[0] != "^")
214 regexp.unshift("^" + urlScheme + ".*");
215 266
216 return { 267 return {
217 regexp: regexp.join(""), 268 regexp: regexp.join(""),
218 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 269 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
219 hostname: hostname, 270 hostname: hostname,
220 justHostname: justHostname 271 justHostname: justHostname
221 }; 272 };
222 } 273 }
223 274
224 function getResourceTypes(contentType) 275 function getResourceTypes(contentType)
225 { 276 {
226 let types = []; 277 let types = [];
227 278
228 if (contentType & typeMap.IMAGE) 279 if (contentType & typeMap.IMAGE)
229 types.push("image"); 280 types.push("image");
230 if (contentType & typeMap.STYLESHEET) 281 if (contentType & typeMap.STYLESHEET)
231 types.push("style-sheet"); 282 types.push("style-sheet");
232 if (contentType & typeMap.SCRIPT) 283 if (contentType & typeMap.SCRIPT)
233 types.push("script"); 284 types.push("script");
234 if (contentType & typeMap.FONT) 285 if (contentType & typeMap.FONT)
235 types.push("font"); 286 types.push("font");
236 if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) 287 if (contentType & (typeMap.MEDIA | typeMap.OBJECT))
237 types.push("media"); 288 types.push("media");
238 if (contentType & typeMap.POPUP) 289 if (contentType & typeMap.POPUP)
239 types.push("popup"); 290 types.push("popup");
240 if (contentType & (typeMap.XMLHTTPREQUEST | 291 if (contentType & (typeMap.XMLHTTPREQUEST |
241 typeMap.WEBSOCKET | 292 typeMap.WEBSOCKET |
242 typeMap.WEBRTC | 293 typeMap.WEBRTC |
243 typeMap.OBJECT_SUBREQUEST | 294 typeMap.OBJECT_SUBREQUEST |
244 typeMap.PING | 295 typeMap.PING |
245 typeMap.OTHER)) 296 typeMap.OTHER))
246 { 297 {
247 types.push("raw"); 298 types.push("raw");
248 } 299 }
249 if (contentType & typeMap.SUBDOCUMENT) 300 if (contentType & typeMap.SUBDOCUMENT)
250 types.push("document"); 301 types.push("document");
251 302
252 return types; 303 return types;
253 } 304 }
254 305
306 function makeRuleCopies(trigger, action, urlSchemes)
307 {
308 let copies = [];
309
310 // Always make a deep copy of the rule, since rules may have to be
311 // manipulated individually at a later stage.
312 let stringifiedTrigger = JSON.stringify(trigger);
313
314 let filterPattern = trigger["url-filter"].substring(1);
315 let startIndex = 0;
316
317 // If the URL filter already begins with the first URL scheme pattern, skip
318 // it.
319 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
320 {
321 filterPattern = filterPattern.substring(urlSchemes[0].length);
322 startIndex = 1;
323 }
324 else
325 {
326 filterPattern = ".*" + filterPattern;
327 }
328
329 for (let i = startIndex; i < urlSchemes.length; i++)
330 {
331 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
332 "url-filter": "^" + urlSchemes[i] + filterPattern
333 });
334 copies.push({trigger: copyTrigger, action});
335 }
336
337 return copies;
338 }
339
340 function excludeTopURLFromTrigger(trigger)
341 {
342 trigger["unless-top-url"] = [trigger["url-filter"]];
343 if (trigger["url-filter-is-case-sensitive"])
344 trigger["top-url-filter-is-case-sensitive"] = true;
345 }
346
255 function convertFilterAddRules(rules, filter, action, withResourceTypes, 347 function convertFilterAddRules(rules, filter, action, withResourceTypes,
256 exceptionDomains) 348 exceptionDomains, contentType)
257 { 349 {
258 let contentType = filter.contentType; 350 if (!contentType)
259 351 contentType = filter.contentType;
260 // Support WebSocket and WebRTC only if they're the only option. If we try to 352
261 // support them otherwise (e.g. $xmlhttprequest,websocket,webrtc), we end up 353 // If WebSocket or WebRTC are given along with other options but not
262 // having to generate multiple rules, which bloats the rule set and is not 354 // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must
263 // really necessary in practice. 355 // generate multiple rules. For example, for the filter
264 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET) || 356 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"
265 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC)) 357 // and another rule with "^https?://" and "image". If we merge the two, we
266 { 358 // end up blocking requests of type XMLHttpRequest inadvertently.
359 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&
360 !(contentType & typeMap.WEBRTC &&
361 contentType & typeMap.XMLHTTPREQUEST)) ||
362 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&
363 !(contentType & typeMap.WEBSOCKET &&
364 contentType & typeMap.XMLHTTPREQUEST)))
365 {
366 if (contentType & typeMap.WEBSOCKET)
367 {
368 convertFilterAddRules(rules, filter, action, withResourceTypes,
369 exceptionDomains, typeMap.WEBSOCKET);
370 }
371
372 if (contentType & typeMap.WEBRTC)
373 {
374 convertFilterAddRules(rules, filter, action, withResourceTypes,
375 exceptionDomains, typeMap.WEBRTC);
376 }
377
267 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); 378 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);
379
380 if (!contentType)
381 return;
268 } 382 }
269 383
270 let urlSchemes = getURLSchemes(contentType); 384 let urlSchemes = getURLSchemes(contentType);
271 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); 385 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);
272 386
273 // For the special case of $document whitelisting filters with just a domain 387 // For the special case of $document whitelisting filters with just a domain
274 // we can generate an equivalent blocking rule exception using if-domain. 388 // we can generate an equivalent blocking rule exception using if-domain.
275 if (filter instanceof filterClasses.WhitelistFilter && 389 if (filter instanceof filterClasses.WhitelistFilter &&
276 contentType & typeMap.DOCUMENT && 390 contentType & typeMap.DOCUMENT &&
277 parsed.justHostname) 391 parsed.justHostname)
278 { 392 {
279 rules.push({ 393 rules.push({
280 trigger: { 394 trigger: {
281 "url-filter": ".*", 395 "url-filter": ".*",
282 "if-domain": ["*" + parsed.hostname] 396 "if-domain": ["*" + parsed.hostname]
283 }, 397 },
284 action: {type: "ignore-previous-rules"} 398 action: {type: "ignore-previous-rules"}
285 }); 399 });
286 // If the filter contains other supported options we'll need to generate 400 // If the filter contains other supported options we'll need to generate
287 // further rules for it, but if not we can simply return now. 401 // further rules for it, but if not we can simply return now.
288 if (!(contentType & whitelistableRequestTypes)) 402 if (!(contentType & whitelistableRequestTypes))
289 return; 403 return;
290 } 404 }
291 405
292 let trigger = {"url-filter": parsed.regexp}; 406 let trigger = {"url-filter": parsed.regexp};
293 407
408 // If the URL filter begins with one of the URL schemes for this content
409 // type, we generate additional rules for all the URL scheme patterns;
410 // otherwise, if the start of the URL filter literally matches the first URL
411 // scheme pattern, we just generate additional rules for the remaining URL
412 // scheme patterns.
413 //
414 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"
415 // in front of this and generate two additional rules for
416 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,
417 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate
418 // "^turns?:([^/]+\\.)?foo" in addition.
419 //
420 // Note that the filter can be already anchored to the beginning
421 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional
422 // rules.
423 let needAltRules = trigger["url-filter"][0] != "^" ||
424 trigger["url-filter"].startsWith("^" + urlSchemes[0]);
425
426 if (trigger["url-filter"][0] != "^")
427 {
428 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)
429 .test(trigger["url-filter"])))
430 {
431 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];
432 }
433
434 trigger["url-filter"] = "^" + trigger["url-filter"];
435 }
436
294 // For rules containing only a hostname we know that we're matching against 437 // For rules containing only a hostname we know that we're matching against
295 // a lowercase string unless the matchCase option was passed. 438 // a lowercase string unless the matchCase option was passed.
296 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) 439 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
297 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 440 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
298 441
299 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) 442 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
300 trigger["url-filter-is-case-sensitive"] = true; 443 trigger["url-filter-is-case-sensitive"] = true;
301 444
302 let included = []; 445 let included = [];
303 let excluded = []; 446 let excluded = [];
304 447
305 parseDomains(filter.domains, included, excluded); 448 parseDomains(filter.domains, included, excluded);
306 449
307 if (exceptionDomains) 450 if (exceptionDomains)
308 excluded = excluded.concat(exceptionDomains); 451 excluded = excluded.concat(exceptionDomains);
309 452
310 if (withResourceTypes) 453 if (withResourceTypes)
311 { 454 {
312 trigger["resource-type"] = getResourceTypes(contentType); 455 let resourceTypes = getResourceTypes(contentType);
313 456
314 if (trigger["resource-type"].length == 0) 457 // Content blocker rules can't differentiate between sub-document requests
458 // (iframes) and top-level document requests. To avoid too many false
459 // positives, we prevent rules with no hostname part from blocking document
460 // requests.
461 //
462 // Once Safari 11 becomes our minimum supported version, we could change
463 // our approach here to use the new "unless-top-url" property instead.
464 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)
465 resourceTypes = resourceTypes.filter(type => type != "document");
466
467 if (resourceTypes.length == 0)
315 return; 468 return;
469
470 trigger["resource-type"] = resourceTypes;
316 } 471 }
317 472
318 if (filter.thirdParty != null) 473 if (filter.thirdParty != null)
319 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 474 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
475
476 let addTopLevelException = false;
320 477
321 if (included.length > 0) 478 if (included.length > 0)
322 { 479 {
323 trigger["if-domain"] = []; 480 trigger["if-domain"] = [];
324 481
325 for (let name of included) 482 for (let name of included)
326 { 483 {
327 // If this is a blocking filter or an element hiding filter, add the 484 // If this is a blocking filter or an element hiding filter, add the
328 // subdomain wildcard only if no subdomains have been excluded. 485 // subdomain wildcard only if no subdomains have been excluded.
329 let notSubdomains = null; 486 let notSubdomains = null;
(...skipping 10 matching lines...) Expand all
340 else 497 else
341 { 498 {
342 trigger["if-domain"].push("*" + name); 499 trigger["if-domain"].push("*" + name);
343 } 500 }
344 } 501 }
345 } 502 }
346 else if (excluded.length > 0) 503 else if (excluded.length > 0)
347 { 504 {
348 trigger["unless-domain"] = excluded.map(name => "*" + name); 505 trigger["unless-domain"] = excluded.map(name => "*" + name);
349 } 506 }
507 else if (filter instanceof filterClasses.BlockingFilter &&
508 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)
509 {
510 // Rules with a hostname part are still allowed to block document requests,
511 // but we add an exception for top-level documents.
512 //
513 // Note that we can only do this if there's no "unless-domain" property for
514 // now. This also only works in Safari 11 onwards, while older versions
515 // simply ignore this property. Once Safari 11 becomes our minimum
516 // supported version, we can merge "unless-domain" into "unless-top-url".
517 addTopLevelException = true;
518 excludeTopURLFromTrigger(trigger);
519 }
350 520
351 rules.push({trigger: trigger, action: {type: action}}); 521 rules.push({trigger: trigger, action: {type: action}});
352 522
353 // Generate additional rules for any alternative URL schemes. 523 if (needAltRules)
354 if (urlSchemes.length > 1 && 524 {
355 trigger["url-filter"].startsWith("^" + urlSchemes[0])) 525 // Generate additional rules for any alternative URL schemes.
356 { 526 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))
357 // Always make a deep copy of the rule, since rules may have to be 527 {
358 // manipulated individually at a later stage. 528 if (addTopLevelException)
359 let stringifiedTrigger = JSON.stringify(trigger); 529 excludeTopURLFromTrigger(altRule.trigger);
360 530
361 for (let i = 1; i < urlSchemes.length; i++) 531 rules.push(altRule);
362 { 532 }
363 let altTrigger = Object.assign(JSON.parse(stringifiedTrigger), { 533 }
364 "url-filter": "^" + urlSchemes[i] +
365 trigger["url-filter"].substring(urlSchemes[0].length + 1)
366 });
367 rules.push({trigger: altTrigger, action: {type: action}});
368 }
369 }
370 }
371
372 function hasNonASCI(obj)
373 {
374 if (typeof obj == "string")
375 {
376 if (/[^\x00-\x7F]/.test(obj))
377 return true;
378 }
379
380 if (typeof obj == "object")
381 {
382 if (obj instanceof Array)
383 for (let item of obj)
384 if (hasNonASCI(item))
385 return true;
386
387 let names = Object.getOwnPropertyNames(obj);
388 for (let name of names)
389 if (hasNonASCI(obj[name]))
390 return true;
391 }
392
393 return false;
394 } 534 }
395 535
396 function convertIDSelectorsToAttributeSelectors(selector) 536 function convertIDSelectorsToAttributeSelectors(selector)
397 { 537 {
398 // First we figure out where all the IDs are 538 // First we figure out where all the IDs are
399 let sep = ""; 539 let sep = "";
400 let start = null; 540 let start = null;
401 let positions = []; 541 let positions = [];
402 for (let i = 0; i < selector.length; i++) 542 for (let i = 0; i < selector.length; i++)
403 { 543 {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
437 { 577 {
438 newSelector.push(selector.substring(i, pos.start)); 578 newSelector.push(selector.substring(i, pos.start));
439 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 579 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
440 i = pos.end; 580 i = pos.end;
441 } 581 }
442 newSelector.push(selector.substring(i)); 582 newSelector.push(selector.substring(i));
443 583
444 return newSelector.join(""); 584 return newSelector.join("");
445 } 585 }
446 586
447 function addCSSRules(rules, selectors, matchDomain) 587 function addCSSRules(rules, selectors, matchDomain, exceptionDomains)
448 { 588 {
589 let unlessDomain = exceptionDomains.size > 0 ? [] : null;
590
591 exceptionDomains.forEach(name => unlessDomain.push("*" + name));
592
449 while (selectors.length) 593 while (selectors.length)
450 { 594 {
451 let selector = selectors.splice(0, selectorLimit).join(", "); 595 let selector = selectors.splice(0, selectorLimit).join(", ");
452 596
453 // As of Safari 9.0 element IDs are matched as lowercase. We work around 597 // As of Safari 9.0 element IDs are matched as lowercase. We work around
454 // this by converting to the attribute format [id="elementID"] 598 // this by converting to the attribute format [id="elementID"]
455 selector = convertIDSelectorsToAttributeSelectors(selector); 599 selector = convertIDSelectorsToAttributeSelectors(selector);
456 600
457 rules.push({ 601 let rule = {
458 trigger: {"url-filter": matchDomain, 602 trigger: {"url-filter": matchDomain,
459 "url-filter-is-case-sensitive": true}, 603 "url-filter-is-case-sensitive": true},
460 action: {type: "css-display-none", 604 action: {type: "css-display-none",
461 selector: selector} 605 selector: selector}
462 }); 606 };
607
608 if (unlessDomain)
609 rule.trigger["unless-domain"] = unlessDomain;
610
611 rules.push(rule);
463 } 612 }
464 } 613 }
465 614
466 let ContentBlockerList = 615 let ContentBlockerList =
467 /** 616 /**
468 * Create a new Adblock Plus filter to content blocker list converter 617 * Create a new Adblock Plus filter to content blocker list converter
469 * 618 *
470 * @constructor 619 * @constructor
471 */ 620 */
472 exports.ContentBlockerList = function () 621 exports.ContentBlockerList = function ()
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
549 { 698 {
550 for (let matchDomain of result.matchDomains) 699 for (let matchDomain of result.matchDomains)
551 { 700 {
552 let group = groupedElemhideFilters.get(matchDomain) || []; 701 let group = groupedElemhideFilters.get(matchDomain) || [];
553 group.push(result.selector); 702 group.push(result.selector);
554 groupedElemhideFilters.set(matchDomain, group); 703 groupedElemhideFilters.set(matchDomain, group);
555 } 704 }
556 } 705 }
557 } 706 }
558 707
559 addCSSRules(rules, genericSelectors, "^https?://"); 708 // Separate out the element hiding exceptions that have only a hostname part
560 709 // from the rest. This allows us to implement a workaround for issue #5345
561 // Right after the generic element hiding filters, add the exceptions that 710 // (WebKit bug #167423), but as a bonus it also reduces the number of
562 // should apply only to those filters. 711 // generated rules. The downside is that the exception will only apply to the
563 for (let filter of this.generichideExceptions) 712 // top-level document, not to iframes. We have to live with this until the
564 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); 713 // WebKit bug is fixed in all supported versions of Safari.
714 // https://bugs.webkit.org/show_bug.cgi?id=167423
715 //
716 // Note that as a result of this workaround we end up with a huge rule set in
717 // terms of the amount of memory used. This can cause Node.js to throw
718 // "JavaScript heap out of memory". To avoid this, call Node.js with
719 // --max_old_space_size=4096
720 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);
721
722 let genericSelectorExceptionDomains =
723 extractFilterDomains(this.generichideExceptions);
724 elemhideExceptionDomains.forEach(name =>
725 {
726 genericSelectorExceptionDomains.add(name);
727 });
728
729 addCSSRules(rules, genericSelectors, "^https?://",
730 genericSelectorExceptionDomains);
565 731
566 groupedElemhideFilters.forEach((selectors, matchDomain) => 732 groupedElemhideFilters.forEach((selectors, matchDomain) =>
567 { 733 {
568 addCSSRules(rules, selectors, matchDomain); 734 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains);
569 }); 735 });
570
571 for (let filter of this.elemhideExceptions)
572 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
573 736
574 let requestFilterExceptionDomains = []; 737 let requestFilterExceptionDomains = [];
575 for (let filter of this.genericblockExceptions) 738 for (let filter of this.genericblockExceptions)
576 { 739 {
577 let parsed = parseFilterRegexpSource(filter.regexpSource); 740 let parsed = parseFilterRegexpSource(filter.regexpSource);
578 if (parsed.hostname) 741 if (parsed.hostname)
579 requestFilterExceptionDomains.push(parsed.hostname); 742 requestFilterExceptionDomains.push(parsed.hostname);
580 } 743 }
581 744
582 for (let filter of this.requestFilters) 745 for (let filter of this.requestFilters)
583 { 746 {
584 convertFilterAddRules(rules, filter, "block", true, 747 convertFilterAddRules(rules, filter, "block", true,
585 requestFilterExceptionDomains); 748 requestFilterExceptionDomains);
586 } 749 }
587 750
588 for (let filter of this.requestExceptions) 751 for (let filter of this.requestExceptions)
589 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 752 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
590 753
591 return rules.filter(rule => !hasNonASCI(rule)); 754 return rules;
592 }; 755 };
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld