Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29473555: Issue 5345 - Whitelist $elemhide and $generichide domains where possible (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Created June 24, 2017, 2:48 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
71 71
72 for (let name of list) 72 for (let name of list)
73 { 73 {
74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) 74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
75 subdomains.push(name.slice(0, -suffixLength)); 75 subdomains.push(name.slice(0, -suffixLength));
76 } 76 }
77 77
78 return subdomains; 78 return subdomains;
79 } 79 }
80 80
81 function extractFilterDomains(filters)
82 {
83 let domains = [];
84 for (let filter of filters)
85 {
86 let parsed = parseFilterRegexpSource(filter.regexpSource);
87 if (parsed.justHostname)
88 domains.push(parsed.hostname);
89 }
90 return domains;
kzar 2017/07/07 11:40:13 Why not make domains a Set instead of an Array her…
Manish Jethani 2017/07/08 05:33:59 That's a good point, it does seem to make a huge d…
91 }
92
81 function convertElemHideFilter(filter, elemhideSelectorExceptions) 93 function convertElemHideFilter(filter, elemhideSelectorExceptions)
82 { 94 {
83 let included = []; 95 let included = [];
84 let excluded = []; 96 let excluded = [];
85 let rules = []; 97 let rules = [];
86 98
87 parseDomains(filter.domains, included, excluded); 99 parseDomains(filter.domains, included, excluded);
88 100
89 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 101 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
90 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 102 return {matchDomains: included.map(matchDomain), selector: filter.selector};
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
126 // If we're currently inside the hostname we have to be careful not to 138 // If we're currently inside the hostname we have to be careful not to
127 // escape any characters until after we have converted it to punycode. 139 // escape any characters until after we have converted it to punycode.
128 if (hostnameStart != null && !hostnameFinished) 140 if (hostnameStart != null && !hostnameFinished)
129 { 141 {
130 let endingChar = (c == "*" || c == "^" || 142 let endingChar = (c == "*" || c == "^" ||
131 c == "?" || c == "/" || c == "|"); 143 c == "?" || c == "/" || c == "|");
132 if (!endingChar && i != lastIndex) 144 if (!endingChar && i != lastIndex)
133 continue; 145 continue;
134 146
135 hostname = punycode.toASCII( 147 hostname = punycode.toASCII(
136 text.substring(hostnameStart, endingChar ? i : i + 1) 148 text.substring(hostnameStart, endingChar ? i : i + 1).toLowerCase()
Manish Jethani 2017/06/24 14:54:15 punycode.toASCII doesn't lower-case the string, we…
137 ); 149 );
138 hostnameFinished = justHostname = true; 150 hostnameFinished = justHostname = true;
139 regexp.push(escapeRegExp(hostname)); 151 regexp.push(escapeRegExp(hostname));
140 if (!endingChar) 152 if (!endingChar)
141 break; 153 break;
142 } 154 }
143 155
144 switch (c) 156 switch (c)
145 { 157 {
146 case "*": 158 case "*":
(...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after
393 { 405 {
394 newSelector.push(selector.substring(i, pos.start)); 406 newSelector.push(selector.substring(i, pos.start));
395 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 407 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
396 i = pos.end; 408 i = pos.end;
397 } 409 }
398 newSelector.push(selector.substring(i)); 410 newSelector.push(selector.substring(i));
399 411
400 return newSelector.join(""); 412 return newSelector.join("");
401 } 413 }
402 414
403 function addCSSRules(rules, selectors, matchDomain) 415 function addCSSRules(rules, selectors, matchDomain, exceptionDomains)
404 { 416 {
417 exceptionDomains = Array.from(new Set(exceptionDomains));
Manish Jethani 2017/06/24 14:54:15 Ensure no duplicates.
418
405 while (selectors.length) 419 while (selectors.length)
406 { 420 {
407 let selector = selectors.splice(0, selectorLimit).join(", "); 421 let selector = selectors.splice(0, selectorLimit).join(", ");
408 422
409 // As of Safari 9.0 element IDs are matched as lowercase. We work around 423 // As of Safari 9.0 element IDs are matched as lowercase. We work around
410 // this by converting to the attribute format [id="elementID"] 424 // this by converting to the attribute format [id="elementID"]
411 selector = convertIDSelectorsToAttributeSelectors(selector); 425 selector = convertIDSelectorsToAttributeSelectors(selector);
412 426
413 rules.push({ 427 let rule = {
414 trigger: {"url-filter": matchDomain, 428 trigger: {"url-filter": matchDomain,
415 "url-filter-is-case-sensitive": true}, 429 "url-filter-is-case-sensitive": true},
416 action: {type: "css-display-none", 430 action: {type: "css-display-none",
417 selector: selector} 431 selector: selector}
418 }); 432 };
433
434 if (exceptionDomains.length > 0)
435 rule.trigger["unless-domain"] = exceptionDomains.map(name => "*" + name);
kzar 2017/07/07 11:40:13 Maybe we should do this work outside of the while
Manish Jethani 2017/07/08 05:33:59 We have to make a copy of the array as a rule, bec…
kzar 2017/07/10 12:33:07 I'd rather you did the work outside the loop here…
Manish Jethani 2017/07/11 11:19:18 Done.
436
437 rules.push(rule);
419 } 438 }
420 } 439 }
421 440
422 let ContentBlockerList = 441 let ContentBlockerList =
423 /** 442 /**
424 * Create a new Adblock Plus filter to content blocker list converter 443 * Create a new Adblock Plus filter to content blocker list converter
425 * 444 *
426 * @constructor 445 * @constructor
427 */ 446 */
428 exports.ContentBlockerList = function () 447 exports.ContentBlockerList = function ()
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 { 524 {
506 for (let matchDomain of result.matchDomains) 525 for (let matchDomain of result.matchDomains)
507 { 526 {
508 let group = groupedElemhideFilters.get(matchDomain) || []; 527 let group = groupedElemhideFilters.get(matchDomain) || [];
509 group.push(result.selector); 528 group.push(result.selector);
510 groupedElemhideFilters.set(matchDomain, group); 529 groupedElemhideFilters.set(matchDomain, group);
511 } 530 }
512 } 531 }
513 } 532 }
514 533
515 addCSSRules(rules, genericSelectors, "^https?://"); 534 // Separate out the element hiding exceptions that have only a hostname part
535 // from the rest. This allows us to implement a workaround for issue #5345
536 // (WebKit bug #167423), but as a bonus it also reduces the number of
kzar 2017/07/07 11:40:13 Mind giving the full URL for the WebKit bug?
Manish Jethani 2017/07/08 05:33:59 Done. By the way, there are new comments on that…
kzar 2017/07/10 12:33:08 Acknowledged.
537 // generated rules. The downside is that the exception will only apply to the
538 // top-level document, not to iframes. We have to live with this until the
539 // WebKit bug is fixed in all supported versions of Safari.
540 //
541 // Note that as a result of this workaround we end up with a huge rule set in
542 // terms of the amount of memory used. This can cause Node.js to throw
kzar 2017/07/07 11:40:13 Have you tested that rule generation still works O…
Manish Jethani 2017/07/08 05:33:59 I tested it there and it works without problems.
kzar 2017/07/10 12:33:07 Acknowledged.
543 // "JavaScript heap out of memory". To avoid this, call Node.js with
544 // --max_old_space_size=4096
545 let generichideExceptionDomains =
546 extractFilterDomains(this.generichideExceptions);
547 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);
516 548
517 // Right after the generic element hiding filters, add the exceptions that 549 addCSSRules(rules, genericSelectors, "^https?://",
Manish Jethani 2017/06/24 14:54:15 We could continue generating individual rules for…
kzar 2017/07/07 11:40:13 Maybe add a comment explaining what needs to chang…
518 // should apply only to those filters. 550 generichideExceptionDomains.concat(elemhideExceptionDomains));
519 for (let filter of this.generichideExceptions)
520 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
521 551
522 groupedElemhideFilters.forEach((selectors, matchDomain) => 552 groupedElemhideFilters.forEach((selectors, matchDomain) =>
523 { 553 {
524 addCSSRules(rules, selectors, matchDomain); 554 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains);
525 }); 555 });
526 556
527 for (let filter of this.elemhideExceptions)
528 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
529
530 let requestFilterExceptionDomains = []; 557 let requestFilterExceptionDomains = [];
531 for (let filter of this.genericblockExceptions) 558 for (let filter of this.genericblockExceptions)
532 { 559 {
533 let parsed = parseFilterRegexpSource(filter.regexpSource); 560 let parsed = parseFilterRegexpSource(filter.regexpSource);
534 if (parsed.hostname) 561 if (parsed.hostname)
535 requestFilterExceptionDomains.push(parsed.hostname); 562 requestFilterExceptionDomains.push(parsed.hostname);
536 } 563 }
537 564
538 for (let filter of this.requestFilters) 565 for (let filter of this.requestFilters)
539 { 566 {
540 convertFilterAddRules(rules, filter, "block", true, 567 convertFilterAddRules(rules, filter, "block", true,
541 requestFilterExceptionDomains); 568 requestFilterExceptionDomains);
542 } 569 }
543 570
544 for (let filter of this.requestExceptions) 571 for (let filter of this.requestExceptions)
545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 572 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
546 573
547 return rules.filter(rule => !hasNonASCI(rule)); 574 return rules.filter(rule => !hasNonASCI(rule));
548 }; 575 };
OLD | NEW
« abp2blocklist.js ('K') | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld