Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)
Left Patch Set: Created April 20, 2016, 5:09 p.m.
Right Patch Set: Fix whitelisting request type logic Created May 17, 2016, 11:22 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH 3 * Copyright (C) 2006-2016 Eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 /** @module abp2blocklist */ 18 /** @module abp2blocklist */
19 19
20 "use strict"; 20 "use strict";
21 21
22 let filterClasses = require("filterClasses"); 22 let filterClasses = require("filterClasses");
23 let tldjs = require("tldjs"); 23 let tldjs = require("tldjs");
24 let punycode = require("punycode"); 24 let punycode = require("punycode");
25 25
26 const selectorLimit = 5000; 26 const selectorLimit = 5000;
27 const typeMap = filterClasses.RegExpFilter.typeMap; 27 const typeMap = filterClasses.RegExpFilter.typeMap;
28 const whitelistableRequestTypes = (typeMap.IMAGE
29 | typeMap.STYLESHEET
30 | typeMap.SCRIPT
31 | typeMap.FONT
32 | typeMap.MEDIA
33 | typeMap.POPUP
34 | typeMap.OBJECT
35 | typeMap.OBJECT_SUBREQUEST
36 | typeMap.XMLHTTPREQUEST
37 | typeMap.PING
38 | typeMap.SUBDOCUMENT
39 | typeMap.OTHER);
28 40
29 function parseDomains(domains, included, excluded) 41 function parseDomains(domains, included, excluded)
30 { 42 {
31 for (let domain in domains) 43 for (let domain in domains)
32 { 44 {
33 if (domain != "") 45 if (domain != "")
34 { 46 {
35 let enabled = domains[domain]; 47 let enabled = domains[domain];
36 domain = punycode.toASCII(domain.toLowerCase()); 48 domain = punycode.toASCII(domain.toLowerCase());
37 49
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
211 { 223 {
212 result.push(domain); 224 result.push(domain);
213 225
214 if (tldjs.getDomain(domain) == domain) 226 if (tldjs.getDomain(domain) == domain)
215 result.push("www." + domain); 227 result.push("www." + domain);
216 } 228 }
217 229
218 return result; 230 return result;
219 } 231 }
220 232
221 function convertFilter(filter, action, withResourceTypes) 233 function convertFilterAddRules(rules, filter, action, withResourceTypes)
222 { 234 {
223 let parsed = parseFilterRegexpSource(filter.regexpSource); 235 let parsed = parseFilterRegexpSource(filter.regexpSource);
224 236
225 // For the special case of $document whitelisting filters with just a domain 237 // For the special case of $document whitelisting filters with just a domain
226 // we can generate an equivalent blocking rule exception using if-domain. 238 // we can generate an equivalent blocking rule exception using if-domain.
227 if (filter.contentType == typeMap.DOCUMENT && parsed.justHostname) 239 if (filter instanceof filterClasses.WhitelistFilter &&
Sebastian Noack 2016/05/12 12:12:25 For filters like example.com$document,image we wou
kzar 2016/05/16 16:22:36 Done.
228 return {trigger: {"url-filter": ".*", 240 filter.contentType & typeMap.DOCUMENT &&
Sebastian Noack 2016/05/12 12:12:26 Nit: Mind wrapping the nested object for better re
Sebastian Noack 2016/05/12 12:12:26 Wouldn't an empty string be sufficient as url-filt
kzar 2016/05/16 16:22:36 Done.
kzar 2016/05/16 16:22:36 Unfortunately this causes a "Extension compilation
229 "if-domain": addDomainPrefix([parsed.hostname])}, 241 parsed.justHostname)
230 action: {type: "ignore-previous-rules"}}; 242 {
243 rules.push({
244 trigger: {
245 "url-filter": ".*",
246 "if-domain": addDomainPrefix([parsed.hostname])
247 },
248 action: {type: "ignore-previous-rules"}
249 });
250 // If the filter contains other supported options we'll need to generate
251 // further rules for it, but if not we can simply return now.
252 if (!(filter.contentType | whitelistableRequestTypes))
253 return;
254 }
231 255
232 let trigger = {"url-filter": parsed.regexp}; 256 let trigger = {"url-filter": parsed.regexp};
233 257
234 // Limit rules to to HTTP(S) URLs 258 // Limit rules to HTTP(S) URLs
Sebastian Noack 2016/05/12 12:12:26 Typo: to to
kzar 2016/05/16 16:22:36 Done.
235 if (!/^(\^|http)/i.test(trigger["url-filter"])) 259 if (!/^(\^|http)/i.test(trigger["url-filter"]))
236 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; 260 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
237 261
238 // For rules containing only a hostname we know that we're matching against 262 // For rules containing only a hostname we know that we're matching against
239 // a lowercase string unless the matchCase option was passed. 263 // a lowercase string unless the matchCase option was passed.
240 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) 264 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
241 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 265 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
242 266
243 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) 267 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
244 trigger["url-filter-is-case-sensitive"] = true; 268 trigger["url-filter-is-case-sensitive"] = true;
245 269
246 let included = []; 270 let included = [];
247 let excluded = []; 271 let excluded = [];
248 272
249 parseDomains(filter.domains, included, excluded); 273 parseDomains(filter.domains, included, excluded);
250 274
251 if (withResourceTypes) 275 if (withResourceTypes)
252 trigger["resource-type"] = getResourceTypes(filter); 276 trigger["resource-type"] = getResourceTypes(filter);
253 if (filter.thirdParty != null) 277 if (filter.thirdParty != null)
254 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 278 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
255 279
256 if (included.length > 0) 280 if (included.length > 0)
257 trigger["if-domain"] = addDomainPrefix(included); 281 trigger["if-domain"] = addDomainPrefix(included);
258 else if (excluded.length > 0) 282 else if (excluded.length > 0)
259 trigger["unless-domain"] = addDomainPrefix(excluded); 283 trigger["unless-domain"] = addDomainPrefix(excluded);
260 284
261 return {trigger: trigger, action: {type: action}}; 285 rules.push({trigger: trigger, action: {type: action}});
262 } 286 }
263 287
264 function hasNonASCI(obj) 288 function hasNonASCI(obj)
265 { 289 {
266 if (typeof obj == "string") 290 if (typeof obj == "string")
267 { 291 {
268 if (/[^\x00-\x7F]/.test(obj)) 292 if (/[^\x00-\x7F]/.test(obj))
269 return true; 293 return true;
270 } 294 }
271 295
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 return; 386 return;
363 if (filter instanceof filterClasses.RegExpFilter && 387 if (filter instanceof filterClasses.RegExpFilter &&
364 filter.regexpSource == null) 388 filter.regexpSource == null)
365 return; 389 return;
366 390
367 if (filter instanceof filterClasses.BlockingFilter) 391 if (filter instanceof filterClasses.BlockingFilter)
368 this.requestFilters.push(filter); 392 this.requestFilters.push(filter);
369 393
370 if (filter instanceof filterClasses.WhitelistFilter) 394 if (filter instanceof filterClasses.WhitelistFilter)
371 { 395 {
372 if (filter.contentType & (typeMap.DOCUMENT 396 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))
373 | typeMap.IMAGE
374 | typeMap.STYLESHEET
375 | typeMap.SCRIPT
376 | typeMap.FONT
377 | typeMap.MEDIA
378 | typeMap.POPUP
379 | typeMap.OBJECT
380 | typeMap.OBJECT_SUBREQUEST
381 | typeMap.XMLHTTPREQUEST
382 | typeMap.PING
383 | typeMap.SUBDOCUMENT
384 | typeMap.OTHER))
385 this.requestExceptions.push(filter); 397 this.requestExceptions.push(filter);
386 398
387 if (filter.contentType & typeMap.ELEMHIDE) 399 if (filter.contentType & typeMap.ELEMHIDE)
388 this.elemhideExceptions.push(filter); 400 this.elemhideExceptions.push(filter);
389 } 401 }
390 402
391 if (filter instanceof filterClasses.ElemHideFilter) 403 if (filter instanceof filterClasses.ElemHideFilter)
392 this.elemhideFilters.push(filter); 404 this.elemhideFilters.push(filter);
393 405
394 if (filter instanceof filterClasses.ElemHideException) 406 if (filter instanceof filterClasses.ElemHideException)
395 { 407 {
396 let domains = this.elemhideSelectorExceptions[filter.selector]; 408 let domains = this.elemhideSelectorExceptions[filter.selector];
397 if (!domains) 409 if (!domains)
398 domains = this.elemhideSelectorExceptions[filter.selector] = []; 410 domains = this.elemhideSelectorExceptions[filter.selector] = [];
399 411
400 parseDomains(filter.domains, domains, []); 412 parseDomains(filter.domains, domains, []);
401 } 413 }
402 }; 414 };
403 415
404 /** 416 /**
405 * Generate content blocker list for all filters that were added 417 * Generate content blocker list for all filters that were added
406 * 418 *
407 * @returns {Filter} filter Filter to convert 419 * @returns {Filter} filter Filter to convert
408 */ 420 */
409 ContentBlockerList.prototype.generateRules = function(filter) 421 ContentBlockerList.prototype.generateRules = function(filter)
410 { 422 {
411 let rules = []; 423 let rules = [];
412 424
413 function addRule(rule)
414 {
415 if (!hasNonASCI(rule))
416 rules.push(rule);
417 }
418
419 let groupedElemhideFilters = new Map(); 425 let groupedElemhideFilters = new Map();
420 for (let filter of this.elemhideFilters) 426 for (let filter of this.elemhideFilters)
421 { 427 {
422 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); 428 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);
423 if (!result) 429 if (!result)
424 continue; 430 continue;
425 431
426 if (result.matchDomains.length == 0) 432 if (result.matchDomains.length == 0)
427 result.matchDomains = ["^https?://"]; 433 result.matchDomains = ["^https?://"];
428 434
429 for (let matchDomain of result.matchDomains) 435 for (let matchDomain of result.matchDomains)
430 { 436 {
431 let group = groupedElemhideFilters.get(matchDomain) || []; 437 let group = groupedElemhideFilters.get(matchDomain) || [];
432 group.push(result.selector); 438 group.push(result.selector);
433 groupedElemhideFilters.set(matchDomain, group); 439 groupedElemhideFilters.set(matchDomain, group);
434 } 440 }
435 } 441 }
436 442
437 groupedElemhideFilters.forEach((selectors, matchDomain) => 443 groupedElemhideFilters.forEach((selectors, matchDomain) =>
438 { 444 {
439 while (selectors.length) 445 while (selectors.length)
440 { 446 {
441 let selector = selectors.splice(0, selectorLimit).join(", "); 447 let selector = selectors.splice(0, selectorLimit).join(", ");
442 448
443 // As of Safari 9.0 element IDs are matched as lowercase. We work around 449 // As of Safari 9.0 element IDs are matched as lowercase. We work around
444 // this by converting to the attribute format [id="elementID"] 450 // this by converting to the attribute format [id="elementID"]
445 selector = convertIDSelectorsToAttributeSelectors(selector); 451 selector = convertIDSelectorsToAttributeSelectors(selector);
446 452
447 addRule({ 453 rules.push({
448 trigger: {"url-filter": matchDomain, 454 trigger: {"url-filter": matchDomain,
449 "url-filter-is-case-sensitive": true}, 455 "url-filter-is-case-sensitive": true},
450 action: {type: "css-display-none", 456 action: {type: "css-display-none",
451 selector: selector} 457 selector: selector}
452 }); 458 });
453 } 459 }
454 }); 460 });
455 461
456 for (let filter of this.elemhideExceptions) 462 for (let filter of this.elemhideExceptions)
457 addRule(convertFilter(filter, "ignore-previous-rules", false)); 463 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
458 for (let filter of this.requestFilters) 464 for (let filter of this.requestFilters)
459 addRule(convertFilter(filter, "block", true)); 465 convertFilterAddRules(rules, filter, "block", true);
460 for (let filter of this.requestExceptions) 466 for (let filter of this.requestExceptions)
461 addRule(convertFilter(filter, "ignore-previous-rules", true)); 467 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
462 468
463 return rules; 469 return rules.filter(rule => !hasNonASCI(rule));
464 }; 470 };
LEFT | RIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld