Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29473555: Issue 5345 - Whitelist $elemhide and $generichide domains where possible (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Left Patch Set: Address comments to Patch Set 1 Created July 8, 2017, 5:32 a.m.
Right Patch Set: Rebase Created July 11, 2017, 5:28 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
114 * case, a hostname string (or undefined) and a bool 114 * case, a hostname string (or undefined) and a bool
115 * indicating if the source only contains a hostname or not: 115 * indicating if the source only contains a hostname or not:
116 * {regexp: "...", 116 * {regexp: "...",
117 * canSafelyMatchAsLowercase: true/false, 117 * canSafelyMatchAsLowercase: true/false,
118 * hostname: "...", 118 * hostname: "...",
119 * justHostname: true/false} 119 * justHostname: true/false}
120 */ 120 */
121 function parseFilterRegexpSource(text) 121 function parseFilterRegexpSource(text)
122 { 122 {
123 let regexp = []; 123 let regexp = [];
124 let lastIndex = text.length - 1; 124
125 // Convert the text into an array of Unicode characters.
126 //
127 // In the case of surrogate pairs (the smiley emoji, for example), one
128 // Unicode code point is represented by two JavaScript characters together.
129 // We want to iterate over Unicode code points rather than JavaScript
130 // characters.
131 let characters = Array.from(text);
132
133 let lastIndex = characters.length - 1;
125 let hostname; 134 let hostname;
126 let hostnameStart = null; 135 let hostnameStart = null;
127 let hostnameFinished = false; 136 let hostnameFinished = false;
128 let justHostname = false; 137 let justHostname = false;
129 let canSafelyMatchAsLowercase = false; 138 let canSafelyMatchAsLowercase = false;
130 139
131 for (let i = 0; i < text.length; i++) 140 for (let i = 0; i < characters.length; i++)
132 { 141 {
133 let c = text[i]; 142 let c = characters[i];
134 143
135 if (hostnameFinished) 144 if (hostnameFinished)
136 justHostname = false; 145 justHostname = false;
137 146
138 // If we're currently inside the hostname we have to be careful not to 147 // If we're currently inside the hostname we have to be careful not to
139 // escape any characters until after we have converted it to punycode. 148 // escape any characters until after we have converted it to punycode.
140 if (hostnameStart != null && !hostnameFinished) 149 if (hostnameStart != null && !hostnameFinished)
141 { 150 {
142 let endingChar = (c == "*" || c == "^" || 151 let endingChar = (c == "*" || c == "^" ||
143 c == "?" || c == "/" || c == "|"); 152 c == "?" || c == "/" || c == "|");
144 if (!endingChar && i != lastIndex) 153 if (!endingChar && i != lastIndex)
145 continue; 154 continue;
146 155
147 hostname = punycode.toASCII( 156 hostname = punycode.toASCII(
148 text.substring(hostnameStart, endingChar ? i : i + 1).toLowerCase() 157 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
Manish Jethani 2017/07/12 08:59:56 This was the only conflict during rebase, since te
158 .toLowerCase()
149 ); 159 );
150 hostnameFinished = justHostname = true; 160 hostnameFinished = justHostname = true;
151 regexp.push(escapeRegExp(hostname)); 161 regexp.push(escapeRegExp(hostname));
152 if (!endingChar) 162 if (!endingChar)
153 break; 163 break;
154 } 164 }
155 165
156 switch (c) 166 switch (c)
157 { 167 {
158 case "*": 168 case "*":
159 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") 169 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
160 regexp.push(".*"); 170 regexp.push(".*");
161 break; 171 break;
162 case "^": 172 case "^":
163 if (i < lastIndex) 173 if (i < lastIndex)
164 regexp.push("."); 174 regexp.push(".");
165 break; 175 break;
166 case "|": 176 case "|":
167 if (i == 0) 177 if (i == 0)
168 { 178 {
169 regexp.push("^"); 179 regexp.push("^");
170 break; 180 break;
171 } 181 }
172 if (i == lastIndex) 182 if (i == lastIndex)
173 { 183 {
174 regexp.push("$"); 184 regexp.push("$");
175 break; 185 break;
176 } 186 }
177 if (i == 1 && text[0] == "|") 187 if (i == 1 && characters[0] == "|")
178 { 188 {
179 hostnameStart = i + 1; 189 hostnameStart = i + 1;
180 canSafelyMatchAsLowercase = true; 190 canSafelyMatchAsLowercase = true;
181 regexp.push("https?://([^/]+\\.)?"); 191 regexp.push("https?://([^/]+\\.)?");
182 break; 192 break;
183 } 193 }
184 regexp.push("\\|"); 194 regexp.push("\\|");
185 break; 195 break;
186 case "/": 196 case "/":
187 if (!hostnameFinished && 197 if (!hostnameFinished &&
188 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 198 characters[i - 2] == ":" && characters[i - 1] == "/")
189 { 199 {
190 hostnameStart = i + 1; 200 hostnameStart = i + 1;
191 canSafelyMatchAsLowercase = true; 201 canSafelyMatchAsLowercase = true;
192 } 202 }
193 regexp.push("/"); 203 regexp.push("/");
194 break; 204 break;
195 case ".": case "+": case "$": case "?": 205 case ".": case "+": case "$": case "?":
196 case "{": case "}": case "(": case ")": 206 case "{": case "}": case "(": case ")":
197 case "[": case "]": case "\\": 207 case "[": case "]": case "\\":
198 regexp.push("\\", c); 208 regexp.push("\\", c);
199 break; 209 break;
200 default: 210 default:
201 if (hostnameFinished && (c >= "a" && c <= "z" || 211 if (hostnameFinished && (c >= "a" && c <= "z" ||
202 c >= "A" && c <= "Z")) 212 c >= "A" && c <= "Z"))
203 canSafelyMatchAsLowercase = false; 213 canSafelyMatchAsLowercase = false;
204 regexp.push(c); 214 regexp.push(c == "%" ? c : encodeURI(c));
205 } 215 }
206 } 216 }
207 217
208 return { 218 return {
209 regexp: regexp.join(""), 219 regexp: regexp.join(""),
210 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 220 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
211 hostname: hostname, 221 hostname: hostname,
212 justHostname: justHostname 222 justHostname: justHostname
213 }; 223 };
214 } 224 }
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
281 let included = []; 291 let included = [];
282 let excluded = []; 292 let excluded = [];
283 293
284 parseDomains(filter.domains, included, excluded); 294 parseDomains(filter.domains, included, excluded);
285 295
286 if (exceptionDomains) 296 if (exceptionDomains)
287 excluded = excluded.concat(exceptionDomains); 297 excluded = excluded.concat(exceptionDomains);
288 298
289 if (withResourceTypes) 299 if (withResourceTypes)
290 { 300 {
291 trigger["resource-type"] = getResourceTypes(filter); 301 let resourceTypes = getResourceTypes(filter);
292 302
293 if (trigger["resource-type"].length == 0) 303 // Content blocker rules can't differentiate between sub-document requests
304 // (iframes) and top-level document requests. To avoid too many false
305 // positives, we prevent rules with no hostname part from blocking document
306 // requests.
307 //
308 // Once Safari 11 becomes our minimum supported version, we could change
309 // our approach here to use the new "unless-top-url" property instead.
310 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)
311 resourceTypes = resourceTypes.filter(type => type != "document");
312
313 if (resourceTypes.length == 0)
294 return; 314 return;
315
316 trigger["resource-type"] = resourceTypes;
295 } 317 }
296 318
297 if (filter.thirdParty != null) 319 if (filter.thirdParty != null)
298 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 320 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
299 321
300 if (included.length > 0) 322 if (included.length > 0)
301 { 323 {
302 trigger["if-domain"] = []; 324 trigger["if-domain"] = [];
303 325
304 for (let name of included) 326 for (let name of included)
(...skipping 15 matching lines...) Expand all
320 { 342 {
321 trigger["if-domain"].push("*" + name); 343 trigger["if-domain"].push("*" + name);
322 } 344 }
323 } 345 }
324 } 346 }
325 else if (excluded.length > 0) 347 else if (excluded.length > 0)
326 { 348 {
327 trigger["unless-domain"] = excluded.map(name => "*" + name); 349 trigger["unless-domain"] = excluded.map(name => "*" + name);
328 } 350 }
329 else if (filter instanceof filterClasses.BlockingFilter && 351 else if (filter instanceof filterClasses.BlockingFilter &&
330 filter.contentType & typeMap.SUBDOCUMENT) 352 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)
331 { 353 {
354 // Rules with a hostname part are still allowed to block document requests,
355 // but we add an exception for top-level documents.
356 //
357 // Note that we can only do this if there's no "unless-domain" property for
358 // now. This also only works in Safari 11 onwards, while older versions
359 // simply ignore this property. Once Safari 11 becomes our minimum
360 // supported version, we can merge "unless-domain" into "unless-top-url".
332 trigger["unless-top-url"] = [trigger["url-filter"]]; 361 trigger["unless-top-url"] = [trigger["url-filter"]];
333 if (trigger["url-filter-is-case-sensitive"]) 362 if (trigger["url-filter-is-case-sensitive"])
334 trigger["top-url-filter-is-case-sensitive"] = true; 363 trigger["top-url-filter-is-case-sensitive"] = true;
335 } 364 }
336 365
337 rules.push({trigger: trigger, action: {type: action}}); 366 rules.push({trigger: trigger, action: {type: action}});
338 }
339
340 function hasNonASCI(obj)
341 {
342 if (typeof obj == "string")
343 {
344 if (/[^\x00-\x7F]/.test(obj))
345 return true;
346 }
347
348 if (typeof obj == "object")
349 {
350 if (obj instanceof Array)
351 for (let item of obj)
352 if (hasNonASCI(item))
353 return true;
354
355 let names = Object.getOwnPropertyNames(obj);
356 for (let name of names)
357 if (hasNonASCI(obj[name]))
358 return true;
359 }
360
361 return false;
362 } 367 }
363 368
364 function convertIDSelectorsToAttributeSelectors(selector) 369 function convertIDSelectorsToAttributeSelectors(selector)
365 { 370 {
366 // First we figure out where all the IDs are 371 // First we figure out where all the IDs are
367 let sep = ""; 372 let sep = "";
368 let start = null; 373 let start = null;
369 let positions = []; 374 let positions = [];
370 for (let i = 0; i < selector.length; i++) 375 for (let i = 0; i < selector.length; i++)
371 { 376 {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
407 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 412 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
408 i = pos.end; 413 i = pos.end;
409 } 414 }
410 newSelector.push(selector.substring(i)); 415 newSelector.push(selector.substring(i));
411 416
412 return newSelector.join(""); 417 return newSelector.join("");
413 } 418 }
414 419
415 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) 420 function addCSSRules(rules, selectors, matchDomain, exceptionDomains)
416 { 421 {
422 let unlessDomain = exceptionDomains.size > 0 ? [] : null;
423
424 exceptionDomains.forEach(name => unlessDomain.push("*" + name));
425
417 while (selectors.length) 426 while (selectors.length)
418 { 427 {
419 let selector = selectors.splice(0, selectorLimit).join(", "); 428 let selector = selectors.splice(0, selectorLimit).join(", ");
420 429
421 // As of Safari 9.0 element IDs are matched as lowercase. We work around 430 // As of Safari 9.0 element IDs are matched as lowercase. We work around
422 // this by converting to the attribute format [id="elementID"] 431 // this by converting to the attribute format [id="elementID"]
423 selector = convertIDSelectorsToAttributeSelectors(selector); 432 selector = convertIDSelectorsToAttributeSelectors(selector);
424 433
425 let rule = { 434 let rule = {
426 trigger: {"url-filter": matchDomain, 435 trigger: {"url-filter": matchDomain,
427 "url-filter-is-case-sensitive": true}, 436 "url-filter-is-case-sensitive": true},
428 action: {type: "css-display-none", 437 action: {type: "css-display-none",
429 selector: selector} 438 selector: selector}
430 }; 439 };
431 440
432 if (exceptionDomains.size > 0) 441 if (unlessDomain)
433 { 442 rule.trigger["unless-domain"] = unlessDomain;
434 rule.trigger["unless-domain"] = [];
435 exceptionDomains.forEach(name =>
436 {
437 rule.trigger["unless-domain"].push("*" + name);
438 });
439 }
440 443
441 rules.push(rule); 444 rules.push(rule);
442 } 445 }
443 } 446 }
444 447
445 let ContentBlockerList = 448 let ContentBlockerList =
446 /** 449 /**
447 * Create a new Adblock Plus filter to content blocker list converter 450 * Create a new Adblock Plus filter to content blocker list converter
448 * 451 *
449 * @constructor 452 * @constructor
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
546 // Note that as a result of this workaround we end up with a huge rule set in 549 // Note that as a result of this workaround we end up with a huge rule set in
547 // terms of the amount of memory used. This can cause Node.js to throw 550 // terms of the amount of memory used. This can cause Node.js to throw
548 // "JavaScript heap out of memory". To avoid this, call Node.js with 551 // "JavaScript heap out of memory". To avoid this, call Node.js with
549 // --max_old_space_size=4096 552 // --max_old_space_size=4096
550 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); 553 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);
551 554
552 let genericSelectorExceptionDomains = 555 let genericSelectorExceptionDomains =
553 extractFilterDomains(this.generichideExceptions); 556 extractFilterDomains(this.generichideExceptions);
554 elemhideExceptionDomains.forEach(name => 557 elemhideExceptionDomains.forEach(name =>
555 { 558 {
556 genericSelectorExceptionDomains.add(name); 559 genericSelectorExceptionDomains.add(name);
kzar 2017/07/10 12:33:08 I wonder if it would be better to pass two Sets of
Manish Jethani 2017/07/11 11:19:18 We'd still have to combine them into one set if we
kzar 2017/07/11 12:20:03 Fair enough.
557 }); 560 });
558 561
559 addCSSRules(rules, genericSelectors, "^https?://", 562 addCSSRules(rules, genericSelectors, "^https?://",
560 genericSelectorExceptionDomains); 563 genericSelectorExceptionDomains);
561 564
562 groupedElemhideFilters.forEach((selectors, matchDomain) => 565 groupedElemhideFilters.forEach((selectors, matchDomain) =>
563 { 566 {
564 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains); 567 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains);
565 }); 568 });
566 569
567 let requestFilterExceptionDomains = []; 570 let requestFilterExceptionDomains = [];
568 for (let filter of this.genericblockExceptions) 571 for (let filter of this.genericblockExceptions)
569 { 572 {
570 let parsed = parseFilterRegexpSource(filter.regexpSource); 573 let parsed = parseFilterRegexpSource(filter.regexpSource);
571 if (parsed.hostname) 574 if (parsed.hostname)
572 requestFilterExceptionDomains.push(parsed.hostname); 575 requestFilterExceptionDomains.push(parsed.hostname);
573 } 576 }
574 577
575 for (let filter of this.requestFilters) 578 for (let filter of this.requestFilters)
576 { 579 {
577 convertFilterAddRules(rules, filter, "block", true, 580 convertFilterAddRules(rules, filter, "block", true,
578 requestFilterExceptionDomains); 581 requestFilterExceptionDomains);
579 } 582 }
580 583
581 for (let filter of this.requestExceptions) 584 for (let filter of this.requestExceptions)
582 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 585 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
583 586
584 return rules.filter(rule => !hasNonASCI(rule)); 587 return rules;
585 }; 588 };
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld