Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Add top-level exception after copying rule Created July 9, 2017, 9:27 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 15 matching lines...) Expand all
// Content type name -> bit flag mapping provided by the filter classes.
const typeMap = filterClasses.RegExpFilter.typeMap;

// Combined bit mask of all request types that are checked when deciding
// whether a whitelisting filter needs request-level rules of its own.
const whitelistableRequestTypes = [
  typeMap.IMAGE,
  typeMap.STYLESHEET,
  typeMap.SCRIPT,
  typeMap.FONT,
  typeMap.MEDIA,
  typeMap.POPUP,
  typeMap.OBJECT,
  typeMap.OBJECT_SUBREQUEST,
  typeMap.XMLHTTPREQUEST,
  typeMap.WEBSOCKET,
  typeMap.WEBRTC,
  typeMap.PING,
  typeMap.SUBDOCUMENT,
  typeMap.OTHER
].reduce((mask, flag) => mask | flag, 0);
39 41
40 function parseDomains(domains, included, excluded) 42 function parseDomains(domains, included, excluded)
41 { 43 {
42 for (let domain in domains) 44 for (let domain in domains)
43 { 45 {
44 if (domain != "") 46 if (domain != "")
45 { 47 {
(...skipping 11 matching lines...) Expand all
/**
 * Escapes all regular expression metacharacters in a string so that it can be
 * embedded into a regular expression and matched literally.
 *
 * @param {string} s The string to escape
 * @returns {string} The string with each metacharacter prefixed by a backslash
 */
function escapeRegExp(s)
{
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;

  return s.replace(metaCharacters, character => "\\" + character);
}
61 63
/**
 * Builds a regular expression string matching HTTP(S) URLs whose host is the
 * given domain or any of its subdomains.
 *
 * @param {string} domain The domain name to match
 * @returns {string} Regular expression source anchored to the start of the URL
 */
function matchDomain(domain)
{
  // The domain is escaped first and lower-cased afterwards, so that it is
  // matched literally and in lower case.
  const escapedDomain = escapeRegExp(domain).toLowerCase();

  return ["^https?://([^/:]*\\.)?", escapedDomain, "[/:]"].join("");
}
66 68
/**
 * Returns the URL scheme patterns (regular expression fragments) that rules
 * for the given content type have to be anchored to.
 *
 * @param {number} [contentType] Bit mask of typeMap flags. If omitted, only
 *                               the HTTP(S) scheme pattern is returned.
 * @returns {string[]} The URL scheme patterns; empty if the content type mask
 *                     has no bits set
 */
function getURLSchemes(contentType)
{
  // parseFilterRegexpSource calls this function without an argument to obtain
  // its default scheme. Without this guard the bitwise tests below would all
  // evaluate to 0 for undefined, an empty list would be returned and the
  // caller would end up with the literal text "undefined" in its pattern.
  if (contentType == null)
    return ["https?://"];

  let includesWebSocket = (contentType & typeMap.WEBSOCKET) != 0;
  let includesWebRTC = (contentType & typeMap.WEBRTC) != 0;
  let includesOtherTypes =
    (contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC)) != 0;

  // If the given content type includes all supported URL schemes, simply
  // return a single generic URL scheme pattern. This minimizes the size of the
  // generated rule set. The downside to this is that it will also match
  // schemes that we do not want to match (e.g. "ftp://"), but this can be
  // mitigated by adding exceptions for those schemes.
  if (includesWebSocket && includesWebRTC && includesOtherTypes)
    return ["[^:]+:(//)?"];

  let urlSchemes = [];

  if (includesWebSocket)
    urlSchemes.push("wss?://");

  if (includesWebRTC)
    urlSchemes.push("stuns?:", "turns?:");

  // Every content type other than WebSocket and WebRTC is requested via
  // HTTP(S).
  if (includesOtherTypes)
    urlSchemes.push("https?://");

  return urlSchemes;
}
93
67 function findSubdomainsInList(domain, list) 94 function findSubdomainsInList(domain, list)
68 { 95 {
69 let subdomains = []; 96 let subdomains = [];
70 let suffixLength = domain.length + 1; 97 let suffixLength = domain.length + 1;
71 98
72 for (let name of list) 99 for (let name of list)
73 { 100 {
74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) 101 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
75 subdomains.push(name.slice(0, -suffixLength)); 102 subdomains.push(name.slice(0, -suffixLength));
76 } 103 }
(...skipping 13 matching lines...) Expand all
90 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 117 return {matchDomains: included.map(matchDomain), selector: filter.selector};
91 } 118 }
92 119
93 /** 120 /**
94 * Parse the given filter "regexpSource" string, producing a regular expression, 121 * Parse the given filter "regexpSource" string, producing a regular expression,
95 * extracting the hostname (if any), deciding if the regular expression is safe 122 * extracting the hostname (if any), deciding if the regular expression is safe
96 * to be converted + matched as lower case and noting if the source contains 123 * to be converted + matched as lower case and noting if the source contains
97 * anything after the hostname. 124 * anything after the hostname.
98 * 125 *
99 * @param {string} text regexpSource property of a filter 126 * @param {string} text regexpSource property of a filter
127 * @param {string} urlScheme The URL scheme to use in the regular expression
100 * @returns {object} An object containing a regular expression string, a bool 128 * @returns {object} An object containing a regular expression string, a bool
101 * indicating if the filter can be safely matched as lower 129 * indicating if the filter can be safely matched as lower
102 * case, a hostname string (or undefined) and a bool 130 * case, a hostname string (or undefined) and a bool
103 * indicating if the source only contains a hostname or not: 131 * indicating if the source only contains a hostname or not:
104 * {regexp: "...", 132 * {regexp: "...",
105 * canSafelyMatchAsLowercase: true/false, 133 * canSafelyMatchAsLowercase: true/false,
106 * hostname: "...", 134 * hostname: "...",
107 * justHostname: true/false} 135 * justHostname: true/false}
108 */ 136 */
109 function parseFilterRegexpSource(text) 137 function parseFilterRegexpSource(text, urlScheme)
110 { 138 {
111 let regexp = []; 139 let regexp = [];
112 let lastIndex = text.length - 1; 140 let lastIndex = text.length - 1;
113 let hostname; 141 let hostname;
114 let hostnameStart = null; 142 let hostnameStart = null;
115 let hostnameFinished = false; 143 let hostnameFinished = false;
116 let justHostname = false; 144 let justHostname = false;
117 let canSafelyMatchAsLowercase = false; 145 let canSafelyMatchAsLowercase = false;
118 146
147 if (!urlScheme)
148 urlScheme = getURLSchemes()[0];
149
119 for (let i = 0; i < text.length; i++) 150 for (let i = 0; i < text.length; i++)
120 { 151 {
121 let c = text[i]; 152 let c = text[i];
122 153
123 if (hostnameFinished) 154 if (hostnameFinished)
124 justHostname = false; 155 justHostname = false;
125 156
126 // If we're currently inside the hostname we have to be careful not to 157 // If we're currently inside the hostname we have to be careful not to
127 // escape any characters until after we have converted it to punycode. 158 // escape any characters until after we have converted it to punycode.
128 if (hostnameStart != null && !hostnameFinished) 159 if (hostnameStart != null && !hostnameFinished)
(...skipping 30 matching lines...) Expand all
159 } 190 }
160 if (i == lastIndex) 191 if (i == lastIndex)
161 { 192 {
162 regexp.push("$"); 193 regexp.push("$");
163 break; 194 break;
164 } 195 }
165 if (i == 1 && text[0] == "|") 196 if (i == 1 && text[0] == "|")
166 { 197 {
167 hostnameStart = i + 1; 198 hostnameStart = i + 1;
168 canSafelyMatchAsLowercase = true; 199 canSafelyMatchAsLowercase = true;
169 regexp.push("https?://([^/]+\\.)?"); 200 regexp.push(urlScheme + "([^/]+\\.)?");
170 break; 201 break;
171 } 202 }
172 regexp.push("\\|"); 203 regexp.push("\\|");
173 break; 204 break;
174 case "/": 205 case "/":
175 if (!hostnameFinished && 206 if (!hostnameFinished &&
176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 207 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
177 { 208 {
178 hostnameStart = i + 1; 209 hostnameStart = i + 1;
179 canSafelyMatchAsLowercase = true; 210 canSafelyMatchAsLowercase = true;
(...skipping 14 matching lines...) Expand all
194 } 225 }
195 226
196 return { 227 return {
197 regexp: regexp.join(""), 228 regexp: regexp.join(""),
198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 229 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
199 hostname: hostname, 230 hostname: hostname,
200 justHostname: justHostname 231 justHostname: justHostname
201 }; 232 };
202 } 233 }
203 234
/**
 * Translates a content type bit mask into the list of content blocker
 * resource type names used for a rule's "resource-type" trigger field.
 *
 * @param {number} contentType Bit mask of typeMap flags
 * @returns {string[]} The matching resource type names, in a fixed order
 */
function getResourceTypes(contentType)
{
  // Each entry pairs a resource type name with the mask of content types that
  // map onto it. The order of the entries determines the order of the result.
  const resourceTypeMasks = [
    ["image", typeMap.IMAGE],
    ["style-sheet", typeMap.STYLESHEET],
    ["script", typeMap.SCRIPT],
    ["font", typeMap.FONT],
    ["media", typeMap.MEDIA | typeMap.OBJECT],
    ["popup", typeMap.POPUP],
    ["raw", typeMap.XMLHTTPREQUEST |
            typeMap.WEBSOCKET |
            typeMap.WEBRTC |
            typeMap.OBJECT_SUBREQUEST |
            typeMap.PING |
            typeMap.OTHER],
    ["document", typeMap.SUBDOCUMENT]
  ];

  return resourceTypeMasks
    .filter(([, mask]) => contentType & mask)
    .map(([name]) => name);
}
230 265
/**
 * Creates copies of a rule for the alternative URL scheme patterns of its
 * content type.
 *
 * If the rule's URL filter already starts with the first URL scheme pattern,
 * that pattern is swapped for each of the remaining ones. Otherwise a copy is
 * generated for every URL scheme pattern, with the original filter pattern
 * (minus its leading character) allowed to occur anywhere after the scheme.
 *
 * @param {object} trigger The trigger of the rule to copy; it is not modified
 * @param {object} action The action of the rule to copy; it is not modified
 * @param {string[]} urlSchemes The URL scheme patterns for the rule
 * @returns {object[]} The copies, each of the form {trigger: ..., action: ...}
 */
function makeRuleCopies(trigger, action, urlSchemes)
{
  let copies = [];

  // Always make a deep copy of the rule, since rules may have to be
  // manipulated individually at a later stage. This includes the action:
  // sharing one action object between all copies would let a change to one
  // rule leak into the others.
  let stringifiedTrigger = JSON.stringify(trigger);
  let stringifiedAction = JSON.stringify(action);

  // Drop the first character of the URL filter (the caller passes filters
  // that start with the "^" anchor).
  let filterPattern = trigger["url-filter"].substring(1);
  let startIndex = 0;

  // If the URL filter already begins with the first URL scheme pattern, skip
  // it.
  if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
  {
    filterPattern = filterPattern.substring(urlSchemes[0].length);
    startIndex = 1;
  }
  else
  {
    filterPattern = ".*" + filterPattern;
  }

  for (let i = startIndex; i < urlSchemes.length; i++)
  {
    let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
      "url-filter": "^" + urlSchemes[i] + filterPattern
    });
    copies.push({trigger: copyTrigger, action: JSON.parse(stringifiedAction)});
  }

  return copies;
}
299
/**
 * Modifies the given trigger in place so that it does not apply when the
 * top-level URL itself matches the trigger's URL filter.
 *
 * @param {object} trigger The rule trigger to modify
 */
function excludeTopURLFromTrigger(trigger)
{
  const urlFilter = trigger["url-filter"];
  trigger["unless-top-url"] = [urlFilter];

  // The top URL filter has to be matched with the same case sensitivity as
  // the URL filter it was copied from.
  const isCaseSensitive = trigger["url-filter-is-case-sensitive"];
  if (isCaseSensitive)
    trigger["top-url-filter-is-case-sensitive"] = true;
}
306
231 function convertFilterAddRules(rules, filter, action, withResourceTypes, 307 function convertFilterAddRules(rules, filter, action, withResourceTypes,
232 exceptionDomains) 308 exceptionDomains, contentType)
233 { 309 {
234 let parsed = parseFilterRegexpSource(filter.regexpSource); 310 if (!contentType)
311 contentType = filter.contentType;
312
313 // If WebSocket or WebRTC are given along with other options but not
314 // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must
315 // generate multiple rules. For example, for the filter
316 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"
317 // and another rule with "^https?://" and "image". If we merge the two, we
318 // end up blocking requests of type XMLHttpRequest inadvertently.
319 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&
320 !(contentType & typeMap.WEBRTC &&
321 contentType & typeMap.XMLHTTPREQUEST)) ||
322 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&
323 !(contentType & typeMap.WEBSOCKET &&
324 contentType & typeMap.XMLHTTPREQUEST)))
325 {
326 if (contentType & typeMap.WEBSOCKET)
327 {
328 convertFilterAddRules(rules, filter, action, withResourceTypes,
329 exceptionDomains, typeMap.WEBSOCKET);
330 }
331
332 if (contentType & typeMap.WEBRTC)
333 {
334 convertFilterAddRules(rules, filter, action, withResourceTypes,
335 exceptionDomains, typeMap.WEBRTC);
336 }
337
338 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);
339
340 if (!contentType)
341 return;
342 }
343
344 let urlSchemes = getURLSchemes(contentType);
345 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);
235 346
236 // For the special case of $document whitelisting filters with just a domain 347 // For the special case of $document whitelisting filters with just a domain
237 // we can generate an equivalent blocking rule exception using if-domain. 348 // we can generate an equivalent blocking rule exception using if-domain.
238 if (filter instanceof filterClasses.WhitelistFilter && 349 if (filter instanceof filterClasses.WhitelistFilter &&
239 filter.contentType & typeMap.DOCUMENT && 350 contentType & typeMap.DOCUMENT &&
240 parsed.justHostname) 351 parsed.justHostname)
241 { 352 {
242 rules.push({ 353 rules.push({
243 trigger: { 354 trigger: {
244 "url-filter": ".*", 355 "url-filter": ".*",
245 "if-domain": ["*" + parsed.hostname] 356 "if-domain": ["*" + parsed.hostname]
246 }, 357 },
247 action: {type: "ignore-previous-rules"} 358 action: {type: "ignore-previous-rules"}
248 }); 359 });
249 // If the filter contains other supported options we'll need to generate 360 // If the filter contains other supported options we'll need to generate
250 // further rules for it, but if not we can simply return now. 361 // further rules for it, but if not we can simply return now.
251 if (!(filter.contentType & whitelistableRequestTypes)) 362 if (!(contentType & whitelistableRequestTypes))
252 return; 363 return;
253 } 364 }
254 365
255 let trigger = {"url-filter": parsed.regexp}; 366 let trigger = {"url-filter": parsed.regexp};
256 367
257 // Limit rules to HTTP(S) URLs 368 // If the URL filter begins with one of the URL schemes for this content
258 if (!/^(\^|http)/i.test(trigger["url-filter"])) 369 // type, we generate additional rules for all the URL scheme patterns;
259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; 370 // otherwise, if the start of the URL filter literally matches the first URL
371 // scheme pattern, we just generate additional rules for the remaining URL
372 // scheme patterns.
373 //
374 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"
375 // in front of this and generate two additional rules for
376 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,
377 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate
378 // "^turns?:([^/]+\\.)?foo" in addition.
379 //
380 // Note that the filter can be already anchored to the beginning
381 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional
382 // rules.
383 let needAltRules = trigger["url-filter"][0] != "^" ||
384 trigger["url-filter"].startsWith("^" + urlSchemes[0]);
385
386 if (trigger["url-filter"][0] != "^")
387 {
388 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)
389 .test(trigger["url-filter"])))
390 {
391 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];
392 }
393
394 trigger["url-filter"] = "^" + trigger["url-filter"];
395 }
260 396
261 // For rules containing only a hostname we know that we're matching against 397 // For rules containing only a hostname we know that we're matching against
262 // a lowercase string unless the matchCase option was passed. 398 // a lowercase string unless the matchCase option was passed.
263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) 399 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
264 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 400 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
265 401
266 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) 402 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
267 trigger["url-filter-is-case-sensitive"] = true; 403 trigger["url-filter-is-case-sensitive"] = true;
268 404
269 let included = []; 405 let included = [];
270 let excluded = []; 406 let excluded = [];
271 407
272 parseDomains(filter.domains, included, excluded); 408 parseDomains(filter.domains, included, excluded);
273 409
274 if (exceptionDomains) 410 if (exceptionDomains)
275 excluded = excluded.concat(exceptionDomains); 411 excluded = excluded.concat(exceptionDomains);
276 412
277 if (withResourceTypes) 413 if (withResourceTypes)
278 { 414 {
279 trigger["resource-type"] = getResourceTypes(filter); 415 trigger["resource-type"] = getResourceTypes(contentType);
280 416
281 if (trigger["resource-type"].length == 0) 417 if (trigger["resource-type"].length == 0)
282 return; 418 return;
283 } 419 }
284 420
285 if (filter.thirdParty != null) 421 if (filter.thirdParty != null)
286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 422 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
287 423
424 let addTopLevelException = false;
425
288 if (included.length > 0) 426 if (included.length > 0)
289 { 427 {
290 trigger["if-domain"] = []; 428 trigger["if-domain"] = [];
291 429
292 for (let name of included) 430 for (let name of included)
293 { 431 {
294 // If this is a blocking filter or an element hiding filter, add the 432 // If this is a blocking filter or an element hiding filter, add the
295 // subdomain wildcard only if no subdomains have been excluded. 433 // subdomain wildcard only if no subdomains have been excluded.
296 let notSubdomains = null; 434 let notSubdomains = null;
297 if ((filter instanceof filterClasses.BlockingFilter || 435 if ((filter instanceof filterClasses.BlockingFilter ||
(...skipping 12 matching lines...) Expand all
310 } 448 }
311 } 449 }
312 } 450 }
313 else if (excluded.length > 0) 451 else if (excluded.length > 0)
314 { 452 {
315 trigger["unless-domain"] = excluded.map(name => "*" + name); 453 trigger["unless-domain"] = excluded.map(name => "*" + name);
316 } 454 }
317 else if (filter instanceof filterClasses.BlockingFilter && 455 else if (filter instanceof filterClasses.BlockingFilter &&
318 filter.contentType & typeMap.SUBDOCUMENT) 456 filter.contentType & typeMap.SUBDOCUMENT)
319 { 457 {
320 trigger["unless-top-url"] = [trigger["url-filter"]]; 458 addTopLevelException = true;
321 if (trigger["url-filter-is-case-sensitive"]) 459 excludeTopURLFromTrigger(trigger);
322 trigger["top-url-filter-is-case-sensitive"] = true;
323 } 460 }
324 461
325 rules.push({trigger: trigger, action: {type: action}}); 462 rules.push({trigger: trigger, action: {type: action}});
463
464 if (needAltRules)
465 {
466 // Generate additional rules for any alternative URL schemes.
467 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))
468 {
469 if (addTopLevelException)
470 excludeTopURLFromTrigger(altRule.trigger);
471
472 rules.push(altRule);
473 }
474 }
326 } 475 }
327 476
328 function hasNonASCI(obj) 477 function hasNonASCI(obj)
329 { 478 {
330 if (typeof obj == "string") 479 if (typeof obj == "string")
331 { 480 {
332 if (/[^\x00-\x7F]/.test(obj)) 481 if (/[^\x00-\x7F]/.test(obj))
333 return true; 482 return true;
334 } 483 }
335 484
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
539 { 688 {
540 convertFilterAddRules(rules, filter, "block", true, 689 convertFilterAddRules(rules, filter, "block", true,
541 requestFilterExceptionDomains); 690 requestFilterExceptionDomains);
542 } 691 }
543 692
544 for (let filter of this.requestExceptions) 693 for (let filter of this.requestExceptions)
545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 694 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
546 695
547 return rules.filter(rule => !hasNonASCI(rule)); 696 return rules.filter(rule => !hasNonASCI(rule));
548 }; 697 };
OLDNEW
« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld