OLD | NEW |
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 15 matching lines...) Expand all Loading... |
// Shorthand for the content type bit flags exposed by RegExpFilter.
const typeMap = filterClasses.RegExpFilter.typeMap;

// Combined bit mask of the request content types listed below. A filter's
// content type is tested against this mask to decide whether a hostname-only
// $document exception still needs ordinary request rules generated for it.
const whitelistableRequestTypes = [
  typeMap.IMAGE,
  typeMap.STYLESHEET,
  typeMap.SCRIPT,
  typeMap.FONT,
  typeMap.MEDIA,
  typeMap.POPUP,
  typeMap.OBJECT,
  typeMap.OBJECT_SUBREQUEST,
  typeMap.XMLHTTPREQUEST,
  typeMap.WEBSOCKET,
  typeMap.WEBRTC,
  typeMap.PING,
  typeMap.SUBDOCUMENT,
  typeMap.OTHER
].reduce((mask, flag) => mask | flag, 0);
39 | 41 |
40 function parseDomains(domains, included, excluded) | 42 function parseDomains(domains, included, excluded) |
41 { | 43 { |
42 for (let domain in domains) | 44 for (let domain in domains) |
43 { | 45 { |
44 if (domain != "") | 46 if (domain != "") |
45 { | 47 { |
(...skipping 11 matching lines...) Expand all Loading... |
/**
 * Escapes every regular expression metacharacter in a string so that the
 * string can be embedded in a pattern and matched literally.
 *
 * @param {string} s Text to escape
 * @returns {string} The text with each special character prefixed by a
 *                   backslash
 */
function escapeRegExp(s)
{
  let specialCharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(specialCharacters, character => "\\" + character);
}
61 | 63 |
/**
 * Builds a regular expression source that matches HTTP(S) URLs whose host is
 * the given domain or any of its subdomains.
 *
 * @param {string} domain Domain name to match
 * @returns {string} Regular expression source anchored at the start of the
 *                   URL and ending right after the host name
 */
function matchDomain(domain)
{
  // The domain is escaped first and lowercased afterwards.
  let escapedDomain = escapeRegExp(domain).toLowerCase();

  return ["^https?://([^/:]*\\.)?", escapedDomain, "[/:]"].join("");
}
66 | 68 |
/**
 * Returns the URL scheme patterns (regular expression sources without the
 * leading "^") that requests of the given content type can use.
 *
 * @param {number} [contentType] Bit mask of typeMap flags. When omitted or
 *                               when no flag is set, the HTTP(S) scheme
 *                               pattern is returned so that callers taking
 *                               the first element always get a usable
 *                               pattern.
 * @returns {string[]} A non-empty list of URL scheme patterns
 */
function getURLSchemes(contentType)
{
  let nonHTTPTypes = typeMap.WEBSOCKET | typeMap.WEBRTC;

  let wantsWebSocket = contentType & typeMap.WEBSOCKET;
  let wantsWebRTC = contentType & typeMap.WEBRTC;
  let wantsHTTP = contentType & ~nonHTTPTypes;

  // If the given content type includes all supported URL schemes, simply
  // return a single generic URL scheme pattern. This minimizes the size of the
  // generated rule set. The downside to this is that it will also match
  // schemes that we do not want to match (e.g. "ftp://"), but this can be
  // mitigated by adding exceptions for those schemes.
  if (wantsWebSocket && wantsWebRTC && wantsHTTP)
    return ["[^:]+:(//)?"];

  let urlSchemes = [];

  if (wantsWebSocket)
    urlSchemes.push("wss?://");

  if (wantsWebRTC)
    urlSchemes.push("stuns?:", "turns?:");

  // Fall back to HTTP(S) when nothing else was selected (content type omitted
  // or zero). Returning an empty list would make callers that read the first
  // element end up with undefined, which then gets concatenated into the
  // generated regular expression as the text "undefined".
  if (wantsHTTP || urlSchemes.length == 0)
    urlSchemes.push("https?://");

  return urlSchemes;
}
| 93 |
67 function findSubdomainsInList(domain, list) | 94 function findSubdomainsInList(domain, list) |
68 { | 95 { |
69 let subdomains = []; | 96 let subdomains = []; |
70 let suffixLength = domain.length + 1; | 97 let suffixLength = domain.length + 1; |
71 | 98 |
72 for (let name of list) | 99 for (let name of list) |
73 { | 100 { |
74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) | 101 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) |
75 subdomains.push(name.slice(0, -suffixLength)); | 102 subdomains.push(name.slice(0, -suffixLength)); |
76 } | 103 } |
(...skipping 13 matching lines...) Expand all Loading... |
90 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 117 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
91 } | 118 } |
92 | 119 |
93 /** | 120 /** |
 * Parse the given filter "regexpSource" string, producing a regular expression,
 * extracting the hostname (if any), deciding if the regular expression is safe
 * to be converted + matched as lower case and noting if the source contains
 * anything after the hostname.
98 * | 125 * |
99 * @param {string} text regexpSource property of a filter | 126 * @param {string} text regexpSource property of a filter |
| 127 * @param {string} urlScheme The URL scheme to use in the regular expression |
100 * @returns {object} An object containing a regular expression string, a bool | 128 * @returns {object} An object containing a regular expression string, a bool |
101 * indicating if the filter can be safely matched as lower | 129 * indicating if the filter can be safely matched as lower |
102 * case, a hostname string (or undefined) and a bool | 130 * case, a hostname string (or undefined) and a bool |
103 * indicating if the source only contains a hostname or not: | 131 * indicating if the source only contains a hostname or not: |
104 * {regexp: "...", | 132 * {regexp: "...", |
105 * canSafelyMatchAsLowercase: true/false, | 133 * canSafelyMatchAsLowercase: true/false, |
106 * hostname: "...", | 134 * hostname: "...", |
107 * justHostname: true/false} | 135 * justHostname: true/false} |
108 */ | 136 */ |
109 function parseFilterRegexpSource(text) | 137 function parseFilterRegexpSource(text, urlScheme) |
110 { | 138 { |
111 let regexp = []; | 139 let regexp = []; |
112 let lastIndex = text.length - 1; | 140 let lastIndex = text.length - 1; |
113 let hostname; | 141 let hostname; |
114 let hostnameStart = null; | 142 let hostnameStart = null; |
115 let hostnameFinished = false; | 143 let hostnameFinished = false; |
116 let justHostname = false; | 144 let justHostname = false; |
117 let canSafelyMatchAsLowercase = false; | 145 let canSafelyMatchAsLowercase = false; |
118 | 146 |
| 147 if (!urlScheme) |
| 148 urlScheme = getURLSchemes()[0]; |
| 149 |
119 for (let i = 0; i < text.length; i++) | 150 for (let i = 0; i < text.length; i++) |
120 { | 151 { |
121 let c = text[i]; | 152 let c = text[i]; |
122 | 153 |
123 if (hostnameFinished) | 154 if (hostnameFinished) |
124 justHostname = false; | 155 justHostname = false; |
125 | 156 |
126 // If we're currently inside the hostname we have to be careful not to | 157 // If we're currently inside the hostname we have to be careful not to |
127 // escape any characters until after we have converted it to punycode. | 158 // escape any characters until after we have converted it to punycode. |
128 if (hostnameStart != null && !hostnameFinished) | 159 if (hostnameStart != null && !hostnameFinished) |
(...skipping 30 matching lines...) Expand all Loading... |
159 } | 190 } |
160 if (i == lastIndex) | 191 if (i == lastIndex) |
161 { | 192 { |
162 regexp.push("$"); | 193 regexp.push("$"); |
163 break; | 194 break; |
164 } | 195 } |
165 if (i == 1 && text[0] == "|") | 196 if (i == 1 && text[0] == "|") |
166 { | 197 { |
167 hostnameStart = i + 1; | 198 hostnameStart = i + 1; |
168 canSafelyMatchAsLowercase = true; | 199 canSafelyMatchAsLowercase = true; |
169 regexp.push("https?://([^/]+\\.)?"); | 200 regexp.push(urlScheme + "([^/]+\\.)?"); |
170 break; | 201 break; |
171 } | 202 } |
172 regexp.push("\\|"); | 203 regexp.push("\\|"); |
173 break; | 204 break; |
174 case "/": | 205 case "/": |
175 if (!hostnameFinished && | 206 if (!hostnameFinished && |
176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 207 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
177 { | 208 { |
178 hostnameStart = i + 1; | 209 hostnameStart = i + 1; |
179 canSafelyMatchAsLowercase = true; | 210 canSafelyMatchAsLowercase = true; |
(...skipping 14 matching lines...) Expand all Loading... |
194 } | 225 } |
195 | 226 |
196 return { | 227 return { |
197 regexp: regexp.join(""), | 228 regexp: regexp.join(""), |
198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 229 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
199 hostname: hostname, | 230 hostname: hostname, |
200 justHostname: justHostname | 231 justHostname: justHostname |
201 }; | 232 }; |
202 } | 233 } |
203 | 234 |
/**
 * Translates a content type bit mask into the list of content blocker
 * resource type names it covers.
 *
 * @param {number} contentType Bit mask of typeMap flags
 * @returns {string[]} Resource type names, each appearing at most once, in a
 *                     fixed order; empty if no supported flag is set
 */
function getResourceTypes(contentType)
{
  // Request types without a dedicated resource type are all reported as "raw".
  let rawTypes = typeMap.XMLHTTPREQUEST |
                 typeMap.WEBSOCKET |
                 typeMap.WEBRTC |
                 typeMap.OBJECT_SUBREQUEST |
                 typeMap.PING |
                 typeMap.OTHER;

  // Pairs of [bit mask, resource type name]; the order here determines the
  // order of the returned names.
  let resourceTypeTable = [
    [typeMap.IMAGE, "image"],
    [typeMap.STYLESHEET, "style-sheet"],
    [typeMap.SCRIPT, "script"],
    [typeMap.FONT, "font"],
    [typeMap.MEDIA | typeMap.OBJECT, "media"],
    [typeMap.POPUP, "popup"],
    [rawTypes, "raw"],
    [typeMap.SUBDOCUMENT, "document"]
  ];

  return resourceTypeTable.filter(entry => contentType & entry[0])
                          .map(entry => entry[1]);
}
230 | 265 |
/**
 * Creates copies of a rule, one per URL scheme pattern, each with its
 * "url-filter" re-anchored to that scheme.
 *
 * If the trigger's URL filter already begins with "^" followed by the first
 * scheme pattern, that scheme is skipped and copies are only made for the
 * remaining ones. Otherwise a copy is made for every scheme, with ".*"
 * inserted between the scheme and the original pattern.
 *
 * @param {object} trigger Rule trigger; its "url-filter" is expected to begin
 *                         with "^" (the first character is dropped
 *                         unconditionally)
 * @param {object} action Rule action
 * @param {string[]} urlSchemes URL scheme patterns without the leading "^"
 * @returns {object[]} Newly created rules of the form {trigger, action}; no
 *                     rule shares its trigger or action object with another
 *                     rule or with the arguments
 */
function makeRuleCopies(trigger, action, urlSchemes)
{
  let copies = [];

  // Always make a deep copy of the rule, since rules may have to be
  // manipulated individually at a later stage. This covers the action as well
  // as the trigger: sharing one action object between the copies would make a
  // change to one rule's action silently affect its siblings.
  let stringifiedTrigger = JSON.stringify(trigger);
  let stringifiedAction = JSON.stringify(action);

  let filterPattern = trigger["url-filter"].substring(1);
  let startIndex = 0;

  // If the URL filter already begins with the first URL scheme pattern, skip
  // it.
  if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
  {
    filterPattern = filterPattern.substring(urlSchemes[0].length);
    startIndex = 1;
  }
  else
  {
    filterPattern = ".*" + filterPattern;
  }

  for (let i = startIndex; i < urlSchemes.length; i++)
  {
    let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
      "url-filter": "^" + urlSchemes[i] + filterPattern
    });

    // JSON.stringify() yields undefined for values it cannot serialize (e.g.
    // an omitted action); pass those through unchanged instead of throwing.
    let copyAction = typeof stringifiedAction == "string" ?
                       JSON.parse(stringifiedAction) : action;

    copies.push({trigger: copyTrigger, action: copyAction});
  }

  return copies;
}
| 299 |
231 function convertFilterAddRules(rules, filter, action, withResourceTypes, | 300 function convertFilterAddRules(rules, filter, action, withResourceTypes, |
232 exceptionDomains) | 301 exceptionDomains, contentType) |
233 { | 302 { |
234 let parsed = parseFilterRegexpSource(filter.regexpSource); | 303 if (!contentType) |
| 304 contentType = filter.contentType; |
| 305 |
| 306 // If WebSocket or WebRTC are given along with other options but not |
| 307 // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must |
| 308 // generate multiple rules. For example, for the filter |
| 309 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw" |
| 310 // and another rule with "^https?://" and "image". If we merge the two, we |
| 311 // end up blocking requests of type XMLHttpRequest inadvertently. |
| 312 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET && |
| 313 !(contentType & typeMap.WEBRTC && |
| 314 contentType & typeMap.XMLHTTPREQUEST)) || |
| 315 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC && |
| 316 !(contentType & typeMap.WEBSOCKET && |
| 317 contentType & typeMap.XMLHTTPREQUEST))) |
| 318 { |
| 319 if (contentType & typeMap.WEBSOCKET) |
| 320 { |
| 321 convertFilterAddRules(rules, filter, action, withResourceTypes, |
| 322 exceptionDomains, typeMap.WEBSOCKET); |
| 323 } |
| 324 |
| 325 if (contentType & typeMap.WEBRTC) |
| 326 { |
| 327 convertFilterAddRules(rules, filter, action, withResourceTypes, |
| 328 exceptionDomains, typeMap.WEBRTC); |
| 329 } |
| 330 |
| 331 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); |
| 332 |
| 333 if (!contentType) |
| 334 return; |
| 335 } |
| 336 |
| 337 let urlSchemes = getURLSchemes(contentType); |
| 338 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); |
235 | 339 |
236 // For the special case of $document whitelisting filters with just a domain | 340 // For the special case of $document whitelisting filters with just a domain |
237 // we can generate an equivalent blocking rule exception using if-domain. | 341 // we can generate an equivalent blocking rule exception using if-domain. |
238 if (filter instanceof filterClasses.WhitelistFilter && | 342 if (filter instanceof filterClasses.WhitelistFilter && |
239 filter.contentType & typeMap.DOCUMENT && | 343 contentType & typeMap.DOCUMENT && |
240 parsed.justHostname) | 344 parsed.justHostname) |
241 { | 345 { |
242 rules.push({ | 346 rules.push({ |
243 trigger: { | 347 trigger: { |
244 "url-filter": ".*", | 348 "url-filter": ".*", |
245 "if-domain": ["*" + parsed.hostname] | 349 "if-domain": ["*" + parsed.hostname] |
246 }, | 350 }, |
247 action: {type: "ignore-previous-rules"} | 351 action: {type: "ignore-previous-rules"} |
248 }); | 352 }); |
249 // If the filter contains other supported options we'll need to generate | 353 // If the filter contains other supported options we'll need to generate |
250 // further rules for it, but if not we can simply return now. | 354 // further rules for it, but if not we can simply return now. |
251 if (!(filter.contentType & whitelistableRequestTypes)) | 355 if (!(contentType & whitelistableRequestTypes)) |
252 return; | 356 return; |
253 } | 357 } |
254 | 358 |
255 let trigger = {"url-filter": parsed.regexp}; | 359 let trigger = {"url-filter": parsed.regexp}; |
256 | 360 |
257 // Limit rules to HTTP(S) URLs | 361 // If the URL filter begins with one of the URL schemes for this content |
258 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 362 // type, we generate additional rules for all the URL scheme patterns; |
259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 363 // otherwise, if the start of the URL filter literally matches the first URL |
| 364 // scheme pattern, we just generate additional rules for the remaining URL |
| 365 // scheme patterns. |
| 366 // |
| 367 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" |
| 368 // in front of this and generate two additional rules for |
| 369 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, |
| 370 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate |
| 371 // "^turns?:([^/]+\\.)?foo" in addition. |
| 372 // |
| 373 // Note that the filter can be already anchored to the beginning |
| 374 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional |
| 375 // rules. |
| 376 let needAltRules = trigger["url-filter"][0] != "^" || |
| 377 trigger["url-filter"].startsWith("^" + urlSchemes[0]); |
| 378 |
| 379 if (trigger["url-filter"][0] != "^") |
| 380 { |
| 381 if (!urlSchemes.some(scheme => new RegExp("^" + scheme) |
| 382 .test(trigger["url-filter"]))) |
| 383 { |
| 384 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; |
| 385 } |
| 386 |
| 387 trigger["url-filter"] = "^" + trigger["url-filter"]; |
| 388 } |
260 | 389 |
261 // For rules containing only a hostname we know that we're matching against | 390 // For rules containing only a hostname we know that we're matching against |
262 // a lowercase string unless the matchCase option was passed. | 391 // a lowercase string unless the matchCase option was passed. |
263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | 392 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
264 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 393 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
265 | 394 |
266 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | 395 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
267 trigger["url-filter-is-case-sensitive"] = true; | 396 trigger["url-filter-is-case-sensitive"] = true; |
268 | 397 |
269 let included = []; | 398 let included = []; |
270 let excluded = []; | 399 let excluded = []; |
271 | 400 |
272 parseDomains(filter.domains, included, excluded); | 401 parseDomains(filter.domains, included, excluded); |
273 | 402 |
274 if (exceptionDomains) | 403 if (exceptionDomains) |
275 excluded = excluded.concat(exceptionDomains); | 404 excluded = excluded.concat(exceptionDomains); |
276 | 405 |
277 if (withResourceTypes) | 406 if (withResourceTypes) |
278 { | 407 { |
279 trigger["resource-type"] = getResourceTypes(filter); | 408 trigger["resource-type"] = getResourceTypes(contentType); |
280 | 409 |
281 if (trigger["resource-type"].length == 0) | 410 if (trigger["resource-type"].length == 0) |
282 return; | 411 return; |
283 } | 412 } |
284 | 413 |
285 if (filter.thirdParty != null) | 414 if (filter.thirdParty != null) |
286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 415 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
287 | 416 |
288 if (included.length > 0) | 417 if (included.length > 0) |
289 { | 418 { |
(...skipping 17 matching lines...) Expand all Loading... |
307 else | 436 else |
308 { | 437 { |
309 trigger["if-domain"].push("*" + name); | 438 trigger["if-domain"].push("*" + name); |
310 } | 439 } |
311 } | 440 } |
312 } | 441 } |
313 else if (excluded.length > 0) | 442 else if (excluded.length > 0) |
314 { | 443 { |
315 trigger["unless-domain"] = excluded.map(name => "*" + name); | 444 trigger["unless-domain"] = excluded.map(name => "*" + name); |
316 } | 445 } |
317 else if (filter instanceof filterClasses.BlockingFilter && | |
318 filter.contentType & typeMap.SUBDOCUMENT) | |
319 { | |
320 trigger["unless-top-url"] = [trigger["url-filter"]]; | |
321 if (trigger["url-filter-is-case-sensitive"]) | |
322 trigger["top-url-filter-is-case-sensitive"] = true; | |
323 } | |
324 | 446 |
325 rules.push({trigger: trigger, action: {type: action}}); | 447 rules.push({trigger: trigger, action: {type: action}}); |
| 448 |
| 449 if (needAltRules) |
| 450 { |
| 451 // Generate additional rules for any alternative URL schemes. |
| 452 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
| 453 rules.push(altRule); |
| 454 } |
326 } | 455 } |
327 | 456 |
328 function hasNonASCI(obj) | 457 function hasNonASCI(obj) |
329 { | 458 { |
330 if (typeof obj == "string") | 459 if (typeof obj == "string") |
331 { | 460 { |
332 if (/[^\x00-\x7F]/.test(obj)) | 461 if (/[^\x00-\x7F]/.test(obj)) |
333 return true; | 462 return true; |
334 } | 463 } |
335 | 464 |
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
539 { | 668 { |
540 convertFilterAddRules(rules, filter, "block", true, | 669 convertFilterAddRules(rules, filter, "block", true, |
541 requestFilterExceptionDomains); | 670 requestFilterExceptionDomains); |
542 } | 671 } |
543 | 672 |
544 for (let filter of this.requestExceptions) | 673 for (let filter of this.requestExceptions) |
545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 674 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
546 | 675 |
547 return rules.filter(rule => !hasNonASCI(rule)); | 676 return rules.filter(rule => !hasNonASCI(rule)); |
548 }; | 677 }; |
OLD | NEW |