Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 /** @module abp2blocklist */ | 18 /** @module abp2blocklist */ |
19 | 19 |
20 "use strict"; | 20 "use strict"; |
21 | 21 |
22 let filterClasses = require("filterClasses"); | 22 let filterClasses = require("filterClasses"); |
23 let tldjs = require("tldjs"); | |
24 let punycode = require("punycode"); | 23 let punycode = require("punycode"); |
25 | 24 |
26 const selectorLimit = 5000; | 25 const selectorLimit = 5000; |
27 const typeMap = filterClasses.RegExpFilter.typeMap; | 26 const typeMap = filterClasses.RegExpFilter.typeMap; |
28 const whitelistableRequestTypes = (typeMap.IMAGE | 27 |
29 | typeMap.STYLESHEET | 28 const httpRequestTypes = typeMap.IMAGE | |
30 | typeMap.SCRIPT | 29 typeMap.STYLESHEET | |
31 | typeMap.FONT | 30 typeMap.SCRIPT | |
32 | typeMap.MEDIA | 31 typeMap.FONT | |
33 | typeMap.POPUP | 32 typeMap.MEDIA | |
34 | typeMap.OBJECT | 33 typeMap.POPUP | |
35 | typeMap.OBJECT_SUBREQUEST | 34 typeMap.OBJECT | |
36 | typeMap.XMLHTTPREQUEST | 35 typeMap.OBJECT_SUBREQUEST | |
37 | typeMap.PING | 36 typeMap.XMLHTTPREQUEST | |
38 | typeMap.SUBDOCUMENT | 37 typeMap.PING | |
39 | typeMap.OTHER); | 38 typeMap.SUBDOCUMENT | |
39 typeMap.OTHER; | |
40 const rawRequestTypes = typeMap.XMLHTTPREQUEST | | |
41 typeMap.WEBSOCKET | | |
42 typeMap.WEBRTC | | |
43 typeMap.OBJECT_SUBREQUEST | | |
44 typeMap.PING | | |
45 typeMap.OTHER; | |
46 const whitelistableRequestTypes = httpRequestTypes | | |
47 typeMap.WEBSOCKET | | |
48 typeMap.WEBRTC; | |
40 | 49 |
/**
 * Call the given function on a later turn of the event loop.
 *
 * @param {function} func The function to call (invoked without arguments)
 * @returns {Promise} A promise that resolves with the return value of the
 *                    function, or rejects with whatever the function throws
 */
function callLater(func)
{
  return new Promise((resolve, reject) =>
  {
    let call = () =>
    {
      // The call happens outside of the promise executor, so an exception
      // thrown here would otherwise be uncaught and leave the promise pending
      // forever. Turn it into a rejection instead.
      try
      {
        resolve(func());
      }
      catch (error)
      {
        reject(error);
      }
    };

    // If this looks like Node.js, call process.nextTick, otherwise call
    // setTimeout. Some environments define a "process" object that has no
    // nextTick function, so we check for the function as well.
    if (typeof process != "undefined" && typeof process.nextTick == "function")
      process.nextTick(call);
    else
      setTimeout(call, 0);
  });
}
55 | 64 |
56 function async(funcs) | 65 function async(callees, mapFunction) |
57 { | 66 { |
58 if (!Array.isArray(funcs)) | 67 if (!(Symbol.iterator in callees)) |
59 funcs = Array.from(arguments); | 68 callees = [callees]; |
60 | 69 |
61 let lastPause = Date.now(); | 70 let lastPause = Date.now(); |
62 | 71 let index = 0; |
63 return funcs.reduce((promise, next) => promise.then(() => | 72 |
64 { | 73 let promise = Promise.resolve(); |
65 // If it has been 100ms or longer since the last call, take a pause. This | 74 |
66 // keeps the browser from freezing up. | 75 for (let next of callees) |
67 let now = Date.now(); | 76 { |
68 if (now - lastPause >= 100) | 77 let currentIndex = index; |
69 { | 78 |
70 lastPause = now; | 79 promise = promise.then(() => |
71 return callLater(next); | 80 { |
72 } | 81 if (mapFunction) |
73 | 82 next = mapFunction(next, currentIndex); |
74 return next(); | 83 |
75 }), | 84 // If it has been 100ms or longer since the last call, take a pause. This |
76 Promise.resolve()); | 85 // keeps the browser from freezing up. |
86 let now = Date.now(); | |
87 if (now - lastPause >= 100) | |
88 { | |
89 lastPause = now; | |
90 return callLater(next); | |
91 } | |
92 | |
93 return next(); | |
94 }); | |
95 | |
96 index++; | |
97 } | |
98 | |
99 return promise; | |
77 } | 100 } |
78 | 101 |
79 function parseDomains(domains, included, excluded) | 102 function parseDomains(domains, included, excluded) |
80 { | 103 { |
81 for (let domain in domains) | 104 for (let domain in domains) |
82 { | 105 { |
83 if (domain != "") | 106 if (domain != "") |
84 { | 107 { |
85 let enabled = domains[domain]; | 108 let enabled = domains[domain]; |
86 domain = punycode.toASCII(domain.toLowerCase()); | 109 domain = punycode.toASCII(domain.toLowerCase()); |
87 | 110 |
88 if (!enabled) | 111 if (!enabled) |
89 excluded.push(domain); | 112 excluded.push(domain); |
90 else if (!domains[""]) | 113 else if (!domains[""]) |
91 included.push(domain); | 114 included.push(domain); |
92 } | 115 } |
93 } | 116 } |
94 } | 117 } |
95 | 118 |
96 function escapeRegExp(s) | 119 function escapeRegExp(s) |
97 { | 120 { |
98 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | 121 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
99 } | 122 } |
100 | 123 |
101 function matchDomain(domain) | 124 function matchDomain(domain) |
102 { | 125 { |
126 if (!domain) | |
127 return "^https?://"; | |
128 | |
103 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; | 129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; |
130 } | |
131 | |
/**
 * Get the URL scheme patterns (regular expression fragments) for the given
 * content type.
 *
 * @param {number} [contentType] Bit mask of typeMap values
 * @returns {string[]} A non-empty list of URL scheme patterns; the first
 *                     entry is the one used for the primary rule
 */
function getURLSchemes(contentType)
{
  let hasWebSocket = (contentType & typeMap.WEBSOCKET) != 0;
  let hasWebRTC = (contentType & typeMap.WEBRTC) != 0;
  let hasHTTP = (contentType & httpRequestTypes) != 0;

  // If the given content type includes all supported URL schemes, simply
  // return a single generic URL scheme pattern. This minimizes the size of the
  // generated rule set. The downside to this is that it will also match
  // schemes that we do not want to match (e.g. "ftp://"), but this can be
  // mitigated by adding exceptions for those schemes.
  if (hasWebSocket && hasWebRTC && hasHTTP)
    return ["[^:]+:(//)?"];

  let urlSchemes = [];

  if (hasWebSocket)
    urlSchemes.push("wss?://");

  if (hasWebRTC)
    urlSchemes.push("stuns?:", "turns?:");

  // If the content type is missing or contains none of the types above, fall
  // back to HTTP(S). Callers concatenate the first entry into a regular
  // expression, so returning an empty list would leave them with the literal
  // string "undefined" in the pattern.
  if (hasHTTP || urlSchemes.length == 0)
    urlSchemes.push("https?://");

  return urlSchemes;
}
156 | |
157 function findSubdomainsInList(domain, list) | |
158 { | |
159 let subdomains = []; | |
160 let suffixLength = domain.length + 1; | |
161 | |
162 for (let name of list) | |
163 { | |
164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) | |
165 subdomains.push(name.slice(0, -suffixLength)); | |
166 } | |
167 | |
168 return subdomains; | |
169 } | |
170 | |
171 function extractFilterDomains(filters) | |
172 { | |
173 let domains = new Set(); | |
174 for (let filter of filters) | |
175 { | |
176 let parsed = parseFilterRegexpSource(filter.regexpSource); | |
177 if (parsed.justHostname) | |
178 domains.add(parsed.hostname); | |
179 } | |
180 return domains; | |
104 } | 181 } |
105 | 182 |
106 function convertElemHideFilter(filter, elemhideSelectorExceptions) | 183 function convertElemHideFilter(filter, elemhideSelectorExceptions) |
107 { | 184 { |
108 let included = []; | 185 let included = []; |
109 let excluded = []; | 186 let excluded = []; |
110 let rules = []; | |
111 | 187 |
112 parseDomains(filter.domains, included, excluded); | 188 parseDomains(filter.domains, included, excluded); |
113 | 189 |
114 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
115 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 191 return {matchDomains: included, selector: filter.selector}; |
116 } | 192 } |
117 | 193 |
/**
 * Parse the given filter "regexpSource" string, producing a regular
 * expression, extracting the hostname (if any), deciding if the regular
 * expression is safe to be converted + matched as lower case and noting if the
 * source contains anything after the hostname.
 *
 * @param {string} text regexpSource property of a filter
 * @param {string} [urlScheme] The URL scheme pattern to use in the regular
 *                             expression; defaults to the first pattern
 *                             returned by getURLSchemes() or, failing that,
 *                             to "https?://"
 * @returns {object} An object containing a regular expression string, a bool
 *                   indicating if the filter can be safely matched as lower
 *                   case, a hostname string (or undefined) and a bool
 *                   indicating if the source only contains a hostname or not:
 *                     {regexp: "...",
 *                      canSafelyMatchAsLowercase: true/false,
 *                      hostname: "...",
 *                      justHostname: true/false}
 */
function parseFilterRegexpSource(text, urlScheme)
{
  let regexp = [];

  // Convert the text into an array of Unicode characters.
  //
  // In the case of surrogate pairs (the smiley emoji, for example), one
  // Unicode code point is represented by two JavaScript characters together.
  // We want to iterate over Unicode code points rather than JavaScript
  // characters.
  let characters = Array.from(text);

  let lastIndex = characters.length - 1;
  let hostname;
  let hostnameStart = null;
  let hostnameFinished = false;
  let justHostname = false;
  let canSafelyMatchAsLowercase = false;

  // getURLSchemes() may not yield a pattern when called without a content
  // type; never let "undefined" end up inside the regular expression.
  if (!urlScheme)
    urlScheme = getURLSchemes()[0] || "https?://";

  for (let i = 0; i < characters.length; i++)
  {
    let c = characters[i];

    if (hostnameFinished)
      justHostname = false;

    // If we're currently inside the hostname we have to be careful not to
    // escape any characters until after we have converted it to punycode.
    if (hostnameStart != null && !hostnameFinished)
    {
      let endingChar = (c == "*" || c == "^" ||
                        c == "?" || c == "/" || c == "|");
      if (!endingChar && i != lastIndex)
        continue;

      hostname = punycode.toASCII(
        characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
                  .toLowerCase()
      );
      hostnameFinished = justHostname = true;
      regexp.push(escapeRegExp(hostname));
      if (!endingChar)
        break;
    }

    switch (c)
    {
      case "*":
        if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
          regexp.push(".*");
        break;
      case "^":
      {
        let alphabet = "a-z";
        // If justHostname is true and we've encountered a "^", it means we're
        // still in the hostname part of the URL. Since hostnames are always
        // lower case (Punycode), there's no need to include "A-Z" in the
        // pattern. Further, subsequent code may lower-case the entire regular
        // expression (if the URL contains only the hostname part), leaving us
        // with "a-za-z", which would be redundant.
        if (!justHostname)
          alphabet = "A-Z" + alphabet;
        let digits = "0-9";
        // Note that the "-" must appear first here in order to retain its
        // literal meaning within the brackets.
        let specialCharacters = "-_.%";
        let separator = "[^" + specialCharacters + alphabet + digits + "]";
        if (i == 0)
          regexp.push("^" + urlScheme + "(.*" + separator + ")?");
        else if (i == lastIndex)
          regexp.push("(" + separator + ".*)?$");
        else
          regexp.push(separator);
        break;
      }
      case "|":
        if (i == 0)
        {
          regexp.push("^");
          break;
        }
        if (i == lastIndex)
        {
          regexp.push("$");
          break;
        }
        if (i == 1 && characters[0] == "|")
        {
          hostnameStart = i + 1;
          canSafelyMatchAsLowercase = true;
          regexp.push(urlScheme + "([^/]+\\.)?");
          break;
        }
        regexp.push("\\|");
        break;
      case "/":
        if (!hostnameFinished &&
            characters[i - 2] == ":" && characters[i - 1] == "/")
        {
          hostnameStart = i + 1;
          canSafelyMatchAsLowercase = true;
        }
        regexp.push("/");
        break;
      case ".": case "+": case "$": case "?":
      case "{": case "}": case "(": case ")":
      case "[": case "]": case "\\":
        regexp.push("\\", c);
        break;
      default:
        if (hostnameFinished && (c >= "a" && c <= "z" ||
                                 c >= "A" && c <= "Z"))
          canSafelyMatchAsLowercase = false;
        // Percent-encode everything except an existing "%", which is kept
        // as is so that already encoded sequences are not encoded twice.
        regexp.push(c == "%" ? c : encodeURI(c));
    }
  }

  return {
    regexp: regexp.join(""),
    canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
    hostname: hostname,
    justHostname: justHostname
  };
}
228 | 336 |
229 function getResourceTypes(filter) | 337 function getResourceTypes(contentType) |
230 { | 338 { |
231 let types = []; | 339 let types = []; |
232 | 340 |
233 if (filter.contentType & typeMap.IMAGE) | 341 if (contentType & typeMap.IMAGE) |
234 types.push("image"); | 342 types.push("image"); |
235 if (filter.contentType & typeMap.STYLESHEET) | 343 if (contentType & typeMap.STYLESHEET) |
236 types.push("style-sheet"); | 344 types.push("style-sheet"); |
237 if (filter.contentType & typeMap.SCRIPT) | 345 if (contentType & typeMap.SCRIPT) |
238 types.push("script"); | 346 types.push("script"); |
239 if (filter.contentType & typeMap.FONT) | 347 if (contentType & typeMap.FONT) |
240 types.push("font"); | 348 types.push("font"); |
241 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | 349 if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
242 types.push("media"); | 350 types.push("media"); |
243 if (filter.contentType & typeMap.POPUP) | 351 if (contentType & typeMap.POPUP) |
244 types.push("popup"); | 352 types.push("popup"); |
245 if (filter.contentType & (typeMap.XMLHTTPREQUEST | | 353 if (contentType & rawRequestTypes) |
246 typeMap.OBJECT_SUBREQUEST | | |
247 typeMap.PING | | |
248 typeMap.OTHER)) | |
249 types.push("raw"); | 354 types.push("raw"); |
250 if (filter.contentType & typeMap.SUBDOCUMENT) | 355 if (contentType & typeMap.SUBDOCUMENT) |
251 types.push("document"); | 356 types.push("document"); |
252 | 357 |
253 return types; | 358 return types; |
254 } | 359 } |
255 | 360 |
/**
 * Generate copies of a rule for alternative URL schemes.
 *
 * If the rule's URL filter already begins with the first URL scheme pattern,
 * copies are made for the remaining patterns only; otherwise a copy is made
 * for every pattern, each matching the original filter anywhere after the
 * scheme.
 *
 * @param {object} trigger The trigger of the original rule; its "url-filter"
 *                         is expected to be anchored with a leading "^"
 * @param {object} action The action of the original rule
 * @param {string[]} urlSchemes The URL scheme patterns for the rule
 * @returns {object[]} The copies, as {trigger, action} objects that share no
 *                     state with the arguments or with each other
 */
function makeRuleCopies(trigger, action, urlSchemes)
{
  let copies = [];

  // Always make a deep copy of the rule, since rules may have to be
  // manipulated individually at a later stage. This goes for the action as
  // much as for the trigger.
  let stringifiedTrigger = JSON.stringify(trigger);
  let stringifiedAction = JSON.stringify(action);

  let urlFilter = trigger["url-filter"];

  // Drop the leading anchor, but only if there is one.
  let filterPattern = urlFilter.startsWith("^") ? urlFilter.substring(1) :
                                                  urlFilter;
  let startIndex = 0;

  // If the URL filter already begins with the first URL scheme pattern, skip
  // it.
  if (urlFilter.startsWith("^" + urlSchemes[0]))
  {
    filterPattern = filterPattern.substring(urlSchemes[0].length);
    startIndex = 1;
  }
  else
  {
    filterPattern = ".*" + filterPattern;
  }

  for (let i = startIndex; i < urlSchemes.length; i++)
  {
    let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
      "url-filter": "^" + urlSchemes[i] + filterPattern
    });
    copies.push({trigger: copyTrigger, action: JSON.parse(stringifiedAction)});
  }

  return copies;
}
394 | |
395 function excludeTopURLFromTrigger(trigger) | |
396 { | |
397 trigger["unless-top-url"] = [trigger["url-filter"]]; | |
398 if (trigger["url-filter-is-case-sensitive"]) | |
399 trigger["top-url-filter-is-case-sensitive"] = true; | |
400 } | |
401 | |
402 function convertFilterAddRules(rules, filter, action, withResourceTypes, | |
403 exceptionDomains, contentType) | |
404 { | |
405 if (!contentType) | |
406 contentType = filter.contentType; | |
407 | |
408 // If WebSocket or WebRTC are given along with other options but not | |
409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type, | |
410 // we must generate multiple rules. For example, for the filter | |
411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw" | |
412 // and another rule with "^https?://" and "image". If we merge the two, we | |
413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest) | |
414 // inadvertently. | |
415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET && | |
416 !(contentType & typeMap.WEBRTC && | |
417 contentType & rawRequestTypes & httpRequestTypes)) || | |
418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC && | |
419 !(contentType & typeMap.WEBSOCKET && | |
420 contentType & rawRequestTypes & httpRequestTypes))) | |
421 { | |
422 if (contentType & typeMap.WEBSOCKET) | |
423 { | |
424 convertFilterAddRules(rules, filter, action, withResourceTypes, | |
425 exceptionDomains, typeMap.WEBSOCKET); | |
426 } | |
427 | |
428 if (contentType & typeMap.WEBRTC) | |
429 { | |
430 convertFilterAddRules(rules, filter, action, withResourceTypes, | |
431 exceptionDomains, typeMap.WEBRTC); | |
432 } | |
433 | |
434 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); | |
435 | |
436 if (!contentType) | |
437 return; | |
438 } | |
439 | |
440 let urlSchemes = getURLSchemes(contentType); | |
441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); | |
274 | 442 |
275 // For the special case of $document whitelisting filters with just a domain | 443 // For the special case of $document whitelisting filters with just a domain |
276 // we can generate an equivalent blocking rule exception using if-domain. | 444 // we can generate an equivalent blocking rule exception using if-domain. |
277 if (filter instanceof filterClasses.WhitelistFilter && | 445 if (filter instanceof filterClasses.WhitelistFilter && |
278 filter.contentType & typeMap.DOCUMENT && | 446 contentType & typeMap.DOCUMENT && |
279 parsed.justHostname) | 447 parsed.justHostname) |
280 { | 448 { |
281 rules.push({ | 449 rules.push({ |
282 trigger: { | 450 trigger: { |
283 "url-filter": ".*", | 451 "url-filter": ".*", |
284 "if-domain": addDomainPrefix([parsed.hostname]) | 452 "if-domain": ["*" + parsed.hostname] |
285 }, | 453 }, |
286 action: {type: "ignore-previous-rules"} | 454 action: {type: "ignore-previous-rules"} |
287 }); | 455 }); |
288 // If the filter contains other supported options we'll need to generate | 456 // If the filter contains other supported options we'll need to generate |
289 // further rules for it, but if not we can simply return now. | 457 // further rules for it, but if not we can simply return now. |
290 if (!(filter.contentType & whitelistableRequestTypes)) | 458 if (!(contentType & whitelistableRequestTypes)) |
291 return; | 459 return; |
292 } | 460 } |
293 | 461 |
294 let trigger = {"url-filter": parsed.regexp}; | 462 let trigger = {"url-filter": parsed.regexp}; |
295 | 463 |
296 // Limit rules to HTTP(S) URLs | 464 // If the URL filter begins with one of the URL schemes for this content |
297 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 465 // type, we generate additional rules for all the URL scheme patterns; |
298 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 466 // otherwise, if the start of the URL filter literally matches the first URL |
467 // scheme pattern, we just generate additional rules for the remaining URL | |
468 // scheme patterns. | |
469 // | |
470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" | |
471 // in front of this and generate two additional rules for | |
472 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, | |
473 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate | |
474 // "^turns?:([^/]+\\.)?foo" in addition. | |
475 // | |
476 // Note that the filter can be already anchored to the beginning | |
477 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional | |
478 // rules. | |
479 let needAltRules = trigger["url-filter"][0] != "^" || | |
480 trigger["url-filter"].startsWith("^" + urlSchemes[0]); | |
481 | |
482 if (trigger["url-filter"][0] != "^") | |
483 { | |
484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme) | |
485 .test(trigger["url-filter"]))) | |
486 { | |
487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; | |
488 } | |
489 | |
490 trigger["url-filter"] = "^" + trigger["url-filter"]; | |
491 } | |
299 | 492 |
300 // For rules containing only a hostname we know that we're matching against | 493 // For rules containing only a hostname we know that we're matching against |
301 // a lowercase string unless the matchCase option was passed. | 494 // a lowercase string unless the matchCase option was passed. |
302 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | 495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
303 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 496 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
304 | 497 |
305 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | 498 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
306 trigger["url-filter-is-case-sensitive"] = true; | 499 trigger["url-filter-is-case-sensitive"] = true; |
307 | 500 |
308 let included = []; | 501 let included = []; |
309 let excluded = []; | 502 let excluded = []; |
310 | 503 |
311 parseDomains(filter.domains, included, excluded); | 504 parseDomains(filter.domains, included, excluded); |
312 | 505 |
506 if (exceptionDomains) | |
507 excluded = excluded.concat(exceptionDomains); | |
508 | |
313 if (withResourceTypes) | 509 if (withResourceTypes) |
314 { | 510 { |
315 trigger["resource-type"] = getResourceTypes(filter); | 511 let resourceTypes = getResourceTypes(contentType); |
316 | 512 |
317 if (trigger["resource-type"].length == 0) | 513 // Content blocker rules can't differentiate between sub-document requests |
514 // (iframes) and top-level document requests. To avoid too many false | |
515 // positives, we prevent rules with no hostname part from blocking document | |
516 // requests. | |
517 // | |
518 // Once Safari 11 becomes our minimum supported version, we could change | |
519 // our approach here to use the new "unless-top-url" property instead. | |
520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname) | |
521 resourceTypes = resourceTypes.filter(type => type != "document"); | |
522 | |
523 if (resourceTypes.length == 0) | |
318 return; | 524 return; |
525 | |
526 trigger["resource-type"] = resourceTypes; | |
319 } | 527 } |
320 | 528 |
321 if (filter.thirdParty != null) | 529 if (filter.thirdParty != null) |
322 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
323 | 531 |
532 let addTopLevelException = false; | |
533 | |
324 if (included.length > 0) | 534 if (included.length > 0) |
325 trigger["if-domain"] = addDomainPrefix(included); | 535 { |
536 trigger["if-domain"] = []; | |
537 | |
538 for (let name of included) | |
539 { | |
540 // If this is a blocking filter or an element hiding filter, add the | |
541 // subdomain wildcard only if no subdomains have been excluded. | |
542 let notSubdomains = null; | |
543 if ((filter instanceof filterClasses.BlockingFilter || | |
544 filter instanceof filterClasses.ElemHideFilter) && | |
545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0) | |
546 { | |
547 trigger["if-domain"].push(name); | |
548 | |
549 // Add the "www" prefix but only if it hasn't been excluded. | |
550 if (!notSubdomains.includes("www")) | |
551 trigger["if-domain"].push("www." + name); | |
552 } | |
553 else | |
554 { | |
555 trigger["if-domain"].push("*" + name); | |
556 } | |
557 } | |
558 } | |
326 else if (excluded.length > 0) | 559 else if (excluded.length > 0) |
327 trigger["unless-domain"] = addDomainPrefix(excluded); | 560 { |
561 trigger["unless-domain"] = excluded.map(name => "*" + name); | |
562 } | |
563 else if (filter instanceof filterClasses.BlockingFilter && | |
564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname) | |
565 { | |
566 // Rules with a hostname part are still allowed to block document requests, | |
567 // but we add an exception for top-level documents. | |
568 // | |
569 // Note that we can only do this if there's no "unless-domain" property for | |
570 // now. This also only works in Safari 11 onwards, while older versions | |
571 // simply ignore this property. Once Safari 11 becomes our minimum | |
572 // supported version, we can merge "unless-domain" into "unless-top-url". | |
573 addTopLevelException = true; | |
574 excludeTopURLFromTrigger(trigger); | |
575 } | |
328 | 576 |
329 rules.push({trigger: trigger, action: {type: action}}); | 577 rules.push({trigger: trigger, action: {type: action}}); |
330 } | 578 |
331 | 579 if (needAltRules) |
332 function hasNonASCI(obj) | 580 { |
333 { | 581 // Generate additional rules for any alternative URL schemes. |
334 if (typeof obj == "string") | 582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
335 { | 583 { |
336 if (/[^\x00-\x7F]/.test(obj)) | 584 if (addTopLevelException) |
337 return true; | 585 excludeTopURLFromTrigger(altRule.trigger); |
338 } | 586 |
339 | 587 rules.push(altRule); |
340 if (typeof obj == "object") | 588 } |
341 { | 589 } |
342 if (obj instanceof Array) | |
343 for (let item of obj) | |
344 if (hasNonASCI(item)) | |
345 return true; | |
346 | |
347 let names = Object.getOwnPropertyNames(obj); | |
348 for (let name of names) | |
349 if (hasNonASCI(obj[name])) | |
350 return true; | |
351 } | |
352 | |
353 return false; | |
354 } | 590 } |
355 | 591 |
356 function convertIDSelectorsToAttributeSelectors(selector) | 592 function convertIDSelectorsToAttributeSelectors(selector) |
357 { | 593 { |
358 // First we figure out where all the IDs are | 594 // First we figure out where all the IDs are |
359 let sep = ""; | 595 let sep = ""; |
360 let start = null; | 596 let start = null; |
361 let positions = []; | 597 let positions = []; |
362 for (let i = 0; i < selector.length; i++) | 598 for (let i = 0; i < selector.length; i++) |
363 { | 599 { |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
397 { | 633 { |
398 newSelector.push(selector.substring(i, pos.start)); | 634 newSelector.push(selector.substring(i, pos.start)); |
399 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
400 i = pos.end; | 636 i = pos.end; |
401 } | 637 } |
402 newSelector.push(selector.substring(i)); | 638 newSelector.push(selector.substring(i)); |
403 | 639 |
404 return newSelector.join(""); | 640 return newSelector.join(""); |
405 } | 641 } |
406 | 642 |
/**
 * Add "css-display-none" rules for the given selectors.
 *
 * The selectors are combined into groups of at most selectorLimit selectors,
 * with one rule generated for each group. Note that the selectors array is
 * emptied in the process.
 *
 * @param {object[]} rules The list to which the rules are appended
 * @param {string[]} selectors The CSS selectors to hide
 * @param {string} domain The domain the selectors apply to, or a falsy value
 *                        if they apply everywhere
 * @param {Set.<string>} exceptionDomains Domains on which the selectors must
 *                                        not be applied
 */
function addCSSRules(rules, selectors, domain, exceptionDomains)
{
  let unlessDomain = [];

  if (exceptionDomains)
  {
    let suffix = "." + domain;

    exceptionDomains.forEach(name =>
    {
      // For domain-specific filters, include the exception domains only if
      // they're subdomains of the given domain.
      if (!domain || name.endsWith(suffix))
        unlessDomain.push("*" + name);
    });
  }

  while (selectors.length)
  {
    let selector = selectors.splice(0, selectorLimit).join(", ");

    // As of Safari 9.0 element IDs are matched as lowercase. We work around
    // this by converting to the attribute format [id="elementID"]
    selector = convertIDSelectorsToAttributeSelectors(selector);

    let rule = {
      trigger: {"url-filter": matchDomain(domain),
                "url-filter-is-case-sensitive": true},
      action: {type: "css-display-none",
               selector: selector}
    };

    // Only add the property if there is something to exclude, since an empty
    // domain list is not a valid condition. Each rule gets its own copy of
    // the list so that rules can be manipulated individually later on.
    if (unlessDomain.length > 0)
      rule.trigger["unless-domain"] = unlessDomain.slice();

    rules.push(rule);
  }
}
676 | |
407 /** | 677 /** |
408 * Check if two strings are a close match | 678 * Check if two strings are a close match |
409 * | 679 * |
410 * This function returns an edit operation, one of "substitute", "delete", and | 680 * This function returns an edit operation, one of "substitute", "delete", and |
411 * "insert", along with an index in the source string where the edit must occur | 681 * "insert", along with an index in the source string where the edit must occur |
412 * in order to arrive at the target string. If the strings are not a close | 682 * in order to arrive at the target string. If the strings are not a close |
413 * match, it returns null. | 683 * match, it returns null. |
414 * | 684 * |
415 * Two strings are considered to be a close match if they are one edit | 685 * Two strings are considered to be a close match if they are one edit |
416 * operation apart. | 686 * operation apart. |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
455 // calculation. | 725 // calculation. |
456 if (diff < 0) | 726 if (diff < 0) |
457 { | 727 { |
458 let tmp = s; | 728 let tmp = s; |
459 s = t; | 729 s = t; |
460 t = tmp; | 730 t = tmp; |
461 } | 731 } |
462 | 732 |
463 let edit = null; | 733 let edit = null; |
464 | 734 |
465 let i = 0, j = 0; | 735 let i = 0; |
kzar
2017/05/29 09:49:54
Nit: Please split into separate lets.
Also I wond
Manish Jethani
2017/05/31 06:43:06
Done.
| |
736 let j = 0; | |
466 | 737 |
467 // Start from the beginning and keep going until we hit a character that | 738 // Start from the beginning and keep going until we hit a character that |
468 // doesn't match. | 739 // doesn't match. |
469 for (; i < s.length; i++) | 740 for (; i < s.length; i++) |
470 { | 741 { |
471 if (s[i] != t[i]) | 742 if (s[i] != t[i]) |
472 break; | 743 break; |
473 } | 744 } |
474 | 745 |
475 // Now do exactly the same from the end, but also stop if we reach the | 746 // Now do exactly the same from the end, but also stop if we reach the |
476 // position where we terminated the previous loop. | 747 // position where we terminated the previous loop. |
477 for (; j < t.length; j++) | 748 for (; j < t.length; j++) |
kzar
2017/05/29 09:49:53
Since j is working backwards through the character
Manish Jethani
2017/05/31 06:43:06
Then we would need two variables, one for s.length
| |
478 { | 749 { |
479 if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1]) | 750 if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1]) |
480 break; | 751 break; |
481 } | 752 } |
482 | 753 |
483 if (diff == 0) | 754 if (diff == 0) |
484 { | 755 { |
485 // If the strings are equal in length and the delta isn't exactly one | 756 // If the strings are equal in length and the delta isn't exactly one |
486 // character, it's not a close match. | 757 // character, it's not a close match. |
487 if (t.length - j - i != 1) | 758 if (t.length - j - i != 1) |
kzar
2017/05/29 09:49:53
Probably a dumb question but wouldn't this also co
Manish Jethani
2017/05/31 06:43:07
Yes, identical strings are not a close match by de
| |
488 return null; | 759 return null; |
489 } | 760 } |
490 else if (i != t.length - j) | 761 else if (i != t.length - j) |
491 { | 762 { |
492 // For strings of unequal length, if we haven't found a match for every | 763 // For strings of unequal length, if we haven't found a match for every |
493 // single character in the shorter string counting from both the beginning | 764 // single character in the shorter string counting from both the beginning |
494 // and the end, it's not a close match. | 765 // and the end, it's not a close match. |
495 return null; | 766 return null; |
496 } | 767 } |
497 | 768 |
(...skipping 25 matching lines...) Expand all Loading... | |
523 edit.endIndex = s.length - j; | 794 edit.endIndex = s.length - j; |
524 } | 795 } |
525 | 796 |
526 return edit; | 797 return edit; |
527 } | 798 } |
528 | 799 |
529 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive) | 800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive) |
530 { | 801 { |
531 const heuristicRange = 1000; | 802 const heuristicRange = 1000; |
532 | 803 |
804 let ol = rulesInfo.length; | |
805 | |
533 // Throw out obviously redundant rules. | 806 // Throw out obviously redundant rules. |
534 return async(rulesInfo.map((ruleInfo, index) => () => | 807 return async(rulesInfo, (ruleInfo, index) => () => |
535 { | 808 { |
536 // If this rule is already marked as redundant, don't bother comparing it | 809 // If this rule is already marked as redundant, don't bother comparing it |
537 // with other rules. | 810 // with other rules. |
538 if (rulesInfo[index].redundant) | 811 if (rulesInfo[index].redundant) |
539 return; | 812 return; |
540 | 813 |
541 let limit = exhaustive ? rulesInfo.length : | 814 let limit = exhaustive ? rulesInfo.length : |
542 Math.min(index + heuristicRange, rulesInfo.length); | 815 Math.min(index + heuristicRange, rulesInfo.length); |
543 | 816 |
544 for (let i = index, j = i + 1; j < limit; j++) | 817 for (let i = index, j = i + 1; j < limit; j++) |
(...skipping 12 matching lines...) Expand all Loading... | |
557 { | 830 { |
558 rulesInfo[i].redundant = true; | 831 rulesInfo[i].redundant = true; |
559 break; | 832 break; |
560 } | 833 } |
561 } | 834 } |
562 else if (target.substring(0, source.length) == source) | 835 else if (target.substring(0, source.length) == source) |
563 { | 836 { |
564 rulesInfo[j].redundant = true; | 837 rulesInfo[j].redundant = true; |
565 } | 838 } |
566 } | 839 } |
567 })) | 840 }) |
568 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant)); | 841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant)); |
569 } | 842 } |
570 | 843 |
571 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive) | 844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive) |
572 { | 845 { |
573 // Closely matching rules are likely to be within a certain range. We only | 846 // Closely matching rules are likely to be within a certain range. We only |
574 // look for matches within this range by default. If we increase this value, | 847 // look for matches within this range by default. If we increase this value, |
575 // it can give us more matches and a smaller resulting rule set, but possibly | 848 // it can give us more matches and a smaller resulting rule set, but possibly |
576 // at a significant performance cost. | 849 // at a significant performance cost. |
577 // | 850 // |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
719 if (best.length > 0) | 992 if (best.length > 0) |
720 { | 993 { |
721 let urlFilter = rule.trigger["url-filter"]; | 994 let urlFilter = rule.trigger["url-filter"]; |
722 | 995 |
723 let editIndex = best[0].edit.index; | 996 let editIndex = best[0].edit.index; |
724 | 997 |
725 if (!multiEdit) | 998 if (!multiEdit) |
726 { | 999 { |
727 // Merge all the matching rules into this one. | 1000 // Merge all the matching rules into this one. |
728 | 1001 |
729 let characters = []; | 1002 let characters = [urlFilter[editIndex]]; |
730 let quantifier = ""; | 1003 let quantifier = ""; |
731 | 1004 |
732 for (let match of best) | 1005 for (let match of best) |
733 { | 1006 { |
734 if (match.edit.type == "delete") | 1007 if (match.edit.type == "delete") |
735 { | 1008 { |
736 quantifier = "?"; | 1009 quantifier = "?"; |
737 } | 1010 } |
738 else | 1011 else |
739 { | 1012 { |
740 let character = rulesInfo[match.index].rule | 1013 let character = rulesInfo[match.index].rule |
741 .trigger["url-filter"][editIndex]; | 1014 .trigger["url-filter"][editIndex]; |
742 characters.push(character); | 1015 |
1016 // Insert any hyphen at the beginning so it gets interpreted as a | |
1017 // literal hyphen. | |
1018 if (character == "-") | |
1019 characters.unshift(character); | |
1020 else | |
1021 characters.push(character); | |
743 } | 1022 } |
744 | 1023 |
745 // Mark the target rule as merged so other rules don't try to merge | 1024 // Mark the target rule as merged so other rules don't try to merge |
746 // it again. | 1025 // it again. |
747 rulesInfo[match.index].merged = true; | 1026 rulesInfo[match.index].merged = true; |
748 } | 1027 } |
749 | 1028 |
750 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | 1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + |
751 urlFilter.substring(editIndex + 1); | 1030 urlFilter.substring(editIndex + 1); |
752 if (characters.length > 0) | 1031 if (characters.length > 1) |
753 { | 1032 { |
754 urlFilter = urlFilter.substring(0, editIndex) + "[" + | 1033 urlFilter = urlFilter.substring(0, editIndex) + "[" + |
755 urlFilter[editIndex] + characters.join("") + "]" + | 1034 characters.join("") + "]" + |
756 urlFilter.substring(editIndex + 1); | 1035 urlFilter.substring(editIndex + 1); |
757 } | 1036 } |
758 } | 1037 } |
759 else | 1038 else |
760 { | 1039 { |
761 let editEndIndex = best[0].edit.endIndex; | 1040 let editEndIndex = best[0].edit.endIndex; |
762 | 1041 |
763 // Mark the target rule as merged so other rules don't try to merge it | 1042 // Mark the target rule as merged so other rules don't try to merge it |
764 // again. | 1043 // again. |
765 rulesInfo[best[0].index].merged = true; | 1044 rulesInfo[best[0].index].merged = true; |
766 | 1045 |
767 urlFilter = urlFilter.substring(0, editIndex) + "(" + | 1046 urlFilter = urlFilter.substring(0, editIndex) + "(" + |
768 urlFilter.substring(editIndex, editEndIndex) + ")?" + | 1047 urlFilter.substring(editIndex, editEndIndex) + ")?" + |
769 urlFilter.substring(editEndIndex); | 1048 urlFilter.substring(editEndIndex); |
770 } | 1049 } |
771 | 1050 |
772 rule.trigger["url-filter"] = urlFilter; | 1051 rule.trigger["url-filter"] = urlFilter; |
773 | 1052 |
774 // Mark this rule as one that has had other rules merged into it. | 1053 // Mark this rule as one that has had other rules merged into it. |
775 ruleInfo.mergedInto = true; | 1054 ruleInfo.mergedInto = true; |
776 } | 1055 } |
777 } | 1056 } |
778 } | 1057 } |
779 | 1058 |
/**
 * Merge rules whose "url-filter" values are close matches of each other.
 *
 * First looks for matches for every rule, then merges the candidates that
 * were found.
 *
 * @param {object[]} rulesInfo Rule wrappers to process
 * @param {boolean} exhaustive Passed through to
 *                             findMatchesForRuleByURLFilter
 * @return {Promise} Result of the async helper chained with the merge step
 *                   (presumably a promise; the helper is defined elsewhere)
 */
function mergeRulesByURLFilter(rulesInfo, exhaustive)
{
  return async(rulesInfo, (ruleInfo, index) => () =>
    findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
  )
  .then(() => mergeCandidateRulesByURLFilter(rulesInfo));
}
787 | 1066 |
/**
 * Merge a group of rules into the first rule of the group by taking the union
 * of one array-valued property.
 *
 * The first rule receives the de-duplicated union of the property values of
 * all rules in the group and is flagged as "mergedInto"; every other rule is
 * flagged as "merged". The given array itself is not modified. Groups of
 * fewer than two rules are left untouched.
 *
 * @param {object[]} rulesInfo Rule wrappers of the form {rule: ...}
 * @param {string} propertyType Key of the rule section holding the property
 *                              (e.g. "trigger")
 * @param {string} property Name of the array property to merge
 */
function mergeRulesByArrayProperty(rulesInfo, propertyType, property)
{
  if (rulesInfo.length <= 1)
    return;

  let target = rulesInfo[0];
  let valueSet = new Set(target.rule[propertyType][property]);

  for (let i = 1; i < rulesInfo.length; i++)
  {
    // A rule may not have the property at all.
    for (let value of rulesInfo[i].rule[propertyType][property] || [])
      valueSet.add(value);

    rulesInfo[i].merged = true;
  }

  // Don't create an empty array on the target if no rule had any values.
  if (valueSet.size > 0)
    target.rule[propertyType][property] = Array.from(valueSet);

  target.mergedInto = true;
}
812 | 1087 |
813 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) | 1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) |
814 { | 1089 { |
815 let mergeableRulesInfoByGroup = new Map(); | 1090 let mergeableRulesInfoByGroup = new Map(); |
816 | 1091 |
817 for (let ruleInfo of rulesInfo) | 1092 for (let ruleInfo of rulesInfo) |
818 { | 1093 { |
819 let copy = { | 1094 let copy = { |
820 trigger: Object.assign({}, ruleInfo.rule.trigger), | 1095 trigger: Object.assign({}, ruleInfo.rule.trigger), |
(...skipping 17 matching lines...) Expand all Loading... | |
838 | 1113 |
/**
 * Reduce a list of rules by dropping redundant ones and merging similar ones.
 *
 * The rules are first grouped by everything except their "url-filter"; within
 * each group redundant rules are eliminated and closely matching URL filters
 * are merged. Afterwards the remaining rules are merged by their
 * "resource-type" and then by their "if-domain" arrays.
 *
 * @param {object[]} rules Rules to merge
 * @param {boolean} exhaustive Passed through to the URL filter based steps
 * @return {Promise.<object[]>} The remaining rules (presumably a promise; the
 *                              async helper is defined elsewhere)
 */
function mergeRules(rules, exhaustive)
{
  let rulesInfo = rules.map(rule => ({rule}));

  let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];

  return async(() =>
  {
    let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");
    return async(map.values(), mergeableRulesInfo => () =>
      eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)
      .then(remaining => mergeRulesByURLFilter(remaining, exhaustive))
    )
    .then(() =>
    {
      // Filter out rules that are redundant or have been merged into other
      // rules.
      rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&
                                               !ruleInfo.merged);
    });
  })
  .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>
  {
    let map = groupRulesByMergeableProperty(rulesInfo, "trigger",
                                            arrayProperty);
    return async(map.values(), mergeableRulesInfo => () =>
      mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)
    )
    .then(() =>
    {
      // Drop the rules that have just been folded into another rule before
      // grouping by the next property.
      rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);
    });
  }))
  .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));
}
874 | 1149 |
875 let ContentBlockerList = | 1150 let ContentBlockerList = |
876 /** | 1151 /** |
877 * Create a new Adblock Plus filter to content blocker list converter | 1152 * Create a new Adblock Plus filter to content blocker list converter |
878 * | 1153 * |
879 * @param {object} options Options for content blocker list generation | 1154 * @param {object} options Options for content blocker list generation |
880 * | 1155 * |
881 * @constructor | 1156 * @constructor |
882 */ | 1157 */ |
883 exports.ContentBlockerList = function(options) | 1158 exports.ContentBlockerList = function (options) |
884 { | 1159 { |
885 const defaultOptions = { | 1160 const defaultOptions = { |
886 merge: "auto" | 1161 merge: "auto" |
887 }; | 1162 }; |
888 | 1163 |
889 this.options = Object.assign({}, defaultOptions, options); | 1164 this.options = Object.assign({}, defaultOptions, options); |
890 | 1165 |
891 this.requestFilters = []; | 1166 this.requestFilters = []; |
892 this.requestExceptions = []; | 1167 this.requestExceptions = []; |
893 this.elemhideFilters = []; | 1168 this.elemhideFilters = []; |
894 this.elemhideExceptions = []; | 1169 this.elemhideExceptions = []; |
1170 this.genericblockExceptions = []; | |
1171 this.generichideExceptions = []; | |
895 this.elemhideSelectorExceptions = new Map(); | 1172 this.elemhideSelectorExceptions = new Map(); |
896 }; | 1173 }; |
897 | 1174 |
/**
 * Add Adblock Plus filter to be converted
 *
 * Filters with sitekeys and regular expression filters without a
 * regexpSource are ignored. Every other filter is stored in the list(s)
 * matching its class and content type until generateRules is called.
 *
 * @param {Filter} filter Filter to convert
 */
ContentBlockerList.prototype.addFilter = function(filter)
{
  if (filter.sitekeys)
    return;
  if (filter instanceof filterClasses.RegExpFilter &&
      filter.regexpSource == null)
    return;

  if (filter instanceof filterClasses.BlockingFilter)
    this.requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
  {
    if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))
      this.requestExceptions.push(filter);

    if (filter.contentType & typeMap.GENERICBLOCK)
      this.genericblockExceptions.push(filter);

    // An exception that is of type ELEMHIDE is never additionally recorded as
    // a GENERICHIDE exception.
    if (filter.contentType & typeMap.ELEMHIDE)
      this.elemhideExceptions.push(filter);
    else if (filter.contentType & typeMap.GENERICHIDE)
      this.generichideExceptions.push(filter);
  }

  if (filter instanceof filterClasses.ElemHideFilter)
    this.elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
  {
    // NOTE(review): elemhideSelectorExceptions is created as a Map but is
    // accessed via plain properties here, so selectors that coincide with
    // Map.prototype member names would misbehave. Left as is because the
    // consumer of this structure is not visible here - confirm before
    // switching to get()/set().
    let domains = this.elemhideSelectorExceptions[filter.selector];
    if (!domains)
      domains = this.elemhideSelectorExceptions[filter.selector] = [];

    parseDomains(filter.domains, domains, []);
  }
};
935 | 1217 |
/**
 * Generate content blocker list for all filters that were added
 *
 * @return {Promise.<object[]>} The generated rules of all rule groups,
 *                              concatenated (presumably a promise; the async
 *                              helper is defined elsewhere)
 */
ContentBlockerList.prototype.generateRules = function()
{
  let cssRules = [];
  let cssExceptionRules = [];
  let blockingRules = [];
  let blockingExceptionRules = [];

  let ruleGroups = [cssRules, cssExceptionRules,
                    blockingRules, blockingExceptionRules];

  let genericSelectors = [];
  let groupedElemhideFilters = new Map();

  for (let filter of this.elemhideFilters)
  {
    let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);
    if (!result)
      continue;

    if (result.matchDomains.length == 0)
    {
      // No domains to match means the selector applies everywhere.
      genericSelectors.push(result.selector);
    }
    else
    {
      for (let matchDomain of result.matchDomains)
      {
        let group = groupedElemhideFilters.get(matchDomain) || [];
        group.push(result.selector);
        groupedElemhideFilters.set(matchDomain, group);
      }
    }
  }

  // Separate out the element hiding exceptions that have only a hostname part
  // from the rest. This allows us to implement a workaround for issue #5345
  // (WebKit bug #167423), but as a bonus it also reduces the number of
  // generated rules. The downside is that the exception will only apply to the
  // top-level document, not to iframes. We have to live with this until the
  // WebKit bug is fixed in all supported versions of Safari.
  // https://bugs.webkit.org/show_bug.cgi?id=167423
  //
  // Note that as a result of this workaround we end up with a huge rule set in
  // terms of the amount of memory used. This can cause Node.js to throw
  // "JavaScript heap out of memory". To avoid this, call Node.js with
  // --max_old_space_size=4096
  let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);

  // Generic selectors are disabled both by generichide and by elemhide
  // exceptions.
  let genericSelectorExceptionDomains =
    extractFilterDomains(this.generichideExceptions);
  elemhideExceptionDomains.forEach(name =>
  {
    genericSelectorExceptionDomains.add(name);
  });

  addCSSRules(cssRules, genericSelectors, null,
              genericSelectorExceptionDomains);

  // Filter out whitelisted domains.
  elemhideExceptionDomains.forEach(domain =>
    groupedElemhideFilters.delete(domain));

  groupedElemhideFilters.forEach((selectors, matchDomain) =>
  {
    addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);
  });

  let requestFilterExceptionDomains = [];
  for (let filter of this.genericblockExceptions)
  {
    let parsed = parseFilterRegexpSource(filter.regexpSource);
    if (parsed.hostname)
      requestFilterExceptionDomains.push(parsed.hostname);
  }

  for (let filter of this.requestFilters)
  {
    convertFilterAddRules(blockingRules, filter, "block", true,
                          requestFilterExceptionDomains);
  }

  for (let filter of this.requestExceptions)
  {
    convertFilterAddRules(blockingExceptionRules, filter,
                          "ignore-previous-rules", true);
  }

  return async(ruleGroups, (group, index) => () =>
  {
    // Only the callback for the last group yields the final, concatenated
    // rule list; the others yield undefined.
    let next = () =>
    {
      if (index == ruleGroups.length - 1)
        return ruleGroups.reduce((all, rules) => all.concat(rules), []);
    };

    // In "auto" mode, merge only while the total number of rules across all
    // groups exceeds 50000. The total is recomputed for every group because
    // merging an earlier group shrinks it.
    if (this.options.merge == "all" ||
        (this.options.merge == "auto" &&
         ruleGroups.reduce((n, rules) => n + rules.length, 0) > 50000))
    {
      return mergeRules(ruleGroups[index], this.options.merge == "all")
      .then(rules =>
      {
        ruleGroups[index] = rules;
        return next();
      });
    }

    return next();
  });
};
LEFT | RIGHT |