| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 /** @module abp2blocklist */ | 18 /** @module abp2blocklist */ |
| 19 | 19 |
| 20 "use strict"; | 20 "use strict"; |
| 21 | 21 |
| 22 let filterClasses = require("filterClasses"); | 22 let filterClasses = require("filterClasses"); |
| 23 let tldjs = require("tldjs"); | |
| 24 let punycode = require("punycode"); | 23 let punycode = require("punycode"); |
| 25 | 24 |
| 26 const selectorLimit = 5000; | 25 const selectorLimit = 5000; |
| 27 const typeMap = filterClasses.RegExpFilter.typeMap; | 26 const typeMap = filterClasses.RegExpFilter.typeMap; |
| 28 const whitelistableRequestTypes = (typeMap.IMAGE | 27 |
| 29 | typeMap.STYLESHEET | 28 const httpRequestTypes = typeMap.IMAGE | |
| 30 | typeMap.SCRIPT | 29 typeMap.STYLESHEET | |
| 31 | typeMap.FONT | 30 typeMap.SCRIPT | |
| 32 | typeMap.MEDIA | 31 typeMap.FONT | |
| 33 | typeMap.POPUP | 32 typeMap.MEDIA | |
| 34 | typeMap.OBJECT | 33 typeMap.POPUP | |
| 35 | typeMap.OBJECT_SUBREQUEST | 34 typeMap.OBJECT | |
| 36 | typeMap.XMLHTTPREQUEST | 35 typeMap.OBJECT_SUBREQUEST | |
| 37 | typeMap.PING | 36 typeMap.XMLHTTPREQUEST | |
| 38 | typeMap.SUBDOCUMENT | 37 typeMap.PING | |
| 39 | typeMap.OTHER); | 38 typeMap.SUBDOCUMENT | |
| 39 typeMap.OTHER; | |
| 40 const rawRequestTypes = typeMap.XMLHTTPREQUEST | | |
| 41 typeMap.WEBSOCKET | | |
| 42 typeMap.WEBRTC | | |
| 43 typeMap.OBJECT_SUBREQUEST | | |
| 44 typeMap.PING | | |
| 45 typeMap.OTHER; | |
| 46 const whitelistableRequestTypes = httpRequestTypes | | |
| 47 typeMap.WEBSOCKET | | |
| 48 typeMap.WEBRTC; | |
| 49 | |
| 50 function callLater(func) | |
| 51 { | |
| 52 return new Promise(resolve => | |
| 53 { | |
| 54 let call = () => resolve(func()); | |
| 55 | |
| 56 // If this looks like Node.js, call process.nextTick, otherwise call | |
| 57 // setTimeout. | |
| 58 if (typeof process != "undefined") | |
| 59 process.nextTick(call); | |
| 60 else | |
| 61 setTimeout(call, 0); | |
| 62 }); | |
| 63 } | |
| 64 | |
| 65 function async(callees, mapFunction) | |
| 66 { | |
| 67 if (!(Symbol.iterator in callees)) | |
| 68 callees = [callees]; | |
| 69 | |
| 70 let lastPause = Date.now(); | |
| 71 let index = 0; | |
| 72 | |
| 73 let promise = Promise.resolve(); | |
| 74 | |
| 75 for (let next of callees) | |
| 76 { | |
| 77 let currentIndex = index; | |
| 78 | |
| 79 promise = promise.then(() => | |
| 80 { | |
| 81 if (mapFunction) | |
| 82 next = mapFunction(next, currentIndex); | |
| 83 | |
| 84 // If it has been 100ms or longer since the last call, take a pause. This | |
| 85 // keeps the browser from freezing up. | |
| 86 let now = Date.now(); | |
| 87 if (now - lastPause >= 100) | |
| 88 { | |
| 89 lastPause = now; | |
| 90 return callLater(next); | |
| 91 } | |
| 92 | |
| 93 return next(); | |
| 94 }); | |
| 95 | |
| 96 index++; | |
| 97 } | |
| 98 | |
| 99 return promise; | |
| 100 } | |
| 40 | 101 |
| 41 function parseDomains(domains, included, excluded) | 102 function parseDomains(domains, included, excluded) |
| 42 { | 103 { |
| 43 for (let domain in domains) | 104 for (let domain in domains) |
| 44 { | 105 { |
| 45 if (domain != "") | 106 if (domain != "") |
| 46 { | 107 { |
| 47 let enabled = domains[domain]; | 108 let enabled = domains[domain]; |
| 48 domain = punycode.toASCII(domain.toLowerCase()); | 109 domain = punycode.toASCII(domain.toLowerCase()); |
| 49 | 110 |
| 50 if (!enabled) | 111 if (!enabled) |
| 51 excluded.push(domain); | 112 excluded.push(domain); |
| 52 else if (!domains[""]) | 113 else if (!domains[""]) |
| 53 included.push(domain); | 114 included.push(domain); |
| 54 } | 115 } |
| 55 } | 116 } |
| 56 } | 117 } |
| 57 | 118 |
| 58 function escapeRegExp(s) | 119 function escapeRegExp(s) |
| 59 { | 120 { |
| 60 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | 121 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
| 61 } | 122 } |
| 62 | 123 |
| 63 function matchDomain(domain) | 124 function matchDomain(domain) |
| 64 { | 125 { |
| 126 if (!domain) | |
| 127 return "^https?://"; | |
| 128 | |
| 65 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; | 129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; |
| 130 } | |
| 131 | |
| 132 function getURLSchemes(contentType) | |
| 133 { | |
| 134 // If the given content type includes all supported URL schemes, simply | |
| 135 // return a single generic URL scheme pattern. This minimizes the size of the | |
| 136 // generated rule set. The downside to this is that it will also match | |
| 137 // schemes that we do not want to match (e.g. "ftp://"), but this can be | |
| 138 // mitigated by adding exceptions for those schemes. | |
| 139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC && | |
| 140 contentType & httpRequestTypes) | |
| 141 return ["[^:]+:(//)?"]; | |
| 142 | |
| 143 let urlSchemes = []; | |
| 144 | |
| 145 if (contentType & typeMap.WEBSOCKET) | |
| 146 urlSchemes.push("wss?://"); | |
| 147 | |
| 148 if (contentType & typeMap.WEBRTC) | |
| 149 urlSchemes.push("stuns?:", "turns?:"); | |
| 150 | |
| 151 if (contentType & httpRequestTypes) | |
| 152 urlSchemes.push("https?://"); | |
| 153 | |
| 154 return urlSchemes; | |
| 155 } | |
| 156 | |
| 157 function findSubdomainsInList(domain, list) | |
| 158 { | |
| 159 let subdomains = []; | |
| 160 let suffixLength = domain.length + 1; | |
| 161 | |
| 162 for (let name of list) | |
| 163 { | |
| 164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) | |
| 165 subdomains.push(name.slice(0, -suffixLength)); | |
| 166 } | |
| 167 | |
| 168 return subdomains; | |
| 169 } | |
| 170 | |
| 171 function extractFilterDomains(filters) | |
| 172 { | |
| 173 let domains = new Set(); | |
| 174 for (let filter of filters) | |
| 175 { | |
| 176 let parsed = parseFilterRegexpSource(filter.regexpSource); | |
| 177 if (parsed.justHostname) | |
| 178 domains.add(parsed.hostname); | |
| 179 } | |
| 180 return domains; | |
| 66 } | 181 } |
| 67 | 182 |
| 68 function convertElemHideFilter(filter, elemhideSelectorExceptions) | 183 function convertElemHideFilter(filter, elemhideSelectorExceptions) |
| 69 { | 184 { |
| 70 let included = []; | 185 let included = []; |
| 71 let excluded = []; | 186 let excluded = []; |
| 72 let rules = []; | |
| 73 | 187 |
| 74 parseDomains(filter.domains, included, excluded); | 188 parseDomains(filter.domains, included, excluded); |
| 75 | 189 |
| 76 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
| 77 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 191 return {matchDomains: included, selector: filter.selector}; |
| 78 } | 192 } |
| 79 | 193 |
| 80 /** | 194 /** |
| 81 * Parse the given filter "regexpSource" string, producing a regular expression, | 195 * Parse the given filter "regexpSource" string, producing a regular expression, |
| 82 * extracting the hostname (if any), deciding if the regular expression is safe | 196 * extracting the hostname (if any), deciding if the regular expression is safe |
| 83 * to be converted + matched as lower case and noting if the source contains | 197 * to be converted + matched as lower case and noting if the source contains |
| 84 * anything after the hostname. | 198 * anything after the hostname. |
| 85 * | 199 * |
| 86 * @param {string} text regexpSource property of a filter | 200 * @param {string} text regexpSource property of a filter |
| 201 * @param {string} urlScheme The URL scheme to use in the regular expression | |
| 87 * @returns {object} An object containing a regular expression string, a bool | 202 * @returns {object} An object containing a regular expression string, a bool |
| 88 * indicating if the filter can be safely matched as lower | 203 * indicating if the filter can be safely matched as lower |
| 89 * case, a hostname string (or undefined) and a bool | 204 * case, a hostname string (or undefined) and a bool |
| 90 * indicating if the source only contains a hostname or not: | 205 * indicating if the source only contains a hostname or not: |
| 91 * {regexp: "...", | 206 * {regexp: "...", |
| 92 * canSafelyMatchAsLowercase: true/false, | 207 * canSafelyMatchAsLowercase: true/false, |
| 93 * hostname: "...", | 208 * hostname: "...", |
| 94 * justHostname: true/false} | 209 * justHostname: true/false} |
| 95 */ | 210 */ |
| 96 function parseFilterRegexpSource(text) | 211 function parseFilterRegexpSource(text, urlScheme) |
| 97 { | 212 { |
| 98 let regexp = []; | 213 let regexp = []; |
| 99 let lastIndex = text.length - 1; | 214 |
| 215 // Convert the text into an array of Unicode characters. | |
| 216 // | |
| 217 // In the case of surrogate pairs (the smiley emoji, for example), one | |
| 218 // Unicode code point is represented by two JavaScript characters together. | |
| 219 // We want to iterate over Unicode code points rather than JavaScript | |
| 220 // characters. | |
| 221 let characters = Array.from(text); | |
| 222 | |
| 223 let lastIndex = characters.length - 1; | |
| 100 let hostname; | 224 let hostname; |
| 101 let hostnameStart = null; | 225 let hostnameStart = null; |
| 102 let hostnameFinished = false; | 226 let hostnameFinished = false; |
| 103 let justHostname = false; | 227 let justHostname = false; |
| 104 let canSafelyMatchAsLowercase = false; | 228 let canSafelyMatchAsLowercase = false; |
| 105 | 229 |
| 106 for (let i = 0; i < text.length; i++) | 230 if (!urlScheme) |
| 107 { | 231 urlScheme = getURLSchemes()[0]; |
| 108 let c = text[i]; | 232 |
| 233 for (let i = 0; i < characters.length; i++) | |
| 234 { | |
| 235 let c = characters[i]; | |
| 109 | 236 |
| 110 if (hostnameFinished) | 237 if (hostnameFinished) |
| 111 justHostname = false; | 238 justHostname = false; |
| 112 | 239 |
| 113 // If we're currently inside the hostname we have to be careful not to | 240 // If we're currently inside the hostname we have to be careful not to |
| 114 // escape any characters until after we have converted it to punycode. | 241 // escape any characters until after we have converted it to punycode. |
| 115 if (hostnameStart != null && !hostnameFinished) | 242 if (hostnameStart != null && !hostnameFinished) |
| 116 { | 243 { |
| 117 let endingChar = (c == "*" || c == "^" || | 244 let endingChar = (c == "*" || c == "^" || |
| 118 c == "?" || c == "/" || c == "|"); | 245 c == "?" || c == "/" || c == "|"); |
| 119 if (!endingChar && i != lastIndex) | 246 if (!endingChar && i != lastIndex) |
| 120 continue; | 247 continue; |
| 121 | 248 |
| 122 hostname = punycode.toASCII( | 249 hostname = punycode.toASCII( |
| 123 text.substring(hostnameStart, endingChar ? i : i + 1) | 250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("") |
| 251 .toLowerCase() | |
| 124 ); | 252 ); |
| 125 hostnameFinished = justHostname = true; | 253 hostnameFinished = justHostname = true; |
| 126 regexp.push(escapeRegExp(hostname)); | 254 regexp.push(escapeRegExp(hostname)); |
| 127 if (!endingChar) | 255 if (!endingChar) |
| 128 break; | 256 break; |
| 129 } | 257 } |
| 130 | 258 |
| 131 switch (c) | 259 switch (c) |
| 132 { | 260 { |
| 133 case "*": | 261 case "*": |
| 134 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") | 262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*") |
| 135 regexp.push(".*"); | 263 regexp.push(".*"); |
| 136 break; | 264 break; |
| 137 case "^": | 265 case "^": |
| 138 if (i < lastIndex) | 266 let alphabet = "a-z"; |
| 139 regexp.push("."); | 267 // If justHostname is true and we've encountered a "^", it means we're |
| 268 // still in the hostname part of the URL. Since hostnames are always | |
| 269 // lower case (Punycode), there's no need to include "A-Z" in the | |
| 270 // pattern. Further, subsequent code may lower-case the entire regular | |
| 271 // expression (if the URL contains only the hostname part), leaving us | |
| 272 // with "a-za-z", which would be redundant. | |
| 273 if (!justHostname) | |
| 274 alphabet = "A-Z" + alphabet; | |
| 275 let digits = "0-9"; | |
| 276 // Note that the "-" must appear first here in order to retain its | |
| 277 // literal meaning within the brackets. | |
| 278 let specialCharacters = "-_.%"; | |
| 279 let separator = "[^" + specialCharacters + alphabet + digits + "]"; | |
| 280 if (i == 0) | |
| 281 regexp.push("^" + urlScheme + "(.*" + separator + ")?"); | |
| 282 else if (i == lastIndex) | |
| 283 regexp.push("(" + separator + ".*)?$"); | |
| 284 else | |
| 285 regexp.push(separator); | |
| 140 break; | 286 break; |
| 141 case "|": | 287 case "|": |
| 142 if (i == 0) | 288 if (i == 0) |
| 143 { | 289 { |
| 144 regexp.push("^"); | 290 regexp.push("^"); |
| 145 break; | 291 break; |
| 146 } | 292 } |
| 147 if (i == lastIndex) | 293 if (i == lastIndex) |
| 148 { | 294 { |
| 149 regexp.push("$"); | 295 regexp.push("$"); |
| 150 break; | 296 break; |
| 151 } | 297 } |
| 152 if (i == 1 && text[0] == "|") | 298 if (i == 1 && characters[0] == "|") |
| 153 { | 299 { |
| 154 hostnameStart = i + 1; | 300 hostnameStart = i + 1; |
| 155 canSafelyMatchAsLowercase = true; | 301 canSafelyMatchAsLowercase = true; |
| 156 regexp.push("https?://([^/]+\\.)?"); | 302 regexp.push(urlScheme + "([^/]+\\.)?"); |
| 157 break; | 303 break; |
| 158 } | 304 } |
| 159 regexp.push("\\|"); | 305 regexp.push("\\|"); |
| 160 break; | 306 break; |
| 161 case "/": | 307 case "/": |
| 162 if (!hostnameFinished && | 308 if (!hostnameFinished && |
| 163 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 309 characters[i - 2] == ":" && characters[i - 1] == "/") |
| 164 { | 310 { |
| 165 hostnameStart = i + 1; | 311 hostnameStart = i + 1; |
| 166 canSafelyMatchAsLowercase = true; | 312 canSafelyMatchAsLowercase = true; |
| 167 } | 313 } |
| 168 regexp.push("/"); | 314 regexp.push("/"); |
| 169 break; | 315 break; |
| 170 case ".": case "+": case "$": case "?": | 316 case ".": case "+": case "$": case "?": |
| 171 case "{": case "}": case "(": case ")": | 317 case "{": case "}": case "(": case ")": |
| 172 case "[": case "]": case "\\": | 318 case "[": case "]": case "\\": |
| 173 regexp.push("\\", c); | 319 regexp.push("\\", c); |
| 174 break; | 320 break; |
| 175 default: | 321 default: |
| 176 if (hostnameFinished && (c >= "a" && c <= "z" || | 322 if (hostnameFinished && (c >= "a" && c <= "z" || |
| 177 c >= "A" && c <= "Z")) | 323 c >= "A" && c <= "Z")) |
| 178 canSafelyMatchAsLowercase = false; | 324 canSafelyMatchAsLowercase = false; |
| 179 regexp.push(c); | 325 regexp.push(c == "%" ? c : encodeURI(c)); |
| 180 } | 326 } |
| 181 } | 327 } |
| 182 | 328 |
| 183 return { | 329 return { |
| 184 regexp: regexp.join(""), | 330 regexp: regexp.join(""), |
| 185 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| 186 hostname: hostname, | 332 hostname: hostname, |
| 187 justHostname: justHostname | 333 justHostname: justHostname |
| 188 }; | 334 }; |
| 189 } | 335 } |
| 190 | 336 |
| 191 function getResourceTypes(filter) | 337 function getResourceTypes(contentType) |
| 192 { | 338 { |
| 193 let types = []; | 339 let types = []; |
| 194 | 340 |
| 195 if (filter.contentType & typeMap.IMAGE) | 341 if (contentType & typeMap.IMAGE) |
| 196 types.push("image"); | 342 types.push("image"); |
| 197 if (filter.contentType & typeMap.STYLESHEET) | 343 if (contentType & typeMap.STYLESHEET) |
| 198 types.push("style-sheet"); | 344 types.push("style-sheet"); |
| 199 if (filter.contentType & typeMap.SCRIPT) | 345 if (contentType & typeMap.SCRIPT) |
| 200 types.push("script"); | 346 types.push("script"); |
| 201 if (filter.contentType & typeMap.FONT) | 347 if (contentType & typeMap.FONT) |
| 202 types.push("font"); | 348 types.push("font"); |
| 203 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | 349 if (contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
| 204 types.push("media"); | 350 types.push("media"); |
| 205 if (filter.contentType & typeMap.POPUP) | 351 if (contentType & typeMap.POPUP) |
| 206 types.push("popup"); | 352 types.push("popup"); |
| 207 if (filter.contentType & (typeMap.XMLHTTPREQUEST | | 353 if (contentType & rawRequestTypes) |
| 208 typeMap.OBJECT_SUBREQUEST | | |
| 209 typeMap.PING | | |
| 210 typeMap.OTHER)) | |
| 211 types.push("raw"); | 354 types.push("raw"); |
| 212 if (filter.contentType & typeMap.SUBDOCUMENT) | 355 if (contentType & typeMap.SUBDOCUMENT) |
| 213 types.push("document"); | 356 types.push("document"); |
| 214 | 357 |
| 215 return types; | 358 return types; |
| 216 } | 359 } |
| 217 | 360 |
| 218 function addDomainPrefix(domains) | 361 function makeRuleCopies(trigger, action, urlSchemes) |
| 219 { | 362 { |
| 220 let result = []; | 363 let copies = []; |
| 221 | 364 |
| 222 for (let domain of domains) | 365 // Always make a deep copy of the rule, since rules may have to be |
| 223 { | 366 // manipulated individually at a later stage. |
| 224 result.push(domain); | 367 let stringifiedTrigger = JSON.stringify(trigger); |
| 225 | 368 |
| 226 if (tldjs.getDomain(domain) == domain) | 369 let filterPattern = trigger["url-filter"].substring(1); |
| 227 result.push("www." + domain); | 370 let startIndex = 0; |
| 228 } | 371 |
| 229 | 372 // If the URL filter already begins with the first URL scheme pattern, skip |
| 230 return result; | 373 // it. |
| 231 } | 374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0])) |
| 232 | 375 { |
| 233 function convertFilterAddRules(rules, filter, action, withResourceTypes) | 376 filterPattern = filterPattern.substring(urlSchemes[0].length); |
| 234 { | 377 startIndex = 1; |
| 235 let parsed = parseFilterRegexpSource(filter.regexpSource); | 378 } |
| 379 else | |
| 380 { | |
| 381 filterPattern = ".*" + filterPattern; | |
| 382 } | |
| 383 | |
| 384 for (let i = startIndex; i < urlSchemes.length; i++) | |
| 385 { | |
| 386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), { | |
| 387 "url-filter": "^" + urlSchemes[i] + filterPattern | |
| 388 }); | |
| 389 copies.push({trigger: copyTrigger, action}); | |
| 390 } | |
| 391 | |
| 392 return copies; | |
| 393 } | |
| 394 | |
| 395 function excludeTopURLFromTrigger(trigger) | |
| 396 { | |
| 397 trigger["unless-top-url"] = [trigger["url-filter"]]; | |
| 398 if (trigger["url-filter-is-case-sensitive"]) | |
| 399 trigger["top-url-filter-is-case-sensitive"] = true; | |
| 400 } | |
| 401 | |
| 402 function convertFilterAddRules(rules, filter, action, withResourceTypes, | |
| 403 exceptionDomains, contentType) | |
| 404 { | |
| 405 if (!contentType) | |
| 406 contentType = filter.contentType; | |
| 407 | |
| 408 // If WebSocket or WebRTC are given along with other options but not | |
| 409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type, | |
| 410 // we must generate multiple rules. For example, for the filter | |
| 411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw" | |
| 412 // and another rule with "^https?://" and "image". If we merge the two, we | |
| 413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest) | |
| 414 // inadvertently. | |
| 415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET && | |
| 416 !(contentType & typeMap.WEBRTC && | |
| 417 contentType & rawRequestTypes & httpRequestTypes)) || | |
| 418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC && | |
| 419 !(contentType & typeMap.WEBSOCKET && | |
| 420 contentType & rawRequestTypes & httpRequestTypes))) | |
| 421 { | |
| 422 if (contentType & typeMap.WEBSOCKET) | |
| 423 { | |
| 424 convertFilterAddRules(rules, filter, action, withResourceTypes, | |
| 425 exceptionDomains, typeMap.WEBSOCKET); | |
| 426 } | |
| 427 | |
| 428 if (contentType & typeMap.WEBRTC) | |
| 429 { | |
| 430 convertFilterAddRules(rules, filter, action, withResourceTypes, | |
| 431 exceptionDomains, typeMap.WEBRTC); | |
| 432 } | |
| 433 | |
| 434 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC); | |
| 435 | |
| 436 if (!contentType) | |
| 437 return; | |
| 438 } | |
| 439 | |
| 440 let urlSchemes = getURLSchemes(contentType); | |
| 441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]); | |
| 236 | 442 |
| 237 // For the special case of $document whitelisting filters with just a domain | 443 // For the special case of $document whitelisting filters with just a domain |
| 238 // we can generate an equivalent blocking rule exception using if-domain. | 444 // we can generate an equivalent blocking rule exception using if-domain. |
| 239 if (filter instanceof filterClasses.WhitelistFilter && | 445 if (filter instanceof filterClasses.WhitelistFilter && |
| 240 filter.contentType & typeMap.DOCUMENT && | 446 contentType & typeMap.DOCUMENT && |
| 241 parsed.justHostname) | 447 parsed.justHostname) |
| 242 { | 448 { |
| 243 rules.push({ | 449 rules.push({ |
| 244 trigger: { | 450 trigger: { |
| 245 "url-filter": ".*", | 451 "url-filter": ".*", |
| 246 "if-domain": addDomainPrefix([parsed.hostname]) | 452 "if-domain": ["*" + parsed.hostname] |
| 247 }, | 453 }, |
| 248 action: {type: "ignore-previous-rules"} | 454 action: {type: "ignore-previous-rules"} |
| 249 }); | 455 }); |
| 250 // If the filter contains other supported options we'll need to generate | 456 // If the filter contains other supported options we'll need to generate |
| 251 // further rules for it, but if not we can simply return now. | 457 // further rules for it, but if not we can simply return now. |
| 252 if (!(filter.contentType & whitelistableRequestTypes)) | 458 if (!(contentType & whitelistableRequestTypes)) |
| 253 return; | 459 return; |
| 254 } | 460 } |
| 255 | 461 |
| 256 let trigger = {"url-filter": parsed.regexp}; | 462 let trigger = {"url-filter": parsed.regexp}; |
| 257 | 463 |
| 258 // Limit rules to HTTP(S) URLs | 464 // If the URL filter begins with one of the URL schemes for this content |
| 259 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 465 // type, we generate additional rules for all the URL scheme patterns; |
| 260 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 466 // otherwise, if the start of the URL filter literally matches the first URL |
| 467 // scheme pattern, we just generate additional rules for the remaining URL | |
| 468 // scheme patterns. | |
| 469 // | |
| 470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^" | |
| 471 // in front of this and generate two additional rules for | |
| 472 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand, | |
| 473 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate | |
| 474 // "^turns?:([^/]+\\.)?foo" in addition. | |
| 475 // | |
| 476 // Note that the filter can be already anchored to the beginning | |
| 477 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional | |
| 478 // rules. | |
| 479 let needAltRules = trigger["url-filter"][0] != "^" || | |
| 480 trigger["url-filter"].startsWith("^" + urlSchemes[0]); | |
| 481 | |
| 482 if (trigger["url-filter"][0] != "^") | |
| 483 { | |
| 484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme) | |
| 485 .test(trigger["url-filter"]))) | |
| 486 { | |
| 487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"]; | |
| 488 } | |
| 489 | |
| 490 trigger["url-filter"] = "^" + trigger["url-filter"]; | |
| 491 } | |
| 261 | 492 |
| 262 // For rules containing only a hostname we know that we're matching against | 493 // For rules containing only a hostname we know that we're matching against |
| 263 // a lowercase string unless the matchCase option was passed. | 494 // a lowercase string unless the matchCase option was passed. |
| 264 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | 495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
| 265 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 496 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
| 266 | 497 |
| 267 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | 498 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
| 268 trigger["url-filter-is-case-sensitive"] = true; | 499 trigger["url-filter-is-case-sensitive"] = true; |
| 269 | 500 |
| 270 let included = []; | 501 let included = []; |
| 271 let excluded = []; | 502 let excluded = []; |
| 272 | 503 |
| 273 parseDomains(filter.domains, included, excluded); | 504 parseDomains(filter.domains, included, excluded); |
| 274 | 505 |
| 506 if (exceptionDomains) | |
| 507 excluded = excluded.concat(exceptionDomains); | |
| 508 | |
| 275 if (withResourceTypes) | 509 if (withResourceTypes) |
| 276 { | 510 { |
| 277 trigger["resource-type"] = getResourceTypes(filter); | 511 let resourceTypes = getResourceTypes(contentType); |
| 278 | 512 |
| 279 if (trigger["resource-type"].length == 0) | 513 // Content blocker rules can't differentiate between sub-document requests |
| 514 // (iframes) and top-level document requests. To avoid too many false | |
| 515 // positives, we prevent rules with no hostname part from blocking document | |
| 516 // requests. | |
| 517 // | |
| 518 // Once Safari 11 becomes our minimum supported version, we could change | |
| 519 // our approach here to use the new "unless-top-url" property instead. | |
| 520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname) | |
| 521 resourceTypes = resourceTypes.filter(type => type != "document"); | |
| 522 | |
| 523 if (resourceTypes.length == 0) | |
| 280 return; | 524 return; |
| 525 | |
| 526 trigger["resource-type"] = resourceTypes; | |
| 281 } | 527 } |
| 282 | 528 |
| 283 if (filter.thirdParty != null) | 529 if (filter.thirdParty != null) |
| 284 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| 285 | 531 |
| 532 let addTopLevelException = false; | |
| 533 | |
| 286 if (included.length > 0) | 534 if (included.length > 0) |
| 287 trigger["if-domain"] = addDomainPrefix(included); | 535 { |
| 536 trigger["if-domain"] = []; | |
| 537 | |
| 538 for (let name of included) | |
| 539 { | |
| 540 // If this is a blocking filter or an element hiding filter, add the | |
| 541 // subdomain wildcard only if no subdomains have been excluded. | |
| 542 let notSubdomains = null; | |
| 543 if ((filter instanceof filterClasses.BlockingFilter || | |
| 544 filter instanceof filterClasses.ElemHideFilter) && | |
| 545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0) | |
| 546 { | |
| 547 trigger["if-domain"].push(name); | |
| 548 | |
| 549 // Add the "www" prefix but only if it hasn't been excluded. | |
| 550 if (!notSubdomains.includes("www")) | |
| 551 trigger["if-domain"].push("www." + name); | |
| 552 } | |
| 553 else | |
| 554 { | |
| 555 trigger["if-domain"].push("*" + name); | |
| 556 } | |
| 557 } | |
| 558 } | |
| 288 else if (excluded.length > 0) | 559 else if (excluded.length > 0) |
| 289 trigger["unless-domain"] = addDomainPrefix(excluded); | 560 { |
| 561 trigger["unless-domain"] = excluded.map(name => "*" + name); | |
| 562 } | |
| 563 else if (filter instanceof filterClasses.BlockingFilter && | |
| 564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname) | |
| 565 { | |
| 566 // Rules with a hostname part are still allowed to block document requests, | |
| 567 // but we add an exception for top-level documents. | |
| 568 // | |
| 569 // Note that we can only do this if there's no "unless-domain" property for | |
| 570 // now. This also only works in Safari 11 onwards, while older versions | |
| 571 // simply ignore this property. Once Safari 11 becomes our minimum | |
| 572 // supported version, we can merge "unless-domain" into "unless-top-url". | |
| 573 addTopLevelException = true; | |
| 574 excludeTopURLFromTrigger(trigger); | |
| 575 } | |
| 290 | 576 |
| 291 rules.push({trigger: trigger, action: {type: action}}); | 577 rules.push({trigger: trigger, action: {type: action}}); |
| 292 } | 578 |
| 293 | 579 if (needAltRules) |
| 294 function hasNonASCI(obj) | 580 { |
| 295 { | 581 // Generate additional rules for any alternative URL schemes. |
| 296 if (typeof obj == "string") | 582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes)) |
| 297 { | 583 { |
| 298 if (/[^\x00-\x7F]/.test(obj)) | 584 if (addTopLevelException) |
| 299 return true; | 585 excludeTopURLFromTrigger(altRule.trigger); |
| 300 } | 586 |
| 301 | 587 rules.push(altRule); |
| 302 if (typeof obj == "object") | 588 } |
| 303 { | 589 } |
| 304 if (obj instanceof Array) | |
| 305 for (let item of obj) | |
| 306 if (hasNonASCI(item)) | |
| 307 return true; | |
| 308 | |
| 309 let names = Object.getOwnPropertyNames(obj); | |
| 310 for (let name of names) | |
| 311 if (hasNonASCI(obj[name])) | |
| 312 return true; | |
| 313 } | |
| 314 | |
| 315 return false; | |
| 316 } | 590 } |
| 317 | 591 |
| 318 function convertIDSelectorsToAttributeSelectors(selector) | 592 function convertIDSelectorsToAttributeSelectors(selector) |
| 319 { | 593 { |
| 320 // First we figure out where all the IDs are | 594 // First we figure out where all the IDs are |
| 321 let sep = ""; | 595 let sep = ""; |
| 322 let start = null; | 596 let start = null; |
| 323 let positions = []; | 597 let positions = []; |
| 324 for (let i = 0; i < selector.length; i++) | 598 for (let i = 0; i < selector.length; i++) |
| 325 { | 599 { |
| (...skipping 33 matching lines...) | (...skipping 33 matching lines...) |
| 359 { | 633 { |
| 360 newSelector.push(selector.substring(i, pos.start)); | 634 newSelector.push(selector.substring(i, pos.start)); |
| 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
| 362 i = pos.end; | 636 i = pos.end; |
| 363 } | 637 } |
| 364 newSelector.push(selector.substring(i)); | 638 newSelector.push(selector.substring(i)); |
| 365 | 639 |
| 366 return newSelector.join(""); | 640 return newSelector.join(""); |
| 367 } | 641 } |
| 368 | 642 |
| 369 function closeMatch(s, t, {multi = false} = {}) | 643 function addCSSRules(rules, selectors, domain, exceptionDomains) |
|
kzar
2017/05/03 11:17:24
I've not seen this syntax before `{multi = false}
Manish Jethani
2017/05/03 14:41:54
This:
function func(param1, param2, {option1 =
kzar
2017/05/03 15:19:04
Acknowledged.
| |
| 370 { | 644 { |
| 371 // This function returns an edit operation (one of "substitute", "delete", | 645 let unlessDomain = exceptionDomains.size > 0 ? [] : null; |
| 372 // and "insert") along with an index in the source string where the edit | 646 |
| 373 // should occur in order to arrive at the target string. | 647 exceptionDomains.forEach(name => |
| 374 | 648 { |
| 649 // For domain-specific filters, include the exception domains only if | |
| 650 // they're subdomains of the given domain. | |
| 651 if (!domain || name.substr(-domain.length - 1) == "." + domain) | |
| 652 unlessDomain.push("*" + name); | |
| 653 }); | |
| 654 | |
| 655 while (selectors.length) | |
| 656 { | |
| 657 let selector = selectors.splice(0, selectorLimit).join(", "); | |
| 658 | |
| 659 // As of Safari 9.0 element IDs are matched as lowercase. We work around | |
| 660 // this by converting to the attribute format [id="elementID"] | |
| 661 selector = convertIDSelectorsToAttributeSelectors(selector); | |
| 662 | |
| 663 let rule = { | |
| 664 trigger: {"url-filter": matchDomain(domain), | |
| 665 "url-filter-is-case-sensitive": true}, | |
| 666 action: {type: "css-display-none", | |
| 667 selector: selector} | |
| 668 }; | |
| 669 | |
| 670 if (unlessDomain) | |
| 671 rule.trigger["unless-domain"] = unlessDomain; | |
| 672 | |
| 673 rules.push(rule); | |
| 674 } | |
| 675 } | |
| 676 | |
| 677 /** | |
| 678 * Check if two strings are a close match | |
| 679 * | |
| 680 * This function returns an edit operation, one of "substitute", "delete", and | |
| 681 * "insert", along with an index in the source string where the edit must occur | |
| 682 * in order to arrive at the target string. If the strings are not a close | |
| 683 * match, it returns null. | |
| 684 * | |
| 685 * Two strings are considered to be a close match if they are one edit | |
| 686 * operation apart. | |
| 687 * | |
| 688 * Deletions or insertions of a contiguous range of characters from one string | |
| 689 * into the other, at the same index, are treated as a single edit. For | |
| 690 * example, "internal" and "international" are considered to be one edit apart | |
| 691 * and therefore a close match. | |
| 692 * | |
| 693 * A few things to note: | |
| 694 * | |
| 695 * 1) This function does not care about the format of the input strings. For | |
| 696 * example, the caller may pass in regular expressions, where "[ab]" and | |
| 697 * "[bc]" could be considered to be a close match, since the order within the | |
| 698 * brackets doesn't matter. This function will still return null for this set | |
| 699 * of inputs since they are two edits apart. | |
| 700 * | |
| 701 * 2) To be friendly to calling code that might be passing in regular | |
| 702 * expressions, this function will simply return null if it encounters a | |
| 703 * special character (e.g. "\", "?", "+", etc.) in the delta. For example, | |
| 704 * given "Hello" and "Hello, how are you?", it will return null. | |
| 705 * | |
| 706 * 3) If the caller does indeed pass in regular expressions, it must make the | |
| 707 * important assumption that the parts where two such regular expressions may | |
| 708 * differ can always be treated as normal strings. For example, | |
| 709 * "^https?://example.com/ads" and "^https?://example.com/adv" differ only in | |
| 710 * the last character, therefore the regular expressions can safely be merged | |
| 711 * into "^https?://example.com/ad[sv]". | |
| 712 * | |
| 713 * @param {string} s The source string | |
| 714 * @param {string} t The target string | |
| 715 * | |
| 716 * @returns {object} An object describing the single edit operation that must | |
| 717 * occur in the source string in order to arrive at the | |
| 718 * target string | |
| 719 */ | |
| 720 function closeMatch(s, t) | |
| 721 { | |
| 375 let diff = s.length - t.length; | 722 let diff = s.length - t.length; |
| 376 | |
| 377 // If the string lenghts differ by more than one character, we cannot arrive | |
|
kzar
2017/05/03 11:17:24
Nit: Typo "lenghts".
Manish Jethani
2017/05/04 02:49:32
Done.
| |
| 378 // at target from source in a single edit operation. | |
| 379 if (!multi && (diff < -1 || diff > 1)) | |
| 380 return null; | |
| 381 | 723 |
| 382 // If target is longer than source, swap them for the purpose of our | 724 // If target is longer than source, swap them for the purpose of our |
| 383 // calculation. | 725 // calculation. |
| 384 if (diff < 0) | 726 if (diff < 0) |
| 385 { | 727 { |
| 386 let tmp = s; | 728 let tmp = s; |
| 387 s = t; | 729 s = t; |
| 388 t = tmp; | 730 t = tmp; |
| 389 } | 731 } |
| 390 | 732 |
| 391 let edit = null; | 733 let edit = null; |
| 392 let multiEdit = false; | 734 |
| 393 | 735 let i = 0; |
| 394 let j = 0; | 736 let j = 0; |
| 395 | 737 |
| 396 for (let i = 0; i < s.length; i++) | 738 // Start from the beginning and keep going until we hit a character that |
| 397 { | 739 // doesn't match. |
| 398 if (s[i] == t[j]) | 740 for (; i < s.length; i++) |
| 399 { | 741 { |
| 400 j++; | 742 if (s[i] != t[i]) |
| 401 | 743 break; |
| 402 if (edit && multiEdit && !edit.closeIndex) | 744 } |
| 403 edit.closeIndex = i; | 745 |
| 404 } | 746 // Now do exactly the same from the end, but also stop if we reach the |
| 405 else if (edit && (!multi || diff == 0 || edit.closeIndex)) | 747 // position where we terminated the previous loop. |
| 406 { | 748 for (; j < t.length; j++) |
| 407 // Since we want one and only one edit operation, we must bail here. | 749 { |
| 750 if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1]) | |
| 751 break; | |
| 752 } | |
| 753 | |
| 754 if (diff == 0) | |
| 755 { | |
| 756 // If the strings are equal in length and the delta isn't exactly one | |
| 757 // character, it's not a close match. | |
| 758 if (t.length - j - i != 1) | |
| 408 return null; | 759 return null; |
| 409 } | 760 } |
| 410 else if ((s[i] == "." || s[i] == "+" || s[i] == "$" || s[i] == "?" || | 761 else if (i != t.length - j) |
| 411 s[i] == "{" || s[i] == "}" || s[i] == "(" || s[i] == ")" || | 762 { |
| 412 s[i] == "[" || s[i] == "]" || s[i] == "\\") || | 763 // For strings of unequal length, if we haven't found a match for every |
| 413 (t[j] == "." || t[j] == "+" || t[j] == "$" || t[j] == "?" || | 764 // single character in the shorter string counting from both the beginning |
| 414 t[j] == "{" || t[j] == "}" || t[j] == "(" || t[j] == ")" || | 765 // and the end, it's not a close match. |
| 415 t[j] == "[" || t[j] == "]" || t[j] == "\\")) | 766 return null; |
| 416 { | 767 } |
| 417 // We don't deal with special characters for now. | 768 |
|
kzar
2017/05/03 11:17:24
So we skip special characters in the url-filter re
Manish Jethani
2017/05/03 14:41:54
The above is not a good example because this is no
kzar
2017/05/03 15:19:04
Maybe add a comment explaining that assumption?
Manish Jethani
2017/05/04 02:49:32
Added a comment to explain this.
| |
| 769 for (let k = i; k < s.length - j; k++) | |
| 770 { | |
| 771 // If the delta contains any special characters, it's not a close match. | |
| 772 if (s[k] == "." || s[k] == "+" || s[k] == "$" || s[k] == "?" || | |
| 773 s[k] == "{" || s[k] == "}" || s[k] == "(" || s[k] == ")" || | |
| 774 s[k] == "[" || s[k] == "]" || s[k] == "\\") | |
| 418 return null; | 775 return null; |
| 419 } | 776 } |
| 420 else | 777 |
| 421 { | 778 if (diff == 0) |
| 422 if (diff == 0) | 779 { |
|
kzar
2017/05/03 11:17:24
Nit: Couldn't this be an `else if` too?
Manish Jethani
2017/05/04 02:49:32
Done.
| |
| 423 { | 780 edit = {type: "substitute", index: i}; |
| 424 // If both strings are equal in length, this is a substitution. | 781 } |
| 425 edit = {type: "substitute", index: i}; | 782 else if (diff > 0) |
| 426 j++; | 783 { |
| 784 edit = {type: "delete", index: i}; | |
| 785 | |
| 786 if (diff > 1) | |
| 787 edit.endIndex = s.length - j; | |
| 788 } | |
| 789 else | |
| 790 { | |
| 791 edit = {type: "insert", index: i}; | |
| 792 | |
| 793 if (diff < -1) | |
| 794 edit.endIndex = s.length - j; | |
| 795 } | |
| 796 | |
| 797 return edit; | |
| 798 } | |
| 799 | |
| 800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive) | |
| 801 { | |
| 802 const heuristicRange = 1000; | |
| 803 | |
| 804 let ol = rulesInfo.length; | |
| 805 | |
| 806 // Throw out obviously redundant rules. | |
| 807 return async(rulesInfo, (ruleInfo, index) => () => | |
| 808 { | |
| 809 // If this rule is already marked as redundant, don't bother comparing it | |
| 810 // with other rules. | |
| 811 if (rulesInfo[index].redundant) | |
| 812 return; | |
| 813 | |
| 814 let limit = exhaustive ? rulesInfo.length : | |
| 815 Math.min(index + heuristicRange, rulesInfo.length); | |
| 816 | |
| 817 for (let i = index, j = i + 1; j < limit; j++) | |
| 818 { | |
| 819 if (rulesInfo[j].redundant) | |
| 820 continue; | |
| 821 | |
| 822 let source = rulesInfo[i].rule.trigger["url-filter"]; | |
| 823 let target = rulesInfo[j].rule.trigger["url-filter"]; | |
| 824 | |
| 825 if (source.length >= target.length) | |
| 826 { | |
| 827 // If one URL filter is a substring of the other starting at the | |
| 828 // beginning, the other one is clearly redundant. | |
| 829 if (source.substring(0, target.length) == target) | |
| 830 { | |
| 831 rulesInfo[i].redundant = true; | |
| 832 break; | |
| 833 } | |
| 834 } | |
| 835 else if (target.substring(0, source.length) == source) | |
| 836 { | |
| 837 rulesInfo[j].redundant = true; | |
| 838 } | |
| 839 } | |
| 840 }) | |
| 841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant)); | |
| 842 } | |
| 843 | |
| 844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive) | |
| 845 { | |
| 846 // Closely matching rules are likely to be within a certain range. We only | |
| 847 // look for matches within this range by default. If we increase this value, | |
| 848 // it can give us more matches and a smaller resulting rule set, but possibly | |
| 849 // at a significant performance cost. | |
| 850 // | |
| 851 // If the exhaustive option is true, we simply ignore this value and look for | |
| 852 // matches throughout the rule set. | |
| 853 const heuristicRange = 1000; | |
| 854 | |
| 855 let limit = exhaustive ? rulesInfo.length : | |
| 856 Math.min(index + heuristicRange, rulesInfo.length); | |
| 857 | |
| 858 for (let i = index, j = i + 1; j < limit; j++) | |
| 859 { | |
| 860 let source = rulesInfo[i].rule.trigger["url-filter"]; | |
| 861 let target = rulesInfo[j].rule.trigger["url-filter"]; | |
| 862 | |
| 863 let edit = closeMatch(source, target); | |
| 864 | |
| 865 if (edit) | |
| 866 { | |
| 867 let urlFilter, ruleInfo, match = {edit}; | |
| 868 | |
| 869 if (edit.type == "insert") | |
| 870 { | |
| 871 // Convert the insertion into a deletion and stick it on the target | |
| 872 // rule instead. We can only group deletions and substitutions; | |
| 873 // therefore insertions must be treated as deletions on the target | |
| 874 // rule. | |
| 875 urlFilter = target; | |
| 876 ruleInfo = rulesInfo[j]; | |
| 877 match.index = i; | |
| 878 edit.type = "delete"; | |
| 427 } | 879 } |
| 428 else | 880 else |
| 429 { | 881 { |
| 430 if (edit) | 882 urlFilter = source; |
| 431 multiEdit = true; | 883 ruleInfo = rulesInfo[i]; |
| 432 else if (diff > 0) | 884 match.index = j; |
|
kzar
2017/05/03 11:17:24
Nit: Please use braces since the clause spans mult
Manish Jethani
2017/05/04 02:49:32
Done.
| |
| 433 // If the source string is longer, this is a deletion. | 885 } |
| 434 edit = {type: "delete", index: i}; | 886 |
| 887 // If the edit has an end index, it represents a multiple character | |
| 888 // edit. | |
| 889 let multiEdit = !!edit.endIndex; | |
| 890 | |
| 891 if (multiEdit) | |
| 892 { | |
| 893 // We only care about a single multiple character edit because the | |
| 894 // number of characters for such a match doesn't matter, we can | |
| 895 // only merge with one other rule. | |
| 896 if (!ruleInfo.multiEditMatch) | |
| 897 ruleInfo.multiEditMatch = match; | |
| 898 } | |
| 899 else | |
| 900 { | |
| 901 // For single character edits, multiple rules can be merged into | |
| 902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?". | |
| 903 if (!ruleInfo.matches) | |
| 904 ruleInfo.matches = new Array(urlFilter.length); | |
| 905 | |
| 906 // Matches at a particular index. For example, for a source string | |
| 907 // "ads", both target strings "ad" (deletion) and "adv" | |
| 908 // (substitution) match at index 2, hence they are grouped together | |
| 909 // to possibly be merged later into "ad[sv]?". | |
| 910 let matchesForIndex = ruleInfo.matches[edit.index]; | |
| 911 | |
| 912 if (matchesForIndex) | |
| 913 { | |
| 914 matchesForIndex.push(match); | |
| 915 } | |
| 435 else | 916 else |
| 436 edit = {type: "insert", index: i}; | 917 { |
| 918 matchesForIndex = [match]; | |
| 919 ruleInfo.matches[edit.index] = matchesForIndex; | |
| 920 } | |
| 921 | |
| 922 // Keep track of the best set of matches. We later sort by this to | |
| 923 // get best results. | |
| 924 if (!ruleInfo.bestMatches || | |
| 925 matchesForIndex.length > ruleInfo.bestMatches.length) | |
| 926 ruleInfo.bestMatches = matchesForIndex; | |
| 437 } | 927 } |
| 438 } | 928 } |
| 439 } | 929 } |
| 440 | 930 } |
| 441 if (edit && multiEdit && !edit.closeIndex) | 931 |
| 442 { | 932 function mergeCandidateRulesByURLFilter(rulesInfo) |
| 443 if (j < t.length) | 933 { |
| 444 return null; | 934 // Filter out rules that have no matches at all. |
| 445 | 935 let candidateRulesInfo = rulesInfo.filter(ruleInfo => |
| 446 edit.closeIndex = s.length; | 936 { |
| 447 } | 937 return ruleInfo.bestMatches || ruleInfo.multiEditMatch |
| 448 | |
| 449 return edit; | |
| 450 } | |
| 451 | |
| 452 function ruleWithoutURLFilter(rule) | |
| 453 { | |
| 454 let copy = { | |
|
kzar
2017/05/03 15:19:04
How about `return Object.create(rule, {"url-filter
Manish Jethani
2017/05/04 02:49:31
That would not work for multiple reasons, but most
| |
| 455 trigger: Object.assign({}, rule.trigger), | |
| 456 action: Object.assign({}, rule.action) | |
| 457 }; | |
| 458 | |
| 459 delete copy.trigger["url-filter"]; | |
| 460 | |
| 461 return copy; | |
| 462 } | |
| 463 | |
| 464 function mergeCloselyMatchingRules(rules, {multi = false} = {}) | |
| 465 { | |
| 466 // Closely matching rules are likely to be within a certain range. We only | |
| 467 // look for matches within this range. If we increase this value, it can give | |
| 468 // us more matches and a smaller resulting rule set, but possibly at a | |
| 469 // significant performance cost. | |
| 470 const heuristicRange = 100; | |
|
kzar
2017/05/03 15:19:04
Since the code either runs in a place where speed
Manish Jethani
2017/05/04 02:49:32
In the latest update the generateRules function ta
| |
| 471 | |
| 472 let rulesInfo = new Array(rules.length); | |
| 473 | |
| 474 rules.forEach((rule, index) => | |
| 475 { | |
| 476 rulesInfo[index] = {rule}; | |
|
kzar
2017/05/03 11:17:24
I'm not sure syntax like this will work for Safari
Manish Jethani
2017/05/03 14:41:54
I'll check, but if it doesn't work then I'll have
kzar
2017/05/08 08:13:02
You mentioned testing the code on Safari now, but
Manish Jethani
2017/05/08 14:03:58
I've been testing with Safari 10.
Anyway, this is
kzar
2017/05/09 10:05:46
I think you should test with Safari 9 at least onc
Manish Jethani
2017/05/09 15:52:46
"{rule: rule}" ought to work in every single JS en
| |
| 477 | |
| 478 if (rule.action.type == "ignore-previous-rules") | |
| 479 { | |
| 480 rulesInfo[index].skip = true; | |
| 481 } | |
| 482 else | |
| 483 { | |
| 484 // Save a stringified version of the rule, but without the URL filter. We | |
| 485 // use this for comparison later. | |
| 486 rulesInfo[index].stringifiedWithoutURLFilter = | |
| 487 JSON.stringify(ruleWithoutURLFilter(rule)); | |
| 488 } | |
| 489 }); | 938 }); |
| 490 | 939 |
| 491 for (let i = 0; i < rules.length; i++) | 940 // For best results, we have to sort the candidates by the largest set of |
| 492 { | 941 // matches. |
| 493 if (rulesInfo[i].skip) | 942 // |
| 943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to | |
| 944 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and | |
| 945 // "[ab]dx" (3 rules). | |
| 946 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | |
| 947 { | |
| 948 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length : | |
| 949 ruleInfo1.multiEditMatch ? 1 : 0; | |
| 950 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length : | |
| 951 ruleInfo2.multiEditMatch ? 1 : 0; | |
| 952 | |
| 953 return weight2 - weight1; | |
| 954 }); | |
| 955 | |
| 956 for (let ruleInfo of candidateRulesInfo) | |
| 957 { | |
| 958 let rule = ruleInfo.rule; | |
| 959 | |
| 960 // If this rule has already been merged into another rule, we skip it. | |
| 961 if (ruleInfo.merged) | |
| 494 continue; | 962 continue; |
| 495 | 963 |
| 496 for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++) | 964 // Find the best set of rules to group, which is simply the largest set. |
| 497 { | 965 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => |
| 498 if (rulesInfo[j].skip) | 966 { |
| 499 continue; | 967 matchesForIndex = (matchesForIndex || []).filter(match => |
| 500 | 968 { |
| 501 // Check if the rules are identical except for the URL filter. | 969 // Filter out rules that have either already been merged into other |
| 502 if (rulesInfo[i].stringifiedWithoutURLFilter == | 970 // rules or have had other rules merged into them. |
|
kzar
2017/05/03 15:19:04
I wonder if we could create a lookup table stringi
Manish Jethani
2017/05/04 02:49:32
I'm not sure what the benefit of that would be.
W
| |
| 503 rulesInfo[j].stringifiedWithoutURLFilter) | 971 return !rulesInfo[match.index].merged && |
| 504 { | 972 !rulesInfo[match.index].mergedInto; |
| 505 let source = rules[i].trigger["url-filter"]; | 973 }); |
| 506 let target = rules[j].trigger["url-filter"]; | 974 |
| 507 | 975 return matchesForIndex.length > best.length ? matchesForIndex : best; |
| 508 let edit = closeMatch(source, target, {multi}); | 976 }, |
| 509 | 977 []); |
| 510 if (edit) | 978 |
| 979 let multiEdit = false; | |
| 980 | |
| 981 // If we couldn't find a single rule to merge with, let's see if we have a | |
| 982 // multiple character edit. e.g. we could merge "ad" and "adserver" into | |
| 983 // "ad(server)?". | |
| 984 if (best.length == 0 && ruleInfo.multiEditMatch && | |
| 985 !rulesInfo[ruleInfo.multiEditMatch.index].merged && | |
| 986 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | |
| 987 { | |
| 988 best = [ruleInfo.multiEditMatch]; | |
| 989 multiEdit = true; | |
| 990 } | |
| 991 | |
| 992 if (best.length > 0) | |
| 993 { | |
| 994 let urlFilter = rule.trigger["url-filter"]; | |
| 995 | |
| 996 let editIndex = best[0].edit.index; | |
| 997 | |
| 998 if (!multiEdit) | |
| 999 { | |
| 1000 // Merge all the matching rules into this one. | |
| 1001 | |
| 1002 let characters = [urlFilter[editIndex]]; | |
| 1003 let quantifier = ""; | |
| 1004 | |
| 1005 for (let match of best) | |
| 511 { | 1006 { |
| 512 let urlFilter, ruleInfo, match = {edit}; | 1007 if (match.edit.type == "delete") |
| 513 | |
| 514 if (edit.type == "insert") | |
| 515 { | 1008 { |
| 516 // Convert the insertion into a deletion and stick it on the target | 1009 quantifier = "?"; |
| 517 // rule instead. We can only group deletions and substitutions; | |
| 518 // therefore insertions must be treated as deletions on the target | |
| 519 // rule, to be dealt with later. | |
| 520 urlFilter = target; | |
| 521 ruleInfo = rulesInfo[j]; | |
| 522 match.index = i; | |
| 523 edit.type = "delete"; | |
| 524 } | 1010 } |
| 525 else | 1011 else |
| 526 { | 1012 { |
| 527 urlFilter = source; | 1013 let character = rulesInfo[match.index].rule |
| 528 ruleInfo = rulesInfo[i]; | 1014 .trigger["url-filter"][editIndex]; |
| 529 match.index = j; | 1015 |
| 1016 // Insert any hyphen at the beginning so it gets interpreted as a | |
| 1017 // literal hyphen. | |
| 1018 if (character == "-") | |
| 1019 characters.unshift(character); | |
| 1020 else | |
| 1021 characters.push(character); | |
| 530 } | 1022 } |
| 531 | 1023 |
| 532 if (edit.closeIndex) | 1024 // Mark the target rule as merged so other rules don't try to merge |
| 533 { | 1025 // it again. |
| 534 if (!ruleInfo.multiEditMatch) | 1026 rulesInfo[match.index].merged = true; |
| 535 ruleInfo.multiEditMatch = match; | |
| 536 } | |
| 537 else | |
| 538 { | |
| 539 if (!ruleInfo.matches) | |
| 540 ruleInfo.matches = new Array(urlFilter.length + 1); | |
| 541 | |
| 542 let matchesForIndex = ruleInfo.matches[edit.index]; | |
| 543 | |
| 544 if (matchesForIndex) | |
| 545 { | |
| 546 matchesForIndex.push(match); | |
| 547 } | |
| 548 else | |
| 549 { | |
| 550 matchesForIndex = [match]; | |
| 551 ruleInfo.matches[edit.index] = matchesForIndex; | |
| 552 } | |
| 553 | |
| 554 if (!ruleInfo.bestMatches || | |
| 555 matchesForIndex.length > ruleInfo.bestMatches.length) | |
| 556 ruleInfo.bestMatches = matchesForIndex; | |
| 557 } | |
| 558 } | |
| 559 } | |
| 560 } | |
| 561 } | |
| 562 | |
| 563 let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches || | |
| 564 ruleInfo.multiEditMatch) ; | |
|
kzar
2017/05/03 11:17:24
Nit: Long line.
Manish Jethani
2017/05/04 02:49:31
Done.
| |
| 565 | |
| 566 // For best results, we have to sort the candidates by the number of matches. | |
| 567 // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate | |
| 568 // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3 | |
| 569 // rules). | |
| 570 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | |
| 571 { | |
| 572 let weight1 = 1; | |
| 573 let weight2 = 1; | |
| 574 | |
| 575 if (ruleInfo1.bestMatches) | |
| 576 weight1 = ruleInfo1.bestMatches.length; | |
| 577 | |
| 578 if (ruleInfo2.bestMatches) | |
| 579 weight2 = ruleInfo2.bestMatches.length; | |
| 580 | |
| 581 return weight2 - weight1; | |
| 582 }); | |
| 583 | |
| 584 for (let ruleInfo of candidateRulesInfo) | |
| 585 { | |
| 586 let rule = ruleInfo.rule; | |
| 587 | |
| 588 if (rule._merged) | |
| 589 continue; | |
| 590 | |
| 591 // Find the best set of rules to group, which is simply the largest set. | |
| 592 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | |
| 593 { | |
| 594 matchesForIndex = (matchesForIndex || []).filter(match => | |
| 595 { | |
| 596 // Filter out rules that have either already been merged into other | |
| 597 // rules or have had other rules merged into them. | |
| 598 return !rules[match.index]._merged && | |
| 599 !rulesInfo[match.index].mergedInto; | |
| 600 }); | |
| 601 | |
| 602 return matchesForIndex.length > best.length ? matchesForIndex : best; | |
| 603 }, | |
| 604 []); | |
| 605 | |
| 606 if (best.length == 0 && ruleInfo.multiEditMatch && | |
| 607 !rules[ruleInfo.multiEditMatch.index]._merged && | |
| 608 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | |
| 609 best = [ruleInfo.multiEditMatch]; | |
| 610 | |
| 611 if (best.length > 0) | |
| 612 { | |
| 613 let urlFilter = rule.trigger["url-filter"]; | |
| 614 | |
| 615 let editIndex = best[0].edit.index; | |
| 616 | |
| 617 if (best[0] != ruleInfo.multiEditMatch) | |
| 618 { | |
| 619 // Merge all the matching rules into this one. | |
| 620 | |
| 621 let characters = []; | |
| 622 let quantifier = ""; | |
| 623 | |
| 624 for (let match of best) | |
| 625 { | |
| 626 if (match.edit.type == "delete") | |
| 627 quantifier = "?"; | |
| 628 else | |
| 629 characters.push(rules[match.index].trigger["url-filter"][editIndex]) ; | |
| 630 | |
| 631 rules[match.index]._merged = true; | |
| 632 } | 1027 } |
| 633 | 1028 |
| 634 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | 1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + |
| 635 urlFilter.substring(editIndex + 1); | 1030 urlFilter.substring(editIndex + 1); |
| 636 if (characters.length > 0) | 1031 if (characters.length > 1) |
| 637 { | 1032 { |
| 638 urlFilter = urlFilter.substring(0, editIndex) + "[" + | 1033 urlFilter = urlFilter.substring(0, editIndex) + "[" + |
| 639 urlFilter[editIndex] + characters.join("") + "]" + | 1034 characters.join("") + "]" + |
| 640 urlFilter.substring(editIndex + 1); | 1035 urlFilter.substring(editIndex + 1); |
| 641 } | 1036 } |
| 642 } | 1037 } |
| 643 else | 1038 else |
| 644 { | 1039 { |
| 645 let editCloseIndex = best[0].edit.closeIndex; | 1040 let editEndIndex = best[0].edit.endIndex; |
| 646 | 1041 |
| 647 rules[best[0].index]._merged = true; | 1042 // Mark the target rule as merged so other rules don't try to merge it |
| 1043 // again. | |
| 1044 rulesInfo[best[0].index].merged = true; | |
| 648 | 1045 |
| 649 urlFilter = urlFilter.substring(0, editIndex) + "(" + | 1046 urlFilter = urlFilter.substring(0, editIndex) + "(" + |
| 650 urlFilter.substring(editIndex, editCloseIndex) + ")?" + | 1047 urlFilter.substring(editIndex, editEndIndex) + ")?" + |
| 651 urlFilter.substring(editCloseIndex); | 1048 urlFilter.substring(editEndIndex); |
| 652 } | 1049 } |
| 653 | 1050 |
| 654 rule.trigger["url-filter"] = urlFilter; | 1051 rule.trigger["url-filter"] = urlFilter; |
| 655 | 1052 |
| 1053 // Mark this rule as one that has had other rules merged into it. | |
| 656 ruleInfo.mergedInto = true; | 1054 ruleInfo.mergedInto = true; |
| 657 } | 1055 } |
| 658 } | 1056 } |
| 659 | 1057 } |
| 660 return rules.filter(rule => !rule._merged); | 1058 |
| 1059 function mergeRulesByURLFilter(rulesInfo, exhaustive) | |
| 1060 { | |
| 1061 return async(rulesInfo, (ruleInfo, index) => () => | |
| 1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive) | |
| 1063 ) | |
| 1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo)); | |
| 1065 } | |
| 1066 | |
| 1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) | |
| 1068 { | |
| 1069 if (rulesInfo.length <= 1) | |
| 1070 return; | |
| 1071 | |
| 1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]); | |
| 1073 | |
| 1074 for (let i = 1; i < rulesInfo.length; i++) | |
| 1075 { | |
| 1076 for (let value of rulesInfo[i].rule[propertyType][property] || []) | |
| 1077 valueSet.add(value); | |
| 1078 | |
| 1079 rulesInfo[i].merged = true; | |
| 1080 } | |
| 1081 | |
| 1082 if (valueSet.size > 0) | |
| 1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet); | |
| 1084 | |
| 1085 rulesInfo[0].mergedInto = true; | |
| 1086 } | |
| 1087 | |
| 1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) | |
| 1089 { | |
| 1090 let mergeableRulesInfoByGroup = new Map(); | |
| 1091 | |
| 1092 for (let ruleInfo of rulesInfo) | |
| 1093 { | |
| 1094 let copy = { | |
| 1095 trigger: Object.assign({}, ruleInfo.rule.trigger), | |
| 1096 action: Object.assign({}, ruleInfo.rule.action) | |
| 1097 }; | |
| 1098 | |
| 1099 delete copy[propertyType][property]; | |
| 1100 | |
| 1101 let groupKey = JSON.stringify(copy); | |
| 1102 | |
| 1103 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey); | |
| 1104 | |
| 1105 if (mergeableRulesInfo) | |
| 1106 mergeableRulesInfo.push(ruleInfo); | |
| 1107 else | |
| 1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]); | |
| 1109 } | |
| 1110 | |
| 1111 return mergeableRulesInfoByGroup; | |
| 1112 } | |
| 1113 | |
| 1114 function mergeRules(rules, exhaustive) | |
| 1115 { | |
| 1116 let rulesInfo = rules.map(rule => ({rule})); | |
| 1117 | |
| 1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"]; | |
| 1119 | |
| 1120 return async(() => | |
| 1121 { | |
| 1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter"); | |
| 1123 return async(map.values(), mergeableRulesInfo => () => | |
| 1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive) | |
| 1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive)) | |
| 1126 ) | |
| 1127 .then(() => | |
| 1128 { | |
| 1129 // Filter out rules that are redundant or have been merged into other | |
| 1130 // rules. | |
| 1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant && | |
| 1132 !ruleInfo.merged); | |
| 1133 }); | |
| 1134 }) | |
| 1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () => | |
| 1136 { | |
| 1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", | |
| 1138 arrayProperty); | |
| 1139 return async(map.values(), mergeableRulesInfo => () => | |
| 1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty) | |
| 1141 ) | |
| 1142 .then(() => | |
| 1143 { | |
| 1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); | |
| 1145 }); | |
| 1146 })) | |
| 1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule)); | |
| 661 } | 1148 } |
| 662 | 1149 |
| 663 let ContentBlockerList = | 1150 let ContentBlockerList = |
| 664 /** | 1151 /** |
| 665 * Create a new Adblock Plus filter to content blocker list converter | 1152 * Create a new Adblock Plus filter to content blocker list converter |
| 666 * | 1153 * |
| 1154 * @param {object} options Options for content blocker list generation | |
| 1155 * | |
| 667 * @constructor | 1156 * @constructor |
| 668 */ | 1157 */ |
| 669 exports.ContentBlockerList = function () | 1158 exports.ContentBlockerList = function (options) |
| 670 { | 1159 { |
| 1160 const defaultOptions = { | |
| 1161 merge: "auto" | |
| 1162 }; | |
| 1163 | |
| 1164 this.options = Object.assign({}, defaultOptions, options); | |
| 1165 | |
| 671 this.requestFilters = []; | 1166 this.requestFilters = []; |
| 672 this.requestExceptions = []; | 1167 this.requestExceptions = []; |
| 673 this.elemhideFilters = []; | 1168 this.elemhideFilters = []; |
| 674 this.elemhideExceptions = []; | 1169 this.elemhideExceptions = []; |
| 1170 this.genericblockExceptions = []; | |
| 1171 this.generichideExceptions = []; | |
| 675 this.elemhideSelectorExceptions = new Map(); | 1172 this.elemhideSelectorExceptions = new Map(); |
| 676 }; | 1173 }; |
| 677 | 1174 |
| 678 /** | 1175 /** |
| 679 * Add Adblock Plus filter to be converted | 1176 * Add Adblock Plus filter to be converted |
| 680 * | 1177 * |
| 681 * @param {Filter} filter Filter to convert | 1178 * @param {Filter} filter Filter to convert |
| 682 */ | 1179 */ |
| 683 ContentBlockerList.prototype.addFilter = function(filter) | 1180 ContentBlockerList.prototype.addFilter = function(filter) |
| 684 { | 1181 { |
| 685 if (filter.sitekeys) | 1182 if (filter.sitekeys) |
| 686 return; | 1183 return; |
| 687 if (filter instanceof filterClasses.RegExpFilter && | 1184 if (filter instanceof filterClasses.RegExpFilter && |
| 688 filter.regexpSource == null) | 1185 filter.regexpSource == null) |
| 689 return; | 1186 return; |
| 690 | 1187 |
| 691 if (filter instanceof filterClasses.BlockingFilter) | 1188 if (filter instanceof filterClasses.BlockingFilter) |
| 692 this.requestFilters.push(filter); | 1189 this.requestFilters.push(filter); |
| 693 | 1190 |
| 694 if (filter instanceof filterClasses.WhitelistFilter) | 1191 if (filter instanceof filterClasses.WhitelistFilter) |
| 695 { | 1192 { |
| 696 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes)) | 1193 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes)) |
| 697 this.requestExceptions.push(filter); | 1194 this.requestExceptions.push(filter); |
| 698 | 1195 |
| 699 if (filter.contentType & typeMap.ELEMHIDE) | 1196 if (filter.contentType & typeMap.GENERICBLOCK) |
| 700 this.elemhideExceptions.push(filter); | 1197 this.genericblockExceptions.push(filter); |
| 1198 | |
| 1199 if (filter.contentType & typeMap.ELEMHIDE) | |
| 1200 this.elemhideExceptions.push(filter); | |
| 1201 else if (filter.contentType & typeMap.GENERICHIDE) | |
| 1202 this.generichideExceptions.push(filter); | |
| 701 } | 1203 } |
| 702 | 1204 |
| 703 if (filter instanceof filterClasses.ElemHideFilter) | 1205 if (filter instanceof filterClasses.ElemHideFilter) |
| 704 this.elemhideFilters.push(filter); | 1206 this.elemhideFilters.push(filter); |
| 705 | 1207 |
| 706 if (filter instanceof filterClasses.ElemHideException) | 1208 if (filter instanceof filterClasses.ElemHideException) |
| 707 { | 1209 { |
| 708 let domains = this.elemhideSelectorExceptions[filter.selector]; | 1210 let domains = this.elemhideSelectorExceptions[filter.selector]; |
| 709 if (!domains) | 1211 if (!domains) |
| 710 domains = this.elemhideSelectorExceptions[filter.selector] = []; | 1212 domains = this.elemhideSelectorExceptions[filter.selector] = []; |
| 711 | 1213 |
| 712 parseDomains(filter.domains, domains, []); | 1214 parseDomains(filter.domains, domains, []); |
| 713 } | 1215 } |
| 714 }; | 1216 }; |
| 715 | 1217 |
| 716 /** | 1218 /** |
| 717 * Generate content blocker list for all filters that were added | 1219 * Generate content blocker list for all filters that were added |
| 718 * | |
| 719 * @returns {Filter} filter Filter to convert | |
| 720 */ | 1220 */ |
| 721 ContentBlockerList.prototype.generateRules = function( | 1221 ContentBlockerList.prototype.generateRules = function() |
| 722 {merge = false, multiMerge = false} = {}) | 1222 { |
| 723 { | 1223 let cssRules = []; |
| 724 let rules = []; | 1224 let cssExceptionRules = []; |
| 725 | 1225 let blockingRules = []; |
| 1226 let blockingExceptionRules = []; | |
| 1227 | |
| 1228 let ruleGroups = [cssRules, cssExceptionRules, | |
| 1229 blockingRules, blockingExceptionRules]; | |
| 1230 | |
| 1231 let genericSelectors = []; | |
| 726 let groupedElemhideFilters = new Map(); | 1232 let groupedElemhideFilters = new Map(); |
| 1233 | |
| 727 for (let filter of this.elemhideFilters) | 1234 for (let filter of this.elemhideFilters) |
| 728 { | 1235 { |
| 729 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
| 730 if (!result) | 1237 if (!result) |
| 731 continue; | 1238 continue; |
| 732 | 1239 |
| 733 if (result.matchDomains.length == 0) | 1240 if (result.matchDomains.length == 0) |
| 734 result.matchDomains = ["^https?://"]; | 1241 { |
| 735 | 1242 genericSelectors.push(result.selector); |
| 736 for (let matchDomain of result.matchDomains) | 1243 } |
| 737 { | 1244 else |
| 738 let group = groupedElemhideFilters.get(matchDomain) || []; | 1245 { |
| 739 group.push(result.selector); | 1246 for (let matchDomain of result.matchDomains) |
| 740 groupedElemhideFilters.set(matchDomain, group); | 1247 { |
| 741 } | 1248 let group = groupedElemhideFilters.get(matchDomain) || []; |
| 742 } | 1249 group.push(result.selector); |
| 1250 groupedElemhideFilters.set(matchDomain, group); | |
| 1251 } | |
| 1252 } | |
| 1253 } | |
| 1254 | |
| 1255 // Separate out the element hiding exceptions that have only a hostname part | |
| 1256 // from the rest. This allows us to implement a workaround for issue #5345 | |
| 1257 // (WebKit bug #167423), but as a bonus it also reduces the number of | |
| 1258 // generated rules. The downside is that the exception will only apply to the | |
| 1259 // top-level document, not to iframes. We have to live with this until the | |
| 1260 // WebKit bug is fixed in all supported versions of Safari. | |
| 1261 // https://bugs.webkit.org/show_bug.cgi?id=167423 | |
| 1262 // | |
| 1263 // Note that as a result of this workaround we end up with a huge rule set in | |
| 1264 // terms of the amount of memory used. This can cause Node.js to throw | |
| 1265 // "JavaScript heap out of memory". To avoid this, call Node.js with | |
| 1266 // --max_old_space_size=4096 | |
| 1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); | |
| 1268 | |
| 1269 let genericSelectorExceptionDomains = | |
| 1270 extractFilterDomains(this.generichideExceptions); | |
| 1271 elemhideExceptionDomains.forEach(name => | |
| 1272 { | |
| 1273 genericSelectorExceptionDomains.add(name); | |
| 1274 }); | |
| 1275 | |
| 1276 addCSSRules(cssRules, genericSelectors, null, | |
| 1277 genericSelectorExceptionDomains); | |
| 1278 | |
| 1279 // Filter out whitelisted domains. | |
| 1280 elemhideExceptionDomains.forEach(domain => | |
| 1281 groupedElemhideFilters.delete(domain)); | |
| 743 | 1282 |
| 744 groupedElemhideFilters.forEach((selectors, matchDomain) => | 1283 groupedElemhideFilters.forEach((selectors, matchDomain) => |
| 745 { | 1284 { |
| 746 while (selectors.length) | 1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains); |
| 747 { | 1286 }); |
| 748 let selector = selectors.splice(0, selectorLimit).join(", "); | 1287 |
| 749 | 1288 let requestFilterExceptionDomains = []; |
| 750 // As of Safari 9.0 element IDs are matched as lowercase. We work around | 1289 for (let filter of this.genericblockExceptions) |
| 751 // this by converting to the attribute format [id="elementID"] | 1290 { |
| 752 selector = convertIDSelectorsToAttributeSelectors(selector); | 1291 let parsed = parseFilterRegexpSource(filter.regexpSource); |
| 753 | 1292 if (parsed.hostname) |
| 754 rules.push({ | 1293 requestFilterExceptionDomains.push(parsed.hostname); |
| 755 trigger: {"url-filter": matchDomain, | 1294 } |
| 756 "url-filter-is-case-sensitive": true}, | 1295 |
| 757 action: {type: "css-display-none", | 1296 for (let filter of this.requestFilters) |
| 758 selector: selector} | 1297 { |
| 1298 convertFilterAddRules(blockingRules, filter, "block", true, | |
| 1299 requestFilterExceptionDomains); | |
| 1300 } | |
| 1301 | |
| 1302 for (let filter of this.requestExceptions) | |
| 1303 { | |
| 1304 convertFilterAddRules(blockingExceptionRules, filter, | |
| 1305 "ignore-previous-rules", true); | |
| 1306 } | |
| 1307 | |
| 1308 return async(ruleGroups, (group, index) => () => | |
| 1309 { | |
| 1310 let next = () => | |
| 1311 { | |
| 1312 if (index == ruleGroups.length - 1) | |
| 1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []); | |
| 1314 }; | |
| 1315 | |
| 1316 if (this.options.merge == "all" || | |
| 1317 (this.options.merge == "auto" && | |
| 1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000)) | |
| 1319 { | |
| 1320 return mergeRules(ruleGroups[index], this.options.merge == "all") | |
| 1321 .then(rules => | |
| 1322 { | |
| 1323 ruleGroups[index] = rules; | |
| 1324 return next(); | |
| 759 }); | 1325 }); |
| 760 } | 1326 } |
| 1327 | |
| 1328 return next(); | |
| 761 }); | 1329 }); |
| 762 | |
| 763 for (let filter of this.elemhideExceptions) | |
| 764 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | |
| 765 for (let filter of this.requestFilters) | |
| 766 convertFilterAddRules(rules, filter, "block", true); | |
| 767 for (let filter of this.requestExceptions) | |
| 768 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | |
| 769 | |
| 770 rules = rules.filter(rule => !hasNonASCI(rule)); | |
| 771 | |
| 772 if (merge) | |
| 773 rules = mergeCloselyMatchingRules(rules, {multi: multiMerge}); | |
| 774 | |
| 775 return rules; | |
| 776 }; | 1330 }; |
| LEFT | RIGHT |