| OLD | NEW |
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2016 Eyeo GmbH | 3 * Copyright (C) 2006-2016 Eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 /** @module abp2blocklist */ | 18 /** @module abp2blocklist */ |
| 19 | 19 |
| 20 "use strict"; | 20 "use strict"; |
| 21 | 21 |
| 22 let filterClasses = require("filterClasses"); | 22 let filterClasses = require("filterClasses"); |
| 23 let tldjs = require("tldjs"); | 23 let tldjs = require("tldjs"); |
| 24 let punycode = require("punycode"); | 24 let punycode = require("punycode"); |
| 25 | 25 |
// Maximum number of element hiding selectors that are joined into the
// selector of a single "css-display-none" rule.
const selectorLimit = 5000;
// Shorthand for the content type bit flags of RegExpFilter.
const typeMap = filterClasses.RegExpFilter.typeMap;
// Bit mask of the request content types for which a whitelist filter is
// collected as a request exception.
const whitelistableRequestTypes = (typeMap.IMAGE
                                   | typeMap.STYLESHEET
                                   | typeMap.SCRIPT
                                   | typeMap.FONT
                                   | typeMap.MEDIA
                                   | typeMap.POPUP
                                   | typeMap.OBJECT
                                   | typeMap.OBJECT_SUBREQUEST
                                   | typeMap.XMLHTTPREQUEST
                                   | typeMap.PING
                                   | typeMap.SUBDOCUMENT
                                   | typeMap.OTHER);
| 28 | 40 |
| 29 function parseDomains(domains, included, excluded) | 41 function parseDomains(domains, included, excluded) |
| 30 { | 42 { |
| 31 for (let domain in domains) | 43 for (let domain in domains) |
| 32 { | 44 { |
| 33 if (domain != "") | 45 if (domain != "") |
| 34 { | 46 { |
| 35 let enabled = domains[domain]; | 47 let enabled = domains[domain]; |
| 36 domain = punycode.toASCII(domain.toLowerCase()); | 48 domain = punycode.toASCII(domain.toLowerCase()); |
| 37 | 49 |
| (...skipping 21 matching lines...) Expand all Loading... |
| 59 let excluded = []; | 71 let excluded = []; |
| 60 let rules = []; | 72 let rules = []; |
| 61 | 73 |
| 62 parseDomains(filter.domains, included, excluded); | 74 parseDomains(filter.domains, included, excluded); |
| 63 | 75 |
| 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 76 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
| 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 77 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
| 66 } | 78 } |
| 67 | 79 |
/**
 * Parse the given filter "regexpSource" string, producing a regular
 * expression, extracting the hostname (if any), deciding if the regular
 * expression is safe to be converted to and matched as lower case, and noting
 * whether the source contains anything after the hostname.
 *
 * The returned hostname is lower-cased before being converted to punycode
 * (the same normalisation parseDomains applies to domains) so that it can be
 * used as a domain value in a rule trigger. The casing of the regular
 * expression itself is left untouched; callers decide whether to lower-case
 * it based on canSafelyMatchAsLowercase.
 *
 * @param {string} text regexpSource property of a filter
 * @returns {object} An object containing a regular expression string, a bool
 *                   indicating if the filter can be safely matched as lower
 *                   case, a hostname string (or undefined) and a bool
 *                   indicating if the source only contains a hostname or not:
 *                     {regexp: "...",
 *                      canSafelyMatchAsLowercase: true/false,
 *                      hostname: "...",
 *                      justHostname: true/false}
 */
function parseFilterRegexpSource(text)
{
  let regexp = [];
  let lastIndex = text.length - 1;
  let hostname;
  let hostnameStart = null;
  let hostnameFinished = false;
  let justHostname = false;
  let canSafelyMatchAsLowercase = false;

  for (let i = 0; i < text.length; i++)
  {
    let c = text[i];

    // Reaching another character after the hostname was completed means the
    // source consists of more than just a hostname.
    if (hostnameFinished)
      justHostname = false;

    // If we're currently inside the hostname we have to be careful not to
    // escape any characters until after we have converted it to punycode.
    if (hostnameStart != null && !hostnameFinished)
    {
      let endingChar = (c == "*" || c == "^" ||
                        c == "?" || c == "/" || c == "|");
      if (!endingChar && i != lastIndex)
        continue;

      // Without an ending character the hostname runs up to and including
      // the last character of the source.
      let rawHostname = text.substring(hostnameStart, endingChar ? i : i + 1);
      hostname = punycode.toASCII(rawHostname.toLowerCase());
      hostnameFinished = justHostname = true;
      regexp.push(escapeRegExp(punycode.toASCII(rawHostname)));
      if (!endingChar)
        break;
    }

    switch (c)
    {
      case "*":
        // Leading, trailing and repeated wildcards add nothing to the match.
        if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")
          regexp.push(".*");
        break;
      case "^":
        // A separator at the very end is dropped from the expression.
        if (i < lastIndex)
          regexp.push(".");
        break;
      case "|":
        if (i == 0)
        {
          regexp.push("^");
          break;
        }
        if (i == lastIndex)
        {
          regexp.push("$");
          break;
        }
        if (i == 1 && text[0] == "|")
        {
          // "||" anchors the filter at the start of the hostname.
          hostnameStart = i + 1;
          canSafelyMatchAsLowercase = true;
          regexp.push("https?://");
          break;
        }
        regexp.push("\\|");
        break;
      case "/":
        // The second slash of "://" marks the start of the hostname.
        if (!hostnameFinished &&
            text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
        {
          hostnameStart = i + 1;
          canSafelyMatchAsLowercase = true;
        }
        regexp.push("/");
        break;
      case ".": case "+": case "$": case "?":
      case "{": case "}": case "(": case ")":
      case "[": case "]": case "\\":
        regexp.push("\\", c);
        break;
      default:
        // Letters after the hostname might be case sensitive.
        if (hostnameFinished && (c >= "a" && c <= "z" ||
                                 c >= "A" && c <= "Z"))
          canSafelyMatchAsLowercase = false;
        regexp.push(c);
    }
  }

  return {
    regexp: regexp.join(""),
    canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
    hostname: hostname,
    justHostname: justHostname
  };
}
| 183 | 190 |
| 184 function getResourceTypes(filter) | 191 function getResourceTypes(filter) |
| 185 { | 192 { |
| 186 let types = []; | 193 let types = []; |
| 187 | 194 |
| 188 if (filter.contentType & typeMap.IMAGE) | 195 if (filter.contentType & typeMap.IMAGE) |
| 189 types.push("image"); | 196 types.push("image"); |
| 190 if (filter.contentType & typeMap.STYLESHEET) | 197 if (filter.contentType & typeMap.STYLESHEET) |
| 191 types.push("style-sheet"); | 198 types.push("style-sheet"); |
| (...skipping 24 matching lines...) Expand all Loading... |
| 216 { | 223 { |
| 217 result.push(domain); | 224 result.push(domain); |
| 218 | 225 |
| 219 if (tldjs.getDomain(domain) == domain) | 226 if (tldjs.getDomain(domain) == domain) |
| 220 result.push("www." + domain); | 227 result.push("www." + domain); |
| 221 } | 228 } |
| 222 | 229 |
| 223 return result; | 230 return result; |
| 224 } | 231 } |
| 225 | 232 |
/**
 * Convert the given request filter into content blocker rules and append them
 * to the given array.
 *
 * @param {object[]} rules Array the generated rules are appended to
 * @param {Filter} filter Filter to convert
 * @param {string} action Action type for the generated rule
 * @param {boolean} withResourceTypes Whether the generated rule should be
 *                                    limited to the filter's resource types
 */
function convertFilterAddRules(rules, filter, action, withResourceTypes)
{
  let parsed = parseFilterRegexpSource(filter.regexpSource);

  // For the special case of $document whitelisting filters with just a domain
  // we can generate an equivalent blocking rule exception using if-domain.
  if (filter instanceof filterClasses.WhitelistFilter &&
      filter.contentType & typeMap.DOCUMENT &&
      parsed.justHostname)
  {
    rules.push({
      trigger: {
        "url-filter": ".*",
        "if-domain": addDomainPrefix([parsed.hostname])
      },
      action: {type: "ignore-previous-rules"}
    });
    // If the filter contains other supported options we'll need to generate
    // further rules for it, but if not we can simply return now. (This has to
    // be a bitwise AND, a bitwise OR with the mask would never be zero.)
    if (!(filter.contentType & whitelistableRequestTypes))
      return;
  }

  let trigger = {"url-filter": parsed.regexp};

  // Limit rules to HTTP(S) URLs
  if (!/^(\^|http)/i.test(trigger["url-filter"]))
    trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

  // For rules containing only a hostname we know that we're matching against
  // a lowercase string unless the matchCase option was passed.
  if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
    trigger["url-filter"] = trigger["url-filter"].toLowerCase();

  if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
    trigger["url-filter-is-case-sensitive"] = true;

  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  if (withResourceTypes)
    trigger["resource-type"] = getResourceTypes(filter);
  if (filter.thirdParty != null)
    trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

  // Included domains take precedence, excluded ones are only used when no
  // domain is included.
  if (included.length > 0)
    trigger["if-domain"] = addDomainPrefix(included);
  else if (excluded.length > 0)
    trigger["unless-domain"] = addDomainPrefix(excluded);

  rules.push({trigger: trigger, action: {type: action}});
}
| 246 | 287 |
| 247 function hasNonASCI(obj) | 288 function hasNonASCI(obj) |
| 248 { | 289 { |
| 249 if (typeof obj == "string") | 290 if (typeof obj == "string") |
| 250 { | 291 { |
| 251 if (/[^\x00-\x7F]/.test(obj)) | 292 if (/[^\x00-\x7F]/.test(obj)) |
| 252 return true; | 293 return true; |
| 253 } | 294 } |
| 254 | 295 |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 345 return; | 386 return; |
| 346 if (filter instanceof filterClasses.RegExpFilter && | 387 if (filter instanceof filterClasses.RegExpFilter && |
| 347 filter.regexpSource == null) | 388 filter.regexpSource == null) |
| 348 return; | 389 return; |
| 349 | 390 |
| 350 if (filter instanceof filterClasses.BlockingFilter) | 391 if (filter instanceof filterClasses.BlockingFilter) |
| 351 this.requestFilters.push(filter); | 392 this.requestFilters.push(filter); |
| 352 | 393 |
| 353 if (filter instanceof filterClasses.WhitelistFilter) | 394 if (filter instanceof filterClasses.WhitelistFilter) |
| 354 { | 395 { |
| 355 if (filter.contentType & (typeMap.IMAGE | 396 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes)) |
| 356 | typeMap.STYLESHEET | |
| 357 | typeMap.SCRIPT | |
| 358 | typeMap.FONT | |
| 359 | typeMap.MEDIA | |
| 360 | typeMap.POPUP | |
| 361 | typeMap.OBJECT | |
| 362 | typeMap.OBJECT_SUBREQUEST | |
| 363 | typeMap.XMLHTTPREQUEST | |
| 364 | typeMap.PING | |
| 365 | typeMap.SUBDOCUMENT | |
| 366 | typeMap.OTHER)) | |
| 367 this.requestExceptions.push(filter); | 397 this.requestExceptions.push(filter); |
| 368 | 398 |
| 369 if (filter.contentType & typeMap.ELEMHIDE) | 399 if (filter.contentType & typeMap.ELEMHIDE) |
| 370 this.elemhideExceptions.push(filter); | 400 this.elemhideExceptions.push(filter); |
| 371 } | 401 } |
| 372 | 402 |
| 373 if (filter instanceof filterClasses.ElemHideFilter) | 403 if (filter instanceof filterClasses.ElemHideFilter) |
| 374 this.elemhideFilters.push(filter); | 404 this.elemhideFilters.push(filter); |
| 375 | 405 |
| 376 if (filter instanceof filterClasses.ElemHideException) | 406 if (filter instanceof filterClasses.ElemHideException) |
| 377 { | 407 { |
| 378 let domains = this.elemhideSelectorExceptions[filter.selector]; | 408 let domains = this.elemhideSelectorExceptions[filter.selector]; |
| 379 if (!domains) | 409 if (!domains) |
| 380 domains = this.elemhideSelectorExceptions[filter.selector] = []; | 410 domains = this.elemhideSelectorExceptions[filter.selector] = []; |
| 381 | 411 |
| 382 parseDomains(filter.domains, domains, []); | 412 parseDomains(filter.domains, domains, []); |
| 383 } | 413 } |
| 384 }; | 414 }; |
| 385 | 415 |
/**
 * Generate the content blocker rules for all filters that were added.
 *
 * Rules are produced in this order: element hiding rules, element hiding
 * exceptions, blocking rules and finally request exceptions.
 *
 * @param {Filter} filter Not used by this method
 * @returns {object[]} The generated rules, without those for which hasNonASCI
 *                     returns true
 */
ContentBlockerList.prototype.generateRules = function(filter)
{
  let allRules = [];

  // Collect the selectors of all convertible element hiding filters, keyed by
  // the "url-filter" pattern they are applied with.
  let selectorsByPattern = new Map();
  for (let elemhideFilter of this.elemhideFilters)
  {
    let converted = convertElemHideFilter(elemhideFilter,
                                          this.elemhideSelectorExceptions);
    if (!converted)
      continue;

    // An empty list of match domains is replaced by a single pattern matching
    // any HTTP(S) URL.
    let patterns = converted.matchDomains;
    if (patterns.length == 0)
      patterns = ["^https?://"];

    for (let pattern of patterns)
    {
      if (selectorsByPattern.has(pattern))
        selectorsByPattern.get(pattern).push(converted.selector);
      else
        selectorsByPattern.set(pattern, [converted.selector]);
    }
  }

  for (let entry of selectorsByPattern)
  {
    let pattern = entry[0];
    let selectors = entry[1];

    // Emit one rule per chunk of at most selectorLimit selectors.
    for (let start = 0; start < selectors.length; start += selectorLimit)
    {
      let chunk = selectors.slice(start, start + selectorLimit);

      // As of Safari 9.0 element IDs are matched as lowercase. We work around
      // this by converting to the attribute format [id="elementID"]
      let selector = convertIDSelectorsToAttributeSelectors(chunk.join(", "));

      allRules.push({
        trigger: {"url-filter": pattern,
                  "url-filter-is-case-sensitive": true},
        action: {type: "css-display-none",
                 selector: selector}
      });
    }
  }

  for (let exception of this.elemhideExceptions)
    convertFilterAddRules(allRules, exception, "ignore-previous-rules", false);
  for (let blockingFilter of this.requestFilters)
    convertFilterAddRules(allRules, blockingFilter, "block", true);
  for (let exception of this.requestExceptions)
    convertFilterAddRules(allRules, exception, "ignore-previous-rules", true);

  return allRules.filter(rule => !hasNonASCI(rule));
};
| OLD | NEW |