| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 69 let subdomains = []; | 69 let subdomains = []; |
| 70 let suffixLength = domain.length + 1; | 70 let suffixLength = domain.length + 1; |
| 71 | 71 |
| 72 for (let name of list) | 72 for (let name of list) |
| 73 { | 73 { |
| 74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) | 74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) |
| 75 subdomains.push(name.slice(0, -suffixLength)); | 75 subdomains.push(name.slice(0, -suffixLength)); |
| 76 } | 76 } |
| 77 | 77 |
| 78 return subdomains; | 78 return subdomains; |
| 79 } | |
| 80 | |
| 81 function extractFilterDomains(filters) | |
| 82 { | |
| 83 let domains = new Set(); | |
| 84 for (let filter of filters) | |
| 85 { | |
| 86 let parsed = parseFilterRegexpSource(filter.regexpSource); | |
| 87 if (parsed.justHostname) | |
| 88 domains.add(parsed.hostname); | |
| 89 } | |
| 90 return domains; | |
| 79 } | 91 } |
| 80 | 92 |
| 81 function convertElemHideFilter(filter, elemhideSelectorExceptions) | 93 function convertElemHideFilter(filter, elemhideSelectorExceptions) |
| 82 { | 94 { |
| 83 let included = []; | 95 let included = []; |
| 84 let excluded = []; | 96 let excluded = []; |
| 85 let rules = []; | 97 let rules = []; |
| 86 | 98 |
| 87 parseDomains(filter.domains, included, excluded); | 99 parseDomains(filter.domains, included, excluded); |
| 88 | 100 |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 102 * case, a hostname string (or undefined) and a bool | 114 * case, a hostname string (or undefined) and a bool |
| 103 * indicating if the source only contains a hostname or not: | 115 * indicating if the source only contains a hostname or not: |
| 104 * {regexp: "...", | 116 * {regexp: "...", |
| 105 * canSafelyMatchAsLowercase: true/false, | 117 * canSafelyMatchAsLowercase: true/false, |
| 106 * hostname: "...", | 118 * hostname: "...", |
| 107 * justHostname: true/false} | 119 * justHostname: true/false} |
| 108 */ | 120 */ |
| 109 function parseFilterRegexpSource(text) | 121 function parseFilterRegexpSource(text) |
| 110 { | 122 { |
| 111 let regexp = []; | 123 let regexp = []; |
| 112 let lastIndex = text.length - 1; | 124 |
| 125 // Convert the text into an array of Unicode characters. | |
| 126 // | |
| 127 // In the case of surrogate pairs (the smiley emoji, for example), one | |
| 128 // Unicode code point is represented by two JavaScript characters together. | |
| 129 // We want to iterate over Unicode code points rather than JavaScript | |
| 130 // characters. | |
| 131 let characters = Array.from(text); | |
| 132 | |
| 133 let lastIndex = characters.length - 1; | |
| 113 let hostname; | 134 let hostname; |
| 114 let hostnameStart = null; | 135 let hostnameStart = null; |
| 115 let hostnameFinished = false; | 136 let hostnameFinished = false; |
| 116 let justHostname = false; | 137 let justHostname = false; |
| 117 let canSafelyMatchAsLowercase = false; | 138 let canSafelyMatchAsLowercase = false; |
| 118 | 139 |
| 119 for (let i = 0; i < text.length; i++) | 140 for (let i = 0; i < characters.length; i++) |
| 120 { | 141 { |
| 121 let c = text[i]; | 142 let c = characters[i]; |
| 122 | 143 |
| 123 if (hostnameFinished) | 144 if (hostnameFinished) |
| 124 justHostname = false; | 145 justHostname = false; |
| 125 | 146 |
| 126 // If we're currently inside the hostname we have to be careful not to | 147 // If we're currently inside the hostname we have to be careful not to |
| 127 // escape any characters until after we have converted it to punycode. | 148 // escape any characters until after we have converted it to punycode. |
| 128 if (hostnameStart != null && !hostnameFinished) | 149 if (hostnameStart != null && !hostnameFinished) |
| 129 { | 150 { |
| 130 let endingChar = (c == "*" || c == "^" || | 151 let endingChar = (c == "*" || c == "^" || |
| 131 c == "?" || c == "/" || c == "|"); | 152 c == "?" || c == "/" || c == "|"); |
| 132 if (!endingChar && i != lastIndex) | 153 if (!endingChar && i != lastIndex) |
| 133 continue; | 154 continue; |
| 134 | 155 |
| 135 hostname = punycode.toASCII( | 156 hostname = punycode.toASCII( |
| 136 text.substring(hostnameStart, endingChar ? i : i + 1) | 157 characters.slice(hostnameStart, endingChar ? i : i + 1).join("") |
| 158 .toLowerCase() | |
| 137 ); | 159 ); |
| 138 hostnameFinished = justHostname = true; | 160 hostnameFinished = justHostname = true; |
| 139 regexp.push(escapeRegExp(hostname)); | 161 regexp.push(escapeRegExp(hostname)); |
| 140 if (!endingChar) | 162 if (!endingChar) |
| 141 break; | 163 break; |
| 142 } | 164 } |
| 143 | 165 |
| 144 switch (c) | 166 switch (c) |
| 145 { | 167 { |
| 146 case "*": | 168 case "*": |
| 147 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") | 169 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*") |
| 148 regexp.push(".*"); | 170 regexp.push(".*"); |
| 149 break; | 171 break; |
| 150 case "^": | 172 case "^": |
| 151 if (i < lastIndex) | 173 let alphabet = "a-z"; |
| 152 regexp.push("[^.%A-Za-z0-9_]"); | 174 // If justHostname is true and we've encountered a "^", it means we're |
| 175 // still in the hostname part of the URL. Since hostnames are always | |
| 176 // lower case (Punycode), there's no need to include "A-Z" in the | |
| 177 // pattern. Further, subsequent code may lower-case the entire regular | |
| 178 // expression (if the URL contains only the hostname part), leaving us | |
| 179 // with "a-za-z", which would be redundant. | |
| 180 if (!justHostname) | |
| 181 alphabet = "A-Z" + alphabet; | |
| 182 let digits = "0-9"; | |
| 183 // Note that the "-" must appear first here in order to retain its | |
| 184 // literal meaning within the brackets. | |
| 185 let specialCharacters = "-_.%"; | |
| 186 let separator = "[^" + specialCharacters + alphabet + digits + "]"; | |
| 187 if (i == 0) | |
| 188 regexp.push("^https?://(.*" + separator + ")?"); | |
| 189 else if (i == lastIndex) | |
| 190 regexp.push("(" + separator + ".*)?$"); | |
| 153 else | 191 else |
| 154 regexp.push("([^.%A-Za-z0-9_].*)?$"); | 192 regexp.push(separator); |
|
Sebastian Noack
2017/06/16 21:13:18
Can you put the duplicated part of the regexp in a variable?
Manish Jethani
2017/06/19 10:39:54
Done.
| |
| 155 canSafelyMatchAsLowercase = false; | |
|
Sebastian Noack
2017/06/16 21:13:18
Why is that necessary?
Manish Jethani
2017/06/19 10:39:54
It was converting "A-Z" into "a-z". I thought about […]
| |
| 156 break; | 193 break; |
| 157 case "|": | 194 case "|": |
| 158 if (i == 0) | 195 if (i == 0) |
| 159 { | 196 { |
| 160 regexp.push("^"); | 197 regexp.push("^"); |
| 161 break; | 198 break; |
| 162 } | 199 } |
| 163 if (i == lastIndex) | 200 if (i == lastIndex) |
| 164 { | 201 { |
| 165 regexp.push("$"); | 202 regexp.push("$"); |
| 166 break; | 203 break; |
| 167 } | 204 } |
| 168 if (i == 1 && text[0] == "|") | 205 if (i == 1 && characters[0] == "|") |
| 169 { | 206 { |
| 170 hostnameStart = i + 1; | 207 hostnameStart = i + 1; |
| 171 canSafelyMatchAsLowercase = true; | 208 canSafelyMatchAsLowercase = true; |
| 172 regexp.push("https?://([^/]+\\.)?"); | 209 regexp.push("https?://([^/]+\\.)?"); |
| 173 break; | 210 break; |
| 174 } | 211 } |
| 175 regexp.push("\\|"); | 212 regexp.push("\\|"); |
| 176 break; | 213 break; |
| 177 case "/": | 214 case "/": |
| 178 if (!hostnameFinished && | 215 if (!hostnameFinished && |
| 179 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 216 characters[i - 2] == ":" && characters[i - 1] == "/") |
| 180 { | 217 { |
| 181 hostnameStart = i + 1; | 218 hostnameStart = i + 1; |
| 182 canSafelyMatchAsLowercase = true; | 219 canSafelyMatchAsLowercase = true; |
| 183 } | 220 } |
| 184 regexp.push("/"); | 221 regexp.push("/"); |
| 185 break; | 222 break; |
| 186 case ".": case "+": case "$": case "?": | 223 case ".": case "+": case "$": case "?": |
| 187 case "{": case "}": case "(": case ")": | 224 case "{": case "}": case "(": case ")": |
| 188 case "[": case "]": case "\\": | 225 case "[": case "]": case "\\": |
| 189 regexp.push("\\", c); | 226 regexp.push("\\", c); |
| 190 break; | 227 break; |
| 191 default: | 228 default: |
| 192 if (hostnameFinished && (c >= "a" && c <= "z" || | 229 if (hostnameFinished && (c >= "a" && c <= "z" || |
| 193 c >= "A" && c <= "Z")) | 230 c >= "A" && c <= "Z")) |
| 194 canSafelyMatchAsLowercase = false; | 231 canSafelyMatchAsLowercase = false; |
| 195 regexp.push(c); | 232 regexp.push(c == "%" ? c : encodeURI(c)); |
| 196 } | 233 } |
| 197 } | 234 } |
| 198 | 235 |
| 199 return { | 236 return { |
| 200 regexp: regexp.join(""), | 237 regexp: regexp.join(""), |
| 201 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 238 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| 202 hostname: hostname, | 239 hostname: hostname, |
| 203 justHostname: justHostname | 240 justHostname: justHostname |
| 204 }; | 241 }; |
| 205 } | 242 } |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 272 let included = []; | 309 let included = []; |
| 273 let excluded = []; | 310 let excluded = []; |
| 274 | 311 |
| 275 parseDomains(filter.domains, included, excluded); | 312 parseDomains(filter.domains, included, excluded); |
| 276 | 313 |
| 277 if (exceptionDomains) | 314 if (exceptionDomains) |
| 278 excluded = excluded.concat(exceptionDomains); | 315 excluded = excluded.concat(exceptionDomains); |
| 279 | 316 |
| 280 if (withResourceTypes) | 317 if (withResourceTypes) |
| 281 { | 318 { |
| 282 trigger["resource-type"] = getResourceTypes(filter); | 319 let resourceTypes = getResourceTypes(filter); |
| 283 | 320 |
| 284 if (trigger["resource-type"].length == 0) | 321 // Content blocker rules can't differentiate between sub-document requests |
| 322 // (iframes) and top-level document requests. To avoid too many false | |
| 323 // positives, we prevent rules with no hostname part from blocking document | |
| 324 // requests. | |
| 325 // | |
| 326 // Once Safari 11 becomes our minimum supported version, we could change | |
| 327 // our approach here to use the new "unless-top-url" property instead. | |
| 328 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname) | |
| 329 resourceTypes = resourceTypes.filter(type => type != "document"); | |
| 330 | |
| 331 if (resourceTypes.length == 0) | |
| 285 return; | 332 return; |
| 333 | |
| 334 trigger["resource-type"] = resourceTypes; | |
| 286 } | 335 } |
| 287 | 336 |
| 288 if (filter.thirdParty != null) | 337 if (filter.thirdParty != null) |
| 289 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 338 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| 290 | 339 |
| 291 if (included.length > 0) | 340 if (included.length > 0) |
| 292 { | 341 { |
| 293 trigger["if-domain"] = []; | 342 trigger["if-domain"] = []; |
| 294 | 343 |
| 295 for (let name of included) | 344 for (let name of included) |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 311 { | 360 { |
| 312 trigger["if-domain"].push("*" + name); | 361 trigger["if-domain"].push("*" + name); |
| 313 } | 362 } |
| 314 } | 363 } |
| 315 } | 364 } |
| 316 else if (excluded.length > 0) | 365 else if (excluded.length > 0) |
| 317 { | 366 { |
| 318 trigger["unless-domain"] = excluded.map(name => "*" + name); | 367 trigger["unless-domain"] = excluded.map(name => "*" + name); |
| 319 } | 368 } |
| 320 else if (filter instanceof filterClasses.BlockingFilter && | 369 else if (filter instanceof filterClasses.BlockingFilter && |
| 321 filter.contentType & typeMap.SUBDOCUMENT) | 370 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname) |
| 322 { | 371 { |
| 372 // Rules with a hostname part are still allowed to block document requests, | |
| 373 // but we add an exception for top-level documents. | |
| 374 // | |
| 375 // Note that we can only do this if there's no "unless-domain" property for | |
| 376 // now. This also only works in Safari 11 onwards, while older versions | |
| 377 // simply ignore this property. Once Safari 11 becomes our minimum | |
| 378 // supported version, we can merge "unless-domain" into "unless-top-url". | |
| 323 trigger["unless-top-url"] = [trigger["url-filter"]]; | 379 trigger["unless-top-url"] = [trigger["url-filter"]]; |
| 324 if (trigger["url-filter-is-case-sensitive"]) | 380 if (trigger["url-filter-is-case-sensitive"]) |
| 325 trigger["top-url-filter-is-case-sensitive"] = true; | 381 trigger["top-url-filter-is-case-sensitive"] = true; |
| 326 } | 382 } |
| 327 | 383 |
| 328 rules.push({trigger: trigger, action: {type: action}}); | 384 rules.push({trigger: trigger, action: {type: action}}); |
| 329 } | |
| 330 | |
| 331 function hasNonASCI(obj) | |
| 332 { | |
| 333 if (typeof obj == "string") | |
| 334 { | |
| 335 if (/[^\x00-\x7F]/.test(obj)) | |
| 336 return true; | |
| 337 } | |
| 338 | |
| 339 if (typeof obj == "object") | |
| 340 { | |
| 341 if (obj instanceof Array) | |
| 342 for (let item of obj) | |
| 343 if (hasNonASCI(item)) | |
| 344 return true; | |
| 345 | |
| 346 let names = Object.getOwnPropertyNames(obj); | |
| 347 for (let name of names) | |
| 348 if (hasNonASCI(obj[name])) | |
| 349 return true; | |
| 350 } | |
| 351 | |
| 352 return false; | |
| 353 } | 385 } |
| 354 | 386 |
| 355 function convertIDSelectorsToAttributeSelectors(selector) | 387 function convertIDSelectorsToAttributeSelectors(selector) |
| 356 { | 388 { |
| 357 // First we figure out where all the IDs are | 389 // First we figure out where all the IDs are |
| 358 let sep = ""; | 390 let sep = ""; |
| 359 let start = null; | 391 let start = null; |
| 360 let positions = []; | 392 let positions = []; |
| 361 for (let i = 0; i < selector.length; i++) | 393 for (let i = 0; i < selector.length; i++) |
| 362 { | 394 { |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 396 { | 428 { |
| 397 newSelector.push(selector.substring(i, pos.start)); | 429 newSelector.push(selector.substring(i, pos.start)); |
| 398 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 430 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
| 399 i = pos.end; | 431 i = pos.end; |
| 400 } | 432 } |
| 401 newSelector.push(selector.substring(i)); | 433 newSelector.push(selector.substring(i)); |
| 402 | 434 |
| 403 return newSelector.join(""); | 435 return newSelector.join(""); |
| 404 } | 436 } |
| 405 | 437 |
| 406 function addCSSRules(rules, selectors, matchDomain) | 438 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) |
| 407 { | 439 { |
| 440 let unlessDomain = exceptionDomains.size > 0 ? [] : null; | |
| 441 | |
| 442 exceptionDomains.forEach(name => unlessDomain.push("*" + name)); | |
| 443 | |
| 408 while (selectors.length) | 444 while (selectors.length) |
| 409 { | 445 { |
| 410 let selector = selectors.splice(0, selectorLimit).join(", "); | 446 let selector = selectors.splice(0, selectorLimit).join(", "); |
| 411 | 447 |
| 412 // As of Safari 9.0 element IDs are matched as lowercase. We work around | 448 // As of Safari 9.0 element IDs are matched as lowercase. We work around |
| 413 // this by converting to the attribute format [id="elementID"] | 449 // this by converting to the attribute format [id="elementID"] |
| 414 selector = convertIDSelectorsToAttributeSelectors(selector); | 450 selector = convertIDSelectorsToAttributeSelectors(selector); |
| 415 | 451 |
| 416 rules.push({ | 452 let rule = { |
| 417 trigger: {"url-filter": matchDomain, | 453 trigger: {"url-filter": matchDomain, |
| 418 "url-filter-is-case-sensitive": true}, | 454 "url-filter-is-case-sensitive": true}, |
| 419 action: {type: "css-display-none", | 455 action: {type: "css-display-none", |
| 420 selector: selector} | 456 selector: selector} |
| 421 }); | 457 }; |
| 458 | |
| 459 if (unlessDomain) | |
| 460 rule.trigger["unless-domain"] = unlessDomain; | |
| 461 | |
| 462 rules.push(rule); | |
| 422 } | 463 } |
| 423 } | 464 } |
| 424 | 465 |
| 425 let ContentBlockerList = | 466 let ContentBlockerList = |
| 426 /** | 467 /** |
| 427 * Create a new Adblock Plus filter to content blocker list converter | 468 * Create a new Adblock Plus filter to content blocker list converter |
| 428 * | 469 * |
| 429 * @constructor | 470 * @constructor |
| 430 */ | 471 */ |
| 431 exports.ContentBlockerList = function () | 472 exports.ContentBlockerList = function () |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 508 { | 549 { |
| 509 for (let matchDomain of result.matchDomains) | 550 for (let matchDomain of result.matchDomains) |
| 510 { | 551 { |
| 511 let group = groupedElemhideFilters.get(matchDomain) || []; | 552 let group = groupedElemhideFilters.get(matchDomain) || []; |
| 512 group.push(result.selector); | 553 group.push(result.selector); |
| 513 groupedElemhideFilters.set(matchDomain, group); | 554 groupedElemhideFilters.set(matchDomain, group); |
| 514 } | 555 } |
| 515 } | 556 } |
| 516 } | 557 } |
| 517 | 558 |
| 518 addCSSRules(rules, genericSelectors, "^https?://"); | 559 // Separate out the element hiding exceptions that have only a hostname part |
| 519 | 560 // from the rest. This allows us to implement a workaround for issue #5345 |
| 520 // Right after the generic element hiding filters, add the exceptions that | 561 // (WebKit bug #167423), but as a bonus it also reduces the number of |
| 521 // should apply only to those filters. | 562 // generated rules. The downside is that the exception will only apply to the |
| 522 for (let filter of this.generichideExceptions) | 563 // top-level document, not to iframes. We have to live with this until the |
| 523 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 564 // WebKit bug is fixed in all supported versions of Safari. |
| 565 // https://bugs.webkit.org/show_bug.cgi?id=167423 | |
| 566 // | |
| 567 // Note that as a result of this workaround we end up with a huge rule set in | |
| 568 // terms of the amount of memory used. This can cause Node.js to throw | |
| 569 // "JavaScript heap out of memory". To avoid this, call Node.js with | |
| 570 // --max_old_space_size=4096 | |
| 571 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); | |
| 572 | |
| 573 let genericSelectorExceptionDomains = | |
| 574 extractFilterDomains(this.generichideExceptions); | |
| 575 elemhideExceptionDomains.forEach(name => | |
| 576 { | |
| 577 genericSelectorExceptionDomains.add(name); | |
| 578 }); | |
| 579 | |
| 580 addCSSRules(rules, genericSelectors, "^https?://", | |
| 581 genericSelectorExceptionDomains); | |
| 524 | 582 |
| 525 groupedElemhideFilters.forEach((selectors, matchDomain) => | 583 groupedElemhideFilters.forEach((selectors, matchDomain) => |
| 526 { | 584 { |
| 527 addCSSRules(rules, selectors, matchDomain); | 585 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains); |
| 528 }); | 586 }); |
| 529 | |
| 530 for (let filter of this.elemhideExceptions) | |
| 531 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | |
| 532 | 587 |
| 533 let requestFilterExceptionDomains = []; | 588 let requestFilterExceptionDomains = []; |
| 534 for (let filter of this.genericblockExceptions) | 589 for (let filter of this.genericblockExceptions) |
| 535 { | 590 { |
| 536 let parsed = parseFilterRegexpSource(filter.regexpSource); | 591 let parsed = parseFilterRegexpSource(filter.regexpSource); |
| 537 if (parsed.hostname) | 592 if (parsed.hostname) |
| 538 requestFilterExceptionDomains.push(parsed.hostname); | 593 requestFilterExceptionDomains.push(parsed.hostname); |
| 539 } | 594 } |
| 540 | 595 |
| 541 for (let filter of this.requestFilters) | 596 for (let filter of this.requestFilters) |
| 542 { | 597 { |
| 543 convertFilterAddRules(rules, filter, "block", true, | 598 convertFilterAddRules(rules, filter, "block", true, |
| 544 requestFilterExceptionDomains); | 599 requestFilterExceptionDomains); |
| 545 } | 600 } |
| 546 | 601 |
| 547 for (let filter of this.requestExceptions) | 602 for (let filter of this.requestExceptions) |
| 548 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 603 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
| 549 | 604 |
| 550 return rules.filter(rule => !hasNonASCI(rule)); | 605 return rules; |
| 551 }; | 606 }; |
| LEFT | RIGHT |