| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 114 * case, a hostname string (or undefined) and a bool | 114 * case, a hostname string (or undefined) and a bool |
| 115 * indicating if the source only contains a hostname or not: | 115 * indicating if the source only contains a hostname or not: |
| 116 * {regexp: "...", | 116 * {regexp: "...", |
| 117 * canSafelyMatchAsLowercase: true/false, | 117 * canSafelyMatchAsLowercase: true/false, |
| 118 * hostname: "...", | 118 * hostname: "...", |
| 119 * justHostname: true/false} | 119 * justHostname: true/false} |
| 120 */ | 120 */ |
| 121 function parseFilterRegexpSource(text) | 121 function parseFilterRegexpSource(text) |
| 122 { | 122 { |
| 123 let regexp = []; | 123 let regexp = []; |
| 124 let lastIndex = text.length - 1; | 124 |
| 125 // Convert the text into an array of Unicode characters. | |
| 126 // | |
| 127 // In the case of surrogate pairs (the smiley emoji, for example), one | |
| 128 // Unicode code point is represented by two JavaScript characters together. | |
| 129 // We want to iterate over Unicode code points rather than JavaScript | |
| 130 // characters. | |
| 131 let characters = Array.from(text); | |
| 132 | |
| 133 let lastIndex = characters.length - 1; | |
| 125 let hostname; | 134 let hostname; |
| 126 let hostnameStart = null; | 135 let hostnameStart = null; |
| 127 let hostnameFinished = false; | 136 let hostnameFinished = false; |
| 128 let justHostname = false; | 137 let justHostname = false; |
| 129 let canSafelyMatchAsLowercase = false; | 138 let canSafelyMatchAsLowercase = false; |
| 130 | 139 |
| 131 for (let i = 0; i < text.length; i++) | 140 for (let i = 0; i < characters.length; i++) |
| 132 { | 141 { |
| 133 let c = text[i]; | 142 let c = characters[i]; |
| 134 | 143 |
| 135 if (hostnameFinished) | 144 if (hostnameFinished) |
| 136 justHostname = false; | 145 justHostname = false; |
| 137 | 146 |
| 138 // If we're currently inside the hostname we have to be careful not to | 147 // If we're currently inside the hostname we have to be careful not to |
| 139 // escape any characters until after we have converted it to punycode. | 148 // escape any characters until after we have converted it to punycode. |
| 140 if (hostnameStart != null && !hostnameFinished) | 149 if (hostnameStart != null && !hostnameFinished) |
| 141 { | 150 { |
| 142 let endingChar = (c == "*" || c == "^" || | 151 let endingChar = (c == "*" || c == "^" || |
| 143 c == "?" || c == "/" || c == "|"); | 152 c == "?" || c == "/" || c == "|"); |
| 144 if (!endingChar && i != lastIndex) | 153 if (!endingChar && i != lastIndex) |
| 145 continue; | 154 continue; |
| 146 | 155 |
| 147 hostname = punycode.toASCII( | 156 hostname = punycode.toASCII( |
| 148 text.substring(hostnameStart, endingChar ? i : i + 1).toLowerCase() | 157 characters.slice(hostnameStart, endingChar ? i : i + 1).join("") |
|
Manish Jethani
2017/07/12 08:59:56
This was the only conflict during rebase, since te
| |
| 158 .toLowerCase() | |
| 149 ); | 159 ); |
| 150 hostnameFinished = justHostname = true; | 160 hostnameFinished = justHostname = true; |
| 151 regexp.push(escapeRegExp(hostname)); | 161 regexp.push(escapeRegExp(hostname)); |
| 152 if (!endingChar) | 162 if (!endingChar) |
| 153 break; | 163 break; |
| 154 } | 164 } |
| 155 | 165 |
| 156 switch (c) | 166 switch (c) |
| 157 { | 167 { |
| 158 case "*": | 168 case "*": |
| 159 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") | 169 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*") |
| 160 regexp.push(".*"); | 170 regexp.push(".*"); |
| 161 break; | 171 break; |
| 162 case "^": | 172 case "^": |
| 163 if (i < lastIndex) | 173 if (i < lastIndex) |
| 164 regexp.push("."); | 174 regexp.push("."); |
| 165 break; | 175 break; |
| 166 case "|": | 176 case "|": |
| 167 if (i == 0) | 177 if (i == 0) |
| 168 { | 178 { |
| 169 regexp.push("^"); | 179 regexp.push("^"); |
| 170 break; | 180 break; |
| 171 } | 181 } |
| 172 if (i == lastIndex) | 182 if (i == lastIndex) |
| 173 { | 183 { |
| 174 regexp.push("$"); | 184 regexp.push("$"); |
| 175 break; | 185 break; |
| 176 } | 186 } |
| 177 if (i == 1 && text[0] == "|") | 187 if (i == 1 && characters[0] == "|") |
| 178 { | 188 { |
| 179 hostnameStart = i + 1; | 189 hostnameStart = i + 1; |
| 180 canSafelyMatchAsLowercase = true; | 190 canSafelyMatchAsLowercase = true; |
| 181 regexp.push("https?://([^/]+\\.)?"); | 191 regexp.push("https?://([^/]+\\.)?"); |
| 182 break; | 192 break; |
| 183 } | 193 } |
| 184 regexp.push("\\|"); | 194 regexp.push("\\|"); |
| 185 break; | 195 break; |
| 186 case "/": | 196 case "/": |
| 187 if (!hostnameFinished && | 197 if (!hostnameFinished && |
| 188 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 198 characters[i - 2] == ":" && characters[i - 1] == "/") |
| 189 { | 199 { |
| 190 hostnameStart = i + 1; | 200 hostnameStart = i + 1; |
| 191 canSafelyMatchAsLowercase = true; | 201 canSafelyMatchAsLowercase = true; |
| 192 } | 202 } |
| 193 regexp.push("/"); | 203 regexp.push("/"); |
| 194 break; | 204 break; |
| 195 case ".": case "+": case "$": case "?": | 205 case ".": case "+": case "$": case "?": |
| 196 case "{": case "}": case "(": case ")": | 206 case "{": case "}": case "(": case ")": |
| 197 case "[": case "]": case "\\": | 207 case "[": case "]": case "\\": |
| 198 regexp.push("\\", c); | 208 regexp.push("\\", c); |
| 199 break; | 209 break; |
| 200 default: | 210 default: |
| 201 if (hostnameFinished && (c >= "a" && c <= "z" || | 211 if (hostnameFinished && (c >= "a" && c <= "z" || |
| 202 c >= "A" && c <= "Z")) | 212 c >= "A" && c <= "Z")) |
| 203 canSafelyMatchAsLowercase = false; | 213 canSafelyMatchAsLowercase = false; |
| 204 regexp.push(c); | 214 regexp.push(c == "%" ? c : encodeURI(c)); |
| 205 } | 215 } |
| 206 } | 216 } |
| 207 | 217 |
| 208 return { | 218 return { |
| 209 regexp: regexp.join(""), | 219 regexp: regexp.join(""), |
| 210 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 220 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| 211 hostname: hostname, | 221 hostname: hostname, |
| 212 justHostname: justHostname | 222 justHostname: justHostname |
| 213 }; | 223 }; |
| 214 } | 224 } |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 281 let included = []; | 291 let included = []; |
| 282 let excluded = []; | 292 let excluded = []; |
| 283 | 293 |
| 284 parseDomains(filter.domains, included, excluded); | 294 parseDomains(filter.domains, included, excluded); |
| 285 | 295 |
| 286 if (exceptionDomains) | 296 if (exceptionDomains) |
| 287 excluded = excluded.concat(exceptionDomains); | 297 excluded = excluded.concat(exceptionDomains); |
| 288 | 298 |
| 289 if (withResourceTypes) | 299 if (withResourceTypes) |
| 290 { | 300 { |
| 291 trigger["resource-type"] = getResourceTypes(filter); | 301 let resourceTypes = getResourceTypes(filter); |
| 292 | 302 |
| 293 if (trigger["resource-type"].length == 0) | 303 // Content blocker rules can't differentiate between sub-document requests |
| 304 // (iframes) and top-level document requests. To avoid too many false | |
| 305 // positives, we prevent rules with no hostname part from blocking document | |
| 306 // requests. | |
| 307 // | |
| 308 // Once Safari 11 becomes our minimum supported version, we could change | |
| 309 // our approach here to use the new "unless-top-url" property instead. | |
| 310 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname) | |
| 311 resourceTypes = resourceTypes.filter(type => type != "document"); | |
| 312 | |
| 313 if (resourceTypes.length == 0) | |
| 294 return; | 314 return; |
| 315 | |
| 316 trigger["resource-type"] = resourceTypes; | |
| 295 } | 317 } |
| 296 | 318 |
| 297 if (filter.thirdParty != null) | 319 if (filter.thirdParty != null) |
| 298 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 320 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| 299 | 321 |
| 300 if (included.length > 0) | 322 if (included.length > 0) |
| 301 { | 323 { |
| 302 trigger["if-domain"] = []; | 324 trigger["if-domain"] = []; |
| 303 | 325 |
| 304 for (let name of included) | 326 for (let name of included) |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 320 { | 342 { |
| 321 trigger["if-domain"].push("*" + name); | 343 trigger["if-domain"].push("*" + name); |
| 322 } | 344 } |
| 323 } | 345 } |
| 324 } | 346 } |
| 325 else if (excluded.length > 0) | 347 else if (excluded.length > 0) |
| 326 { | 348 { |
| 327 trigger["unless-domain"] = excluded.map(name => "*" + name); | 349 trigger["unless-domain"] = excluded.map(name => "*" + name); |
| 328 } | 350 } |
| 329 else if (filter instanceof filterClasses.BlockingFilter && | 351 else if (filter instanceof filterClasses.BlockingFilter && |
| 330 filter.contentType & typeMap.SUBDOCUMENT) | 352 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname) |
| 331 { | 353 { |
| 354 // Rules with a hostname part are still allowed to block document requests, | |
| 355 // but we add an exception for top-level documents. | |
| 356 // | |
| 357 // Note that we can only do this if there's no "unless-domain" property for | |
| 358 // now. This also only works in Safari 11 onwards, while older versions | |
| 359 // simply ignore this property. Once Safari 11 becomes our minimum | |
| 360 // supported version, we can merge "unless-domain" into "unless-top-url". | |
| 332 trigger["unless-top-url"] = [trigger["url-filter"]]; | 361 trigger["unless-top-url"] = [trigger["url-filter"]]; |
| 333 if (trigger["url-filter-is-case-sensitive"]) | 362 if (trigger["url-filter-is-case-sensitive"]) |
| 334 trigger["top-url-filter-is-case-sensitive"] = true; | 363 trigger["top-url-filter-is-case-sensitive"] = true; |
| 335 } | 364 } |
| 336 | 365 |
| 337 rules.push({trigger: trigger, action: {type: action}}); | 366 rules.push({trigger: trigger, action: {type: action}}); |
| 338 } | |
| 339 | |
| 340 function hasNonASCI(obj) | |
| 341 { | |
| 342 if (typeof obj == "string") | |
| 343 { | |
| 344 if (/[^\x00-\x7F]/.test(obj)) | |
| 345 return true; | |
| 346 } | |
| 347 | |
| 348 if (typeof obj == "object") | |
| 349 { | |
| 350 if (obj instanceof Array) | |
| 351 for (let item of obj) | |
| 352 if (hasNonASCI(item)) | |
| 353 return true; | |
| 354 | |
| 355 let names = Object.getOwnPropertyNames(obj); | |
| 356 for (let name of names) | |
| 357 if (hasNonASCI(obj[name])) | |
| 358 return true; | |
| 359 } | |
| 360 | |
| 361 return false; | |
| 362 } | 367 } |
| 363 | 368 |
| 364 function convertIDSelectorsToAttributeSelectors(selector) | 369 function convertIDSelectorsToAttributeSelectors(selector) |
| 365 { | 370 { |
| 366 // First we figure out where all the IDs are | 371 // First we figure out where all the IDs are |
| 367 let sep = ""; | 372 let sep = ""; |
| 368 let start = null; | 373 let start = null; |
| 369 let positions = []; | 374 let positions = []; |
| 370 for (let i = 0; i < selector.length; i++) | 375 for (let i = 0; i < selector.length; i++) |
| 371 { | 376 { |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 407 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 412 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
| 408 i = pos.end; | 413 i = pos.end; |
| 409 } | 414 } |
| 410 newSelector.push(selector.substring(i)); | 415 newSelector.push(selector.substring(i)); |
| 411 | 416 |
| 412 return newSelector.join(""); | 417 return newSelector.join(""); |
| 413 } | 418 } |
| 414 | 419 |
| 415 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) | 420 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) |
| 416 { | 421 { |
| 422 let unlessDomain = exceptionDomains.size > 0 ? [] : null; | |
| 423 | |
| 424 exceptionDomains.forEach(name => unlessDomain.push("*" + name)); | |
| 425 | |
| 417 while (selectors.length) | 426 while (selectors.length) |
| 418 { | 427 { |
| 419 let selector = selectors.splice(0, selectorLimit).join(", "); | 428 let selector = selectors.splice(0, selectorLimit).join(", "); |
| 420 | 429 |
| 421 // As of Safari 9.0 element IDs are matched as lowercase. We work around | 430 // As of Safari 9.0 element IDs are matched as lowercase. We work around |
| 422 // this by converting to the attribute format [id="elementID"] | 431 // this by converting to the attribute format [id="elementID"] |
| 423 selector = convertIDSelectorsToAttributeSelectors(selector); | 432 selector = convertIDSelectorsToAttributeSelectors(selector); |
| 424 | 433 |
| 425 let rule = { | 434 let rule = { |
| 426 trigger: {"url-filter": matchDomain, | 435 trigger: {"url-filter": matchDomain, |
| 427 "url-filter-is-case-sensitive": true}, | 436 "url-filter-is-case-sensitive": true}, |
| 428 action: {type: "css-display-none", | 437 action: {type: "css-display-none", |
| 429 selector: selector} | 438 selector: selector} |
| 430 }; | 439 }; |
| 431 | 440 |
| 432 if (exceptionDomains.size > 0) | 441 if (unlessDomain) |
| 433 { | 442 rule.trigger["unless-domain"] = unlessDomain; |
| 434 rule.trigger["unless-domain"] = []; | |
| 435 exceptionDomains.forEach(name => | |
| 436 { | |
| 437 rule.trigger["unless-domain"].push("*" + name); | |
| 438 }); | |
| 439 } | |
| 440 | 443 |
| 441 rules.push(rule); | 444 rules.push(rule); |
| 442 } | 445 } |
| 443 } | 446 } |
| 444 | 447 |
| 445 let ContentBlockerList = | 448 let ContentBlockerList = |
| 446 /** | 449 /** |
| 447 * Create a new Adblock Plus filter to content blocker list converter | 450 * Create a new Adblock Plus filter to content blocker list converter |
| 448 * | 451 * |
| 449 * @constructor | 452 * @constructor |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 546 // Note that as a result of this workaround we end up with a huge rule set in | 549 // Note that as a result of this workaround we end up with a huge rule set in |
| 547 // terms of the amount of memory used. This can cause Node.js to throw | 550 // terms of the amount of memory used. This can cause Node.js to throw |
| 548 // "JavaScript heap out of memory". To avoid this, call Node.js with | 551 // "JavaScript heap out of memory". To avoid this, call Node.js with |
| 549 // --max_old_space_size=4096 | 552 // --max_old_space_size=4096 |
| 550 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); | 553 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); |
| 551 | 554 |
| 552 let genericSelectorExceptionDomains = | 555 let genericSelectorExceptionDomains = |
| 553 extractFilterDomains(this.generichideExceptions); | 556 extractFilterDomains(this.generichideExceptions); |
| 554 elemhideExceptionDomains.forEach(name => | 557 elemhideExceptionDomains.forEach(name => |
| 555 { | 558 { |
| 556 genericSelectorExceptionDomains.add(name); | 559 genericSelectorExceptionDomains.add(name); |
|
kzar
2017/07/10 12:33:08
I wonder if it would be better to pass two Sets of
Manish Jethani
2017/07/11 11:19:18
We'd still have to combine them into one set if we
kzar
2017/07/11 12:20:03
Fair enough.
| |
| 557 }); | 560 }); |
| 558 | 561 |
| 559 addCSSRules(rules, genericSelectors, "^https?://", | 562 addCSSRules(rules, genericSelectors, "^https?://", |
| 560 genericSelectorExceptionDomains); | 563 genericSelectorExceptionDomains); |
| 561 | 564 |
| 562 groupedElemhideFilters.forEach((selectors, matchDomain) => | 565 groupedElemhideFilters.forEach((selectors, matchDomain) => |
| 563 { | 566 { |
| 564 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains); | 567 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains); |
| 565 }); | 568 }); |
| 566 | 569 |
| 567 let requestFilterExceptionDomains = []; | 570 let requestFilterExceptionDomains = []; |
| 568 for (let filter of this.genericblockExceptions) | 571 for (let filter of this.genericblockExceptions) |
| 569 { | 572 { |
| 570 let parsed = parseFilterRegexpSource(filter.regexpSource); | 573 let parsed = parseFilterRegexpSource(filter.regexpSource); |
| 571 if (parsed.hostname) | 574 if (parsed.hostname) |
| 572 requestFilterExceptionDomains.push(parsed.hostname); | 575 requestFilterExceptionDomains.push(parsed.hostname); |
| 573 } | 576 } |
| 574 | 577 |
| 575 for (let filter of this.requestFilters) | 578 for (let filter of this.requestFilters) |
| 576 { | 579 { |
| 577 convertFilterAddRules(rules, filter, "block", true, | 580 convertFilterAddRules(rules, filter, "block", true, |
| 578 requestFilterExceptionDomains); | 581 requestFilterExceptionDomains); |
| 579 } | 582 } |
| 580 | 583 |
| 581 for (let filter of this.requestExceptions) | 584 for (let filter of this.requestExceptions) |
| 582 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 585 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
| 583 | 586 |
| 584 return rules.filter(rule => !hasNonASCI(rule)); | 587 return rules; |
| 585 }; | 588 }; |
| LEFT | RIGHT |