| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2016 Eyeo GmbH | 3 * Copyright (C) 2006-2016 Eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| (...skipping 48 matching lines...) | |
| 59 let excluded = []; | 59 let excluded = []; |
| 60 let rules = []; | 60 let rules = []; |
| 61 | 61 |
| 62 parseDomains(filter.domains, included, excluded); | 62 parseDomains(filter.domains, included, excluded); |
| 63 | 63 |
| 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
| 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
| 66 } | 66 } |
| 67 | 67 |
| 68 /** | 68 /** |
| 69 * Convert the given filter "regexpSource" string into a regular expression, | 69 * Parse the given filter "regexpSource" string, producing a regular expression, |
| 70 * handling the conversion of unicode inside hostnames to punycode. | 70 * extracting the hostname (if any), deciding if the regular expression is safe |
| 71 * (Also deciding if the regular expression can be safely converted to and | 71 * to be converted to and matched as lower case, and noting if the source contains |
| 72 * matched as lower case or not.) | 72 * anything after the hostname. |
| 73 * | 73 * |
| 74 * @param {string} text regexpSource property of a filter | 74 * @param {string} text regexpSource property of a filter |
| 75 * @returns {object} An object containing a regular expression string and a bool | 75 * @returns {object} An object containing a regular expression string, a bool |
| 76 * indicating if the filter can be safely matched as lower | 76 * indicating if the filter can be safely matched as lower |
| 77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false } | 77 * case, a hostname string (or undefined) and a bool |
| 78 * indicating if the source only contains a hostname or not: | |
| 79 * {regexp: "...", | |
| 80 * canSafelyMatchAsLowercase: true/false, | |
| 81 * hostname: "...", | |
| 82 * justHostname: true/false} | |
| 78 */ | 83 */ |
| 79 function toRegExp(text) | 84 function parseFilterRegexpSource(text) |
| 80 { | 85 { |
| 81 let result = []; | 86 let regexp = []; |
| 82 let lastIndex = text.length - 1; | 87 let lastIndex = text.length - 1; |
| 88 let hostname; | |
| 83 let hostnameStart = null; | 89 let hostnameStart = null; |
| 84 let hostnameFinished = false; | 90 let hostnameFinished = false; |
| 91 let justHostname = false; | |
| 85 let canSafelyMatchAsLowercase = false; | 92 let canSafelyMatchAsLowercase = false; |
| 86 | 93 |
| 87 for (let i = 0; i < text.length; i++) | 94 for (let i = 0; i < text.length; i++) |
| 88 { | 95 { |
| 89 let c = text[i]; | 96 let c = text[i]; |
| 90 | 97 |
| 98 if (hostnameFinished) | |
| 99 justHostname = false; | |
| 100 | |
| 91 // If we're currently inside the hostname we have to be careful not to | 101 // If we're currently inside the hostname we have to be careful not to |
| 92 // escape any characters until after we have converted it to punycode. | 102 // escape any characters until after we have converted it to punycode. |
| 93 if (hostnameStart != null && !hostnameFinished) | 103 if (hostnameStart != null && !hostnameFinished) |
| 94 { | 104 { |
| 95 let endingChar = (c == "*" || c == "^" || | 105 let endingChar = (c == "*" || c == "^" || |
| 96 c == "?" || c == "/" || c == "|"); | 106 c == "?" || c == "/" || c == "|"); |
| 97 if (!endingChar && i != lastIndex) | 107 if (!endingChar && i != lastIndex) |
| 98 continue; | 108 continue; |
| 99 | 109 |
| 100 let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); | 110 hostname = punycode.toASCII( |
| 101 hostnameFinished = true; | 111 text.substring(hostnameStart, endingChar ? i : i + 1) |
| 102 result.push(escapeRegExp(punycode.toASCII(hostname))); | 112 ); |
| 113 hostnameFinished = justHostname = true; | |
| 114 regexp.push(escapeRegExp(hostname)); | |
| 103 if (!endingChar) | 115 if (!endingChar) |
| 104 break; | 116 break; |
| 105 } | 117 } |
| 106 | 118 |
| 107 switch (c) | 119 switch (c) |
| 108 { | 120 { |
| 109 case "*": | 121 case "*": |
| 110 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | 122 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
| 111 result.push(".*"); | 123 regexp.push(".*"); |
| 112 break; | 124 break; |
| 113 case "^": | 125 case "^": |
| 114 if (i < lastIndex) | 126 if (i < lastIndex) |
| 115 result.push("."); | 127 regexp.push("."); |
| 116 break; | 128 break; |
| 117 case "|": | 129 case "|": |
| 118 if (i == 0) | 130 if (i == 0) |
| 119 { | 131 { |
| 120 result.push("^"); | 132 regexp.push("^"); |
| 121 break; | 133 break; |
| 122 } | 134 } |
| 123 if (i == lastIndex) | 135 if (i == lastIndex) |
| 124 { | 136 { |
| 125 result.push("$"); | 137 regexp.push("$"); |
| 126 break; | 138 break; |
| 127 } | 139 } |
| 128 if (i == 1 && text[0] == "|") | 140 if (i == 1 && text[0] == "|") |
| 129 { | 141 { |
| 130 hostnameStart = i + 1; | 142 hostnameStart = i + 1; |
| 131 canSafelyMatchAsLowercase = true; | 143 canSafelyMatchAsLowercase = true; |
| 132 result.push("https?://"); | 144 regexp.push("https?://"); |
| 133 break; | 145 break; |
| 134 } | 146 } |
| 135 result.push("\\|"); | 147 regexp.push("\\|"); |
| 136 break; | 148 break; |
| 137 case "/": | 149 case "/": |
| 138 if (!hostnameFinished && | 150 if (!hostnameFinished && |
| 139 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 151 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
| 140 { | 152 { |
| 141 hostnameStart = i + 1; | 153 hostnameStart = i + 1; |
| 142 canSafelyMatchAsLowercase = true; | 154 canSafelyMatchAsLowercase = true; |
| 143 } | 155 } |
| 144 result.push("/"); | 156 regexp.push("/"); |
| 145 break; | 157 break; |
| 146 case ".": case "+": case "$": case "?": | 158 case ".": case "+": case "$": case "?": |
| 147 case "{": case "}": case "(": case ")": | 159 case "{": case "}": case "(": case ")": |
| 148 case "[": case "]": case "\\": | 160 case "[": case "]": case "\\": |
| 149 result.push("\\", c); | 161 regexp.push("\\", c); |
| 150 break; | 162 break; |
| 151 default: | 163 default: |
| 152 if (hostnameFinished && (c >= "a" && c <= "z" || | 164 if (hostnameFinished && (c >= "a" && c <= "z" || |
| 153 c >= "A" && c <= "Z")) | 165 c >= "A" && c <= "Z")) |
| 154 canSafelyMatchAsLowercase = false; | 166 canSafelyMatchAsLowercase = false; |
| 155 result.push(c); | 167 regexp.push(c); |
| 156 } | 168 } |
| 157 } | 169 } |
| 158 | 170 |
| 159 return {regexp: result.join(""), | 171 return { |
| 160 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; | 172 regexp: regexp.join(""), |
| 161 } | 173 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| 162 | 174 hostname: hostname, |
| 163 function getRegExpTrigger(filter) | 175 justHostname: justHostname |
| 164 { | 176 }; |
| 165 let result = toRegExp(filter.regexpSource); | |
| 166 | |
| 167 let trigger = {"url-filter": result.regexp}; | |
| 168 | |
| 169 // Limit rules to to HTTP(S) URLs | |
| 170 if (!/^(\^|http)/i.test(trigger["url-filter"])) | |
| 171 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | |
| 172 | |
| 173 // For rules containing only a hostname we know that we're matching against | |
| 174 // a lowercase string unless the matchCase option was passed. | |
| 175 if (result.canSafelyMatchAsLowercase && !filter.matchCase) | |
| 176 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | |
| 177 | |
| 178 if (result.canSafelyMatchAsLowercase || filter.matchCase) | |
| 179 trigger["url-filter-is-case-sensitive"] = true; | |
| 180 | |
| 181 return trigger; | |
| 182 } | 177 } |
| 183 | 178 |
| 184 function getResourceTypes(filter) | 179 function getResourceTypes(filter) |
| 185 { | 180 { |
| 186 let types = []; | 181 let types = []; |
| 187 | 182 |
| 188 if (filter.contentType & typeMap.IMAGE) | 183 if (filter.contentType & typeMap.IMAGE) |
| 189 types.push("image"); | 184 types.push("image"); |
| 190 if (filter.contentType & typeMap.STYLESHEET) | 185 if (filter.contentType & typeMap.STYLESHEET) |
| 191 types.push("style-sheet"); | 186 types.push("style-sheet"); |
| (...skipping 26 matching lines...) | |
| 218 | 213 |
| 219 if (tldjs.getDomain(domain) == domain) | 214 if (tldjs.getDomain(domain) == domain) |
| 220 result.push("www." + domain); | 215 result.push("www." + domain); |
| 221 } | 216 } |
| 222 | 217 |
| 223 return result; | 218 return result; |
| 224 } | 219 } |
| 225 | 220 |
| 226 function convertFilter(filter, action, withResourceTypes) | 221 function convertFilter(filter, action, withResourceTypes) |
| 227 { | 222 { |
| 228 let trigger = getRegExpTrigger(filter); | 223 let parsed = parseFilterRegexpSource(filter.regexpSource); |
| 224 | |
| 225 // For the special case of $document whitelisting filters with just a domain | |
| 226 // we can generate an equivalent blocking rule exception using if-domain. | |
| 227 if (filter.contentType == typeMap.DOCUMENT && parsed.justHostname) | |
|
Sebastian Noack
2016/05/12 12:12:25
For filters like example.com$document,image we wou
kzar
2016/05/16 16:22:36
Done.
| |
| 228 return {trigger: {"url-filter": ".*", | |
|
Sebastian Noack
2016/05/12 12:12:26
Nit: Mind wrapping the nested object for better re
Sebastian Noack
2016/05/12 12:12:26
Wouldn't an empty string be sufficient as url-filt
kzar
2016/05/16 16:22:36
Done.
kzar
2016/05/16 16:22:36
Unfortunately this causes a "Extension compilation
| |
| 229 "if-domain": addDomainPrefix([parsed.hostname])}, | |
| 230 action: {type: "ignore-previous-rules"}}; | |
| 231 | |
| 232 let trigger = {"url-filter": parsed.regexp}; | |
| 233 | |
| 234 // Limit rules to to HTTP(S) URLs | |
|
Sebastian Noack
2016/05/12 12:12:26
Typo: to to
kzar
2016/05/16 16:22:36
Done.
| |
| 235 if (!/^(\^|http)/i.test(trigger["url-filter"])) | |
| 236 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | |
| 237 | |
| 238 // For rules containing only a hostname we know that we're matching against | |
| 239 // a lowercase string unless the matchCase option was passed. | |
| 240 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | |
| 241 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | |
| 242 | |
| 243 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | |
| 244 trigger["url-filter-is-case-sensitive"] = true; | |
| 245 | |
| 229 let included = []; | 246 let included = []; |
| 230 let excluded = []; | 247 let excluded = []; |
| 231 | 248 |
| 232 parseDomains(filter.domains, included, excluded); | 249 parseDomains(filter.domains, included, excluded); |
| 233 | 250 |
| 234 if (withResourceTypes) | 251 if (withResourceTypes) |
| 235 trigger["resource-type"] = getResourceTypes(filter); | 252 trigger["resource-type"] = getResourceTypes(filter); |
| 236 if (filter.thirdParty != null) | 253 if (filter.thirdParty != null) |
| 237 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 254 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
| 238 | 255 |
| (...skipping 106 matching lines...) | |
| 345 return; | 362 return; |
| 346 if (filter instanceof filterClasses.RegExpFilter && | 363 if (filter instanceof filterClasses.RegExpFilter && |
| 347 filter.regexpSource == null) | 364 filter.regexpSource == null) |
| 348 return; | 365 return; |
| 349 | 366 |
| 350 if (filter instanceof filterClasses.BlockingFilter) | 367 if (filter instanceof filterClasses.BlockingFilter) |
| 351 this.requestFilters.push(filter); | 368 this.requestFilters.push(filter); |
| 352 | 369 |
| 353 if (filter instanceof filterClasses.WhitelistFilter) | 370 if (filter instanceof filterClasses.WhitelistFilter) |
| 354 { | 371 { |
| 355 if (filter.contentType & (typeMap.IMAGE | 372 if (filter.contentType & (typeMap.DOCUMENT |
| 373 | typeMap.IMAGE | |
| 356 | typeMap.STYLESHEET | 374 | typeMap.STYLESHEET |
| 357 | typeMap.SCRIPT | 375 | typeMap.SCRIPT |
| 358 | typeMap.FONT | 376 | typeMap.FONT |
| 359 | typeMap.MEDIA | 377 | typeMap.MEDIA |
| 360 | typeMap.POPUP | 378 | typeMap.POPUP |
| 361 | typeMap.OBJECT | 379 | typeMap.OBJECT |
| 362 | typeMap.OBJECT_SUBREQUEST | 380 | typeMap.OBJECT_SUBREQUEST |
| 363 | typeMap.XMLHTTPREQUEST | 381 | typeMap.XMLHTTPREQUEST |
| 364 | typeMap.PING | 382 | typeMap.PING |
| 365 | typeMap.SUBDOCUMENT | 383 | typeMap.SUBDOCUMENT |
| (...skipping 71 matching lines...) | |
| 437 | 455 |
| 438 for (let filter of this.elemhideExceptions) | 456 for (let filter of this.elemhideExceptions) |
| 439 addRule(convertFilter(filter, "ignore-previous-rules", false)); | 457 addRule(convertFilter(filter, "ignore-previous-rules", false)); |
| 440 for (let filter of this.requestFilters) | 458 for (let filter of this.requestFilters) |
| 441 addRule(convertFilter(filter, "block", true)); | 459 addRule(convertFilter(filter, "block", true)); |
| 442 for (let filter of this.requestExceptions) | 460 for (let filter of this.requestExceptions) |
| 443 addRule(convertFilter(filter, "ignore-previous-rules", true)); | 461 addRule(convertFilter(filter, "ignore-previous-rules", true)); |
| 444 | 462 |
| 445 return rules; | 463 return rules; |
| 446 }; | 464 }; |
| OLD | NEW |