 Issue 29337803:
  Issue 3710 - Unify hostname logic  (Closed)
    
  
    Issue 29337803:
  Issue 3710 - Unify hostname logic  (Closed) 
  | Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3 * Copyright (C) 2006-2016 Eyeo GmbH | 3 * Copyright (C) 2006-2016 Eyeo GmbH | 
| 4 * | 4 * | 
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as | 
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. | 
| 8 * | 8 * | 
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, | 
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| (...skipping 48 matching lines...) | |
| 59 let excluded = []; | 59 let excluded = []; | 
| 60 let rules = []; | 60 let rules = []; | 
| 61 | 61 | 
| 62 parseDomains(filter.domains, included, excluded); | 62 parseDomains(filter.domains, included, excluded); | 
| 63 | 63 | 
| 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 
| 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 
| 66 } | 66 } | 
| 67 | 67 | 
| 68 /** | 68 /** | 
| 69 * Convert the given filter "regexpSource" string into a regular expression. | 69 * Convert the given filter "regexpSource" string into a regular expression, | 
| 70 * handling the conversion of unicode inside hostnames to punycode. | |
| 70 * (Also deciding if the regular expression can be safely converted to and | 71 * (Also deciding if the regular expression can be safely converted to and | 
| 71 * matched as lower case or not.) | 72 * matched as lower case or not.) | 
| 72 * | 73 * | 
| 73 * @param {string} text regexpSource property of a filter | 74 * @param {string} text regexpSource property of a filter | 
| 74 * @returns {object} An object containing a regular expression string and a bool | 75 * @returns {object} An object containing a regular expression string and a bool | 
| 75 * indicating if the filter can be safely matched as lower | 76 * indicating if the filter can be safely matched as lower | 
| 76 * case: {regexp: "...", caseSenstive: true/false} | 77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false } | 
| 77 */ | 78 */ | 
| 78 function toRegExp(text) | 79 function toRegExp(text) | 
| 79 { | 80 { | 
| 80 let result = []; | 81 let result = []; | 
| 81 let lastIndex = text.length - 1; | 82 let lastIndex = text.length - 1; | 
| 82 let hostnameStarted = false; | 83 let hostnameStart = null; | 
| 83 let hostnameFinished = false; | 84 let hostnameFinished = false; | 
| 84 let caseSensitive = false; | 85 let canSafelyMatchAsLowercase = false; | 
| 85 | 86 | 
| 86 for (let i = 0; i < text.length; i++) | 87 for (let i = 0; i < text.length; i++) | 
| 87 { | 88 { | 
| 88 let c = text[i]; | 89 let c = text[i]; | 
| 89 | 90 | 
| 91 // If we're currently inside the hostname we have to be careful not to | |
| 92 // escape any characters until after we have converted it to punycode. | |
| 93 if (hostnameStart != null && !hostnameFinished) | |
| 94 { | |
| 95 if (c == "*" || c == "^" || c == "?" || c == "/" || i == lastIndex) | |
| 
Sebastian Noack
2016/02/27 20:30:38
If you turn the logic here the other way around, y
 
Sebastian Noack
2016/02/27 20:30:38
I'm not entirely sure if the case of last index is
 
kzar
2016/02/27 21:28:53
Done.
 
kzar
2016/02/27 21:28:53
Good point but it's even more complicated, what if
 | |
| 96 { | |
| 97 hostnameFinished = true; | |
| 98 let hostname = text.substring(hostnameStart, i); | |
| 99 result.push(escapeRegExp(punycode.toASCII(hostname))); | |
| 100 } | |
| 101 else | |
| 102 continue; | |
| 103 } | |
| 104 | |
| 90 switch (c) | 105 switch (c) | 
| 91 { | 106 { | 
| 92 case "*": | 107 case "*": | 
| 93 if (hostnameStarted) | |
| 94 hostnameFinished = true; | |
| 95 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | 108 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | 
| 96 result.push(".*"); | 109 result.push(".*"); | 
| 97 break; | 110 break; | 
| 98 case "^": | 111 case "^": | 
| 99 if (hostnameStarted) | |
| 100 hostnameFinished = true; | |
| 101 if (i < lastIndex) | 112 if (i < lastIndex) | 
| 102 result.push("."); | 113 result.push("."); | 
| 103 break; | 114 break; | 
| 104 case "|": | 115 case "|": | 
| 105 if (i == 0) | 116 if (i == 0) | 
| 106 { | 117 { | 
| 107 result.push("^"); | 118 result.push("^"); | 
| 108 break; | 119 break; | 
| 109 } | 120 } | 
| 110 if (i == lastIndex) | 121 if (i == lastIndex) | 
| 111 { | 122 { | 
| 112 result.push("$"); | 123 result.push("$"); | 
| 113 break; | 124 break; | 
| 114 } | 125 } | 
| 115 if (i == 1 && text[0] == "|") | 126 if (i == 1 && text[0] == "|") | 
| 116 { | 127 { | 
| 117 hostnameStarted = caseSensitive = true; | |
| 118 result.push("https?://"); | 128 result.push("https?://"); | 
| 
Sebastian Noack
2016/02/27 20:30:38
Nit: Mind moving that line to just above the break
 
kzar
2016/02/27 21:28:53
Done.
 | |
| 129 hostnameStart = i + 1; | |
| 130 canSafelyMatchAsLowercase = true; | |
| 119 break; | 131 break; | 
| 120 } | 132 } | 
| 121 result.push("\\", c); | 133 result.push("\\|"); | 
| 122 break; | 134 break; | 
| 123 case "?": | 135 case "/": | 
| 124 if (hostnameStarted) | 136 result.push("/"); | 
| 
Sebastian Noack
2016/02/27 20:30:38
Nit: Mind moving that line to just above the break
 
kzar
2016/02/27 21:28:53
Done.
 | |
| 125 hostnameFinished = true; | 137 if (!hostnameFinished && | 
| 
Sebastian Noack
2016/02/27 20:30:38
Nit: It doesn't matter, but I personally find that
 
kzar
2016/02/27 21:28:53
I'd rather leave this one as it is.
 | |
| 126 case ".": case "+": case "$": case "{": case "}": | 138 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 
| 139 { | |
| 140 hostnameStart = i + 1; | |
| 141 canSafelyMatchAsLowercase = true; | |
| 142 } | |
| 143 break; | |
| 144 case ".": case "+": case "$": case "{": case "}": case "?": | |
| 
Sebastian Noack
2016/02/27 20:30:38
Nit: I think the way this block was originally wra
 
kzar
2016/02/27 21:28:53
Done.
 | |
| 127 case "(": case ")": case "[": case "]": case "\\": | 145 case "(": case ")": case "[": case "]": case "\\": | 
| 128 result.push("\\", c); | 146 result.push("\\", c); | 
| 129 break; | 147 break; | 
| 130 case "/": | |
| 131 if (hostnameStarted) | |
| 132 hostnameFinished = true; | |
| 133 else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | |
| 134 hostnameStarted = caseSensitive = true; | |
| 
kzar
2016/02/27 14:29:34
(I've switched this around as I decided that sneak
 | |
| 135 default: | 148 default: | 
| 136 if (hostnameFinished && (c >= "a" && c <= "z" || | 149 if (hostnameFinished && (c >= "a" && c <= "z" || | 
| 137 c >= "A" && c <= "Z")) | 150 c >= "A" && c <= "Z")) | 
| 138 caseSensitive = false; | 151 canSafelyMatchAsLowercase = false; | 
| 139 result.push(c); | 152 result.push(c); | 
| 140 } | 153 } | 
| 141 } | 154 } | 
| 142 | 155 | 
| 143 return {regexp: result.join(""), caseSensitive: caseSensitive}; | 156 return {regexp: result.join(""), | 
| 157 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; | |
| 144 } | 158 } | 
| 145 | 159 | 
| 146 function getRegExpTrigger(filter) | 160 function getRegExpTrigger(filter) | 
| 147 { | 161 { | 
| 148 let result = toRegExp(filter.regexpSource.replace( | 162 let result = toRegExp(filter.regexpSource); | 
| 149 // Safari expects punycode, filter lists use unicode | |
| 150 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, | |
| 151 function (match, prefix, domain) | |
| 152 { | |
| 153 return prefix + punycode.toASCII(domain); | |
| 154 } | |
| 155 )); | |
| 156 | 163 | 
| 157 let trigger = {"url-filter": result.regexp}; | 164 let trigger = {"url-filter": result.regexp}; | 
| 158 | 165 | 
| 159 // Limit rules to HTTP(S) URLs | 166 // Limit rules to HTTP(S) URLs | 
| 160 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 167 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 
| 161 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 168 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 
| 162 | 169 | 
| 163 // For rules containing only a hostname we know that we're matching against | 170 // For rules containing only a hostname we know that we're matching against | 
| 164 // a lowercase string unless the matchCase option was passed. | 171 // a lowercase string unless the matchCase option was passed. | 
| 165 if (result.caseSensitive && !filter.matchCase) | 172 if (result.canSafelyMatchAsLowercase && !filter.matchCase) | 
| 166 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 173 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 
| 167 | 174 | 
| 168 if (result.caseSensitive || filter.matchCase) | 175 if (result.canSafelyMatchAsLowercase || filter.matchCase) | 
| 169 trigger["url-filter-is-case-sensitive"] = true; | 176 trigger["url-filter-is-case-sensitive"] = true; | 
| 170 | 177 | 
| 171 return trigger; | 178 return trigger; | 
| 172 } | 179 } | 
| 173 | 180 | 
| 174 function getResourceTypes(filter) | 181 function getResourceTypes(filter) | 
| 175 { | 182 { | 
| 176 let types = []; | 183 let types = []; | 
| 177 | 184 | 
| 178 if (filter.contentType & typeMap.IMAGE) | 185 if (filter.contentType & typeMap.IMAGE) | 
| (...skipping 248 matching lines...) | |
| 427 | 434 | 
| 428 for (let filter of this.elemhideExceptions) | 435 for (let filter of this.elemhideExceptions) | 
| 429 addRule(convertFilter(filter, "ignore-previous-rules", false)); | 436 addRule(convertFilter(filter, "ignore-previous-rules", false)); | 
| 430 for (let filter of this.requestFilters) | 437 for (let filter of this.requestFilters) | 
| 431 addRule(convertFilter(filter, "block", true)); | 438 addRule(convertFilter(filter, "block", true)); | 
| 432 for (let filter of this.requestExceptions) | 439 for (let filter of this.requestExceptions) | 
| 433 addRule(convertFilter(filter, "ignore-previous-rules", true)); | 440 addRule(convertFilter(filter, "ignore-previous-rules", true)); | 
| 434 | 441 | 
| 435 return rules; | 442 return rules; | 
| 436 }; | 443 }; | 
| OLD | NEW |