 Issue 29468575:
  Issue 5332 - Support Unicode characters in path  (Closed) 
  Base URL: https://hg.adblockplus.org/abp2blocklist
    
  
    Issue 29468575:
  Issue 5332 - Support Unicode characters in path  (Closed) 
  Base URL: https://hg.adblockplus.org/abp2blocklist

| Left: | |
| Right: | |
| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH | 
| 4 * | 4 * | 
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as | 
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. | 
| 8 * | 8 * | 
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, | 
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| (...skipping 91 matching lines...) | |
| 102 * case, a hostname string (or undefined) and a bool | 102 * case, a hostname string (or undefined) and a bool | 
| 103 * indicating if the source only contains a hostname or not: | 103 * indicating if the source only contains a hostname or not: | 
| 104 * {regexp: "...", | 104 * {regexp: "...", | 
| 105 * canSafelyMatchAsLowercase: true/false, | 105 * canSafelyMatchAsLowercase: true/false, | 
| 106 * hostname: "...", | 106 * hostname: "...", | 
| 107 * justHostname: true/false} | 107 * justHostname: true/false} | 
| 108 */ | 108 */ | 
| 109 function parseFilterRegexpSource(text) | 109 function parseFilterRegexpSource(text) | 
| 110 { | 110 { | 
| 111 let regexp = []; | 111 let regexp = []; | 
| 112 let lastIndex = text.length - 1; | 112 let characters = Array.from(text); | 
| 
Manish Jethani
2017/06/19 14:41:06
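We just need to convert the string into an array t… [comment truncated in this view; presumably: to iterate over whole Unicode characters (code points) rather than UTF-16 code units]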
 
kzar
2017/07/07 12:18:07
Maybe add a comment about this?
 
Manish Jethani
2017/07/08 06:02:19
Done.
 | |
| 113 let lastIndex = characters.length - 1; | |
| 113 let hostname; | 114 let hostname; | 
| 114 let hostnameStart = null; | 115 let hostnameStart = null; | 
| 115 let hostnameFinished = false; | 116 let hostnameFinished = false; | 
| 116 let justHostname = false; | 117 let justHostname = false; | 
| 117 let canSafelyMatchAsLowercase = false; | 118 let canSafelyMatchAsLowercase = false; | 
| 118 | 119 | 
| 119 for (let i = 0; i < text.length; i++) | 120 for (let i = 0; i < characters.length; i++) | 
| 120 { | 121 { | 
| 121 let c = text[i]; | 122 let c = characters[i]; | 
| 
Manish Jethani
2017/06/19 14:41:06
Note that c.length is 2 if the character is a surrogate pair… [comment truncated in this view]
 
kzar
2017/07/07 12:18:07
Acknowledged.
 | |
| 122 | 123 | 
| 123 if (hostnameFinished) | 124 if (hostnameFinished) | 
| 124 justHostname = false; | 125 justHostname = false; | 
| 125 | 126 | 
| 126 // If we're currently inside the hostname we have to be careful not to | 127 // If we're currently inside the hostname we have to be careful not to | 
| 127 // escape any characters until after we have converted it to punycode. | 128 // escape any characters until after we have converted it to punycode. | 
| 128 if (hostnameStart != null && !hostnameFinished) | 129 if (hostnameStart != null && !hostnameFinished) | 
| 129 { | 130 { | 
| 130 let endingChar = (c == "*" || c == "^" || | 131 let endingChar = (c == "*" || c == "^" || | 
| 131 c == "?" || c == "/" || c == "|"); | 132 c == "?" || c == "/" || c == "|"); | 
| 132 if (!endingChar && i != lastIndex) | 133 if (!endingChar && i != lastIndex) | 
| 133 continue; | 134 continue; | 
| 134 | 135 | 
| 135 hostname = punycode.toASCII( | 136 hostname = punycode.toASCII( | 
| 136 text.substring(hostnameStart, endingChar ? i : i + 1) | 137 characters.slice(hostnameStart, endingChar ? i : i + 1).join("") | 
| 137 ); | 138 ); | 
| 138 hostnameFinished = justHostname = true; | 139 hostnameFinished = justHostname = true; | 
| 139 regexp.push(escapeRegExp(hostname)); | 140 regexp.push(escapeRegExp(hostname)); | 
| 140 if (!endingChar) | 141 if (!endingChar) | 
| 141 break; | 142 break; | 
| 142 } | 143 } | 
| 143 | 144 | 
| 144 switch (c) | 145 switch (c) | 
| 145 { | 146 { | 
| 146 case "*": | 147 case "*": | 
| 147 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") | 148 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*") | 
| 148 regexp.push(".*"); | 149 regexp.push(".*"); | 
| 149 break; | 150 break; | 
| 150 case "^": | 151 case "^": | 
| 151 if (i < lastIndex) | 152 if (i < lastIndex) | 
| 152 regexp.push("."); | 153 regexp.push("."); | 
| 153 break; | 154 break; | 
| 154 case "|": | 155 case "|": | 
| 155 if (i == 0) | 156 if (i == 0) | 
| 156 { | 157 { | 
| 157 regexp.push("^"); | 158 regexp.push("^"); | 
| 158 break; | 159 break; | 
| 159 } | 160 } | 
| 160 if (i == lastIndex) | 161 if (i == lastIndex) | 
| 161 { | 162 { | 
| 162 regexp.push("$"); | 163 regexp.push("$"); | 
| 163 break; | 164 break; | 
| 164 } | 165 } | 
| 165 if (i == 1 && text[0] == "|") | 166 if (i == 1 && characters[0] == "|") | 
| 166 { | 167 { | 
| 167 hostnameStart = i + 1; | 168 hostnameStart = i + 1; | 
| 168 canSafelyMatchAsLowercase = true; | 169 canSafelyMatchAsLowercase = true; | 
| 169 regexp.push("https?://([^/]+\\.)?"); | 170 regexp.push("https?://([^/]+\\.)?"); | 
| 170 break; | 171 break; | 
| 171 } | 172 } | 
| 172 regexp.push("\\|"); | 173 regexp.push("\\|"); | 
| 173 break; | 174 break; | 
| 174 case "/": | 175 case "/": | 
| 175 if (!hostnameFinished && | 176 if (!hostnameFinished && | 
| 176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 177 characters[i - 2] == ":" && characters[i - 1] == "/") | 
| 177 { | 178 { | 
| 178 hostnameStart = i + 1; | 179 hostnameStart = i + 1; | 
| 179 canSafelyMatchAsLowercase = true; | 180 canSafelyMatchAsLowercase = true; | 
| 180 } | 181 } | 
| 181 regexp.push("/"); | 182 regexp.push("/"); | 
| 182 break; | 183 break; | 
| 183 case ".": case "+": case "$": case "?": | 184 case ".": case "+": case "$": case "?": | 
| 184 case "{": case "}": case "(": case ")": | 185 case "{": case "}": case "(": case ")": | 
| 185 case "[": case "]": case "\\": | 186 case "[": case "]": case "\\": | 
| 186 regexp.push("\\", c); | 187 regexp.push("\\", c); | 
| 187 break; | 188 break; | 
| 188 default: | 189 default: | 
| 189 if (hostnameFinished && (c >= "a" && c <= "z" || | 190 if (hostnameFinished && (c >= "a" && c <= "z" || | 
| 190 c >= "A" && c <= "Z")) | 191 c >= "A" && c <= "Z")) | 
| 191 canSafelyMatchAsLowercase = false; | 192 canSafelyMatchAsLowercase = false; | 
| 192 regexp.push(c); | 193 regexp.push(c == "%" ? c : encodeURI(c)); | 
| 
Manish Jethani
2017/06/19 14:41:06
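Do not encode the percent sign because that means… [comment truncated in this view; presumably: the filter text is already percent-encoded at that point]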
 | |
| 193 } | 194 } | 
| 194 } | 195 } | 
| 195 | 196 | 
| 196 return { | 197 return { | 
| 197 regexp: regexp.join(""), | 198 regexp: regexp.join(""), | 
| 198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 199 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 
| 199 hostname: hostname, | 200 hostname: hostname, | 
| 200 justHostname: justHostname | 201 justHostname: justHostname | 
| 201 }; | 202 }; | 
| 202 } | 203 } | 
| (...skipping 336 matching lines...) | |
| 539 { | 540 { | 
| 540 convertFilterAddRules(rules, filter, "block", true, | 541 convertFilterAddRules(rules, filter, "block", true, | 
| 541 requestFilterExceptionDomains); | 542 requestFilterExceptionDomains); | 
| 542 } | 543 } | 
| 543 | 544 | 
| 544 for (let filter of this.requestExceptions) | 545 for (let filter of this.requestExceptions) | 
| 545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 546 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 
| 546 | 547 | 
| 547 return rules.filter(rule => !hasNonASCI(rule)); | 548 return rules.filter(rule => !hasNonASCI(rule)); | 
| 548 }; | 549 }; | 
| OLD | NEW |