OLD | NEW |
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 * case, a hostname string (or undefined) and a bool | 102 * case, a hostname string (or undefined) and a bool |
103 * indicating if the source only contains a hostname or not: | 103 * indicating if the source only contains a hostname or not: |
104 * {regexp: "...", | 104 * {regexp: "...", |
105 * canSafelyMatchAsLowercase: true/false, | 105 * canSafelyMatchAsLowercase: true/false, |
106 * hostname: "...", | 106 * hostname: "...", |
107 * justHostname: true/false} | 107 * justHostname: true/false} |
108 */ | 108 */ |
function parseFilterRegexpSource(text)
{
  // Pieces of the resulting regular expression source, joined at the end.
  let regexp = [];

  // Convert the text into an array of Unicode characters.
  //
  // In the case of surrogate pairs (the smiley emoji, for example), one
  // Unicode code point is represented by two JavaScript characters together.
  // We want to iterate over Unicode code points rather than JavaScript
  // characters, so that each code point is percent-encoded as a whole.
  let characters = Array.from(text);

  let lastIndex = characters.length - 1;
  let hostname;
  // Index (into characters) of the first hostname character, or null while
  // no hostname anchor ("||" or "://") has been seen.
  let hostnameStart = null;
  let hostnameFinished = false;
  let justHostname = false;
  let canSafelyMatchAsLowercase = false;

  for (let i = 0; i < characters.length; i++)
  {
    let c = characters[i];

    // Anything processed after the hostname ended means the source contains
    // more than just a hostname.
    if (hostnameFinished)
      justHostname = false;

    // If we're currently inside the hostname we have to be careful not to
    // escape any characters until after we have converted it to punycode.
    if (hostnameStart != null && !hostnameFinished)
    {
      let endingChar = (c == "*" || c == "^" ||
                        c == "?" || c == "/" || c == "|");
      if (!endingChar && i != lastIndex)
        continue;

      // When the text ends without a terminating character the current
      // character still belongs to the hostname, hence i + 1.
      hostname = punycode.toASCII(
        characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
      );
      hostnameFinished = justHostname = true;
      regexp.push(escapeRegExp(hostname));
      if (!endingChar)
        break;
    }

    switch (c)
    {
      case "*":
        // Leading, trailing and repeated wildcards add nothing to the match.
        if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
          regexp.push(".*");
        break;
      case "^":
        // A separator placeholder at the very end is dropped.
        if (i < lastIndex)
          regexp.push(".");
        break;
      case "|":
        if (i == 0)
        {
          regexp.push("^");
          break;
        }
        if (i == lastIndex)
        {
          regexp.push("$");
          break;
        }
        if (i == 1 && characters[0] == "|")
        {
          // "||" anchors the match at the beginning of a (sub)domain.
          hostnameStart = i + 1;
          canSafelyMatchAsLowercase = true;
          regexp.push("https?://([^/]+\\.)?");
          break;
        }
        regexp.push("\\|");
        break;
      case "/":
        // The second slash of "://" marks the beginning of the hostname.
        // Out-of-range indexes yield undefined, which never matches.
        if (!hostnameFinished &&
            characters[i - 2] == ":" && characters[i - 1] == "/")
        {
          hostnameStart = i + 1;
          canSafelyMatchAsLowercase = true;
        }
        regexp.push("/");
        break;
      case ".": case "+": case "$": case "?":
      case "{": case "}": case "(": case ")":
      case "[": case "]": case "\\":
        regexp.push("\\", c);
        break;
      default:
      {
        if (hostnameFinished && (c >= "a" && c <= "z" ||
                                 c >= "A" && c <= "Z"))
          canSafelyMatchAsLowercase = false;

        // An unpaired surrogate is a single UTF-16 unit in the range
        // U+D800..U+DFFF. encodeURI() throws a URIError for it, which would
        // abort the whole conversion because of one malformed filter, so it
        // is passed through unescaped instead.
        // NOTE(review): this relies on the generated rules being screened
        // for non-ASCII content later on - confirm against the caller.
        let loneSurrogate = (c.length == 1 && c >= "\uD800" && c <= "\uDFFF");

        // "%" is kept as is so that already percent-encoded sequences in the
        // filter text are not encoded a second time.
        if (c == "%" || loneSurrogate)
          regexp.push(c);
        else
          regexp.push(encodeURI(c));
      }
    }
  }

  return {
    regexp: regexp.join(""),
    canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
    hostname: hostname,
    justHostname: justHostname
  };
}
(...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
539 { | 548 { |
540 convertFilterAddRules(rules, filter, "block", true, | 549 convertFilterAddRules(rules, filter, "block", true, |
541 requestFilterExceptionDomains); | 550 requestFilterExceptionDomains); |
542 } | 551 } |
543 | 552 |
544 for (let filter of this.requestExceptions) | 553 for (let filter of this.requestExceptions) |
545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 554 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
546 | 555 |
547 return rules.filter(rule => !hasNonASCI(rule)); | 556 return rules.filter(rule => !hasNonASCI(rule)); |
548 }; | 557 }; |
OLD | NEW |