Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29468575: Issue 5332 - Support Unicode characters in path (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Created June 19, 2017, 2:35 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 * case, a hostname string (or undefined) and a bool 102 * case, a hostname string (or undefined) and a bool
103 * indicating if the source only contains a hostname or not: 103 * indicating if the source only contains a hostname or not:
104 * {regexp: "...", 104 * {regexp: "...",
105 * canSafelyMatchAsLowercase: true/false, 105 * canSafelyMatchAsLowercase: true/false,
106 * hostname: "...", 106 * hostname: "...",
107 * justHostname: true/false} 107 * justHostname: true/false}
108 */ 108 */
109 function parseFilterRegexpSource(text) 109 function parseFilterRegexpSource(text)
110 { 110 {
111 let regexp = []; 111 let regexp = [];
112 let lastIndex = text.length - 1; 112 let characters = Array.from(text);
Manish Jethani 2017/06/19 14:41:06 We just need to convert the string into an array t… [comment truncated in collapsed view]
kzar 2017/07/07 12:18:07 Maybe add a comment about this?
Manish Jethani 2017/07/08 06:02:19 Done.
113 let lastIndex = characters.length - 1;
113 let hostname; 114 let hostname;
114 let hostnameStart = null; 115 let hostnameStart = null;
115 let hostnameFinished = false; 116 let hostnameFinished = false;
116 let justHostname = false; 117 let justHostname = false;
117 let canSafelyMatchAsLowercase = false; 118 let canSafelyMatchAsLowercase = false;
118 119
119 for (let i = 0; i < text.length; i++) 120 for (let i = 0; i < characters.length; i++)
120 { 121 {
121 let c = text[i]; 122 let c = characters[i];
Manish Jethani 2017/06/19 14:41:06 Note that c.length is 2 if the character is a surrogate pair… [comment truncated in collapsed view]
kzar 2017/07/07 12:18:07 Acknowledged.
122 123
123 if (hostnameFinished) 124 if (hostnameFinished)
124 justHostname = false; 125 justHostname = false;
125 126
126 // If we're currently inside the hostname we have to be careful not to 127 // If we're currently inside the hostname we have to be careful not to
127 // escape any characters until after we have converted it to punycode. 128 // escape any characters until after we have converted it to punycode.
128 if (hostnameStart != null && !hostnameFinished) 129 if (hostnameStart != null && !hostnameFinished)
129 { 130 {
130 let endingChar = (c == "*" || c == "^" || 131 let endingChar = (c == "*" || c == "^" ||
131 c == "?" || c == "/" || c == "|"); 132 c == "?" || c == "/" || c == "|");
132 if (!endingChar && i != lastIndex) 133 if (!endingChar && i != lastIndex)
133 continue; 134 continue;
134 135
135 hostname = punycode.toASCII( 136 hostname = punycode.toASCII(
136 text.substring(hostnameStart, endingChar ? i : i + 1) 137 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
137 ); 138 );
138 hostnameFinished = justHostname = true; 139 hostnameFinished = justHostname = true;
139 regexp.push(escapeRegExp(hostname)); 140 regexp.push(escapeRegExp(hostname));
140 if (!endingChar) 141 if (!endingChar)
141 break; 142 break;
142 } 143 }
143 144
144 switch (c) 145 switch (c)
145 { 146 {
146 case "*": 147 case "*":
147 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") 148 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
148 regexp.push(".*"); 149 regexp.push(".*");
149 break; 150 break;
150 case "^": 151 case "^":
151 if (i < lastIndex) 152 if (i < lastIndex)
152 regexp.push("."); 153 regexp.push(".");
153 break; 154 break;
154 case "|": 155 case "|":
155 if (i == 0) 156 if (i == 0)
156 { 157 {
157 regexp.push("^"); 158 regexp.push("^");
158 break; 159 break;
159 } 160 }
160 if (i == lastIndex) 161 if (i == lastIndex)
161 { 162 {
162 regexp.push("$"); 163 regexp.push("$");
163 break; 164 break;
164 } 165 }
165 if (i == 1 && text[0] == "|") 166 if (i == 1 && characters[0] == "|")
166 { 167 {
167 hostnameStart = i + 1; 168 hostnameStart = i + 1;
168 canSafelyMatchAsLowercase = true; 169 canSafelyMatchAsLowercase = true;
169 regexp.push("https?://([^/]+\\.)?"); 170 regexp.push("https?://([^/]+\\.)?");
170 break; 171 break;
171 } 172 }
172 regexp.push("\\|"); 173 regexp.push("\\|");
173 break; 174 break;
174 case "/": 175 case "/":
175 if (!hostnameFinished && 176 if (!hostnameFinished &&
176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 177 characters[i - 2] == ":" && characters[i - 1] == "/")
177 { 178 {
178 hostnameStart = i + 1; 179 hostnameStart = i + 1;
179 canSafelyMatchAsLowercase = true; 180 canSafelyMatchAsLowercase = true;
180 } 181 }
181 regexp.push("/"); 182 regexp.push("/");
182 break; 183 break;
183 case ".": case "+": case "$": case "?": 184 case ".": case "+": case "$": case "?":
184 case "{": case "}": case "(": case ")": 185 case "{": case "}": case "(": case ")":
185 case "[": case "]": case "\\": 186 case "[": case "]": case "\\":
186 regexp.push("\\", c); 187 regexp.push("\\", c);
187 break; 188 break;
188 default: 189 default:
189 if (hostnameFinished && (c >= "a" && c <= "z" || 190 if (hostnameFinished && (c >= "a" && c <= "z" ||
190 c >= "A" && c <= "Z")) 191 c >= "A" && c <= "Z"))
191 canSafelyMatchAsLowercase = false; 192 canSafelyMatchAsLowercase = false;
192 regexp.push(c); 193 regexp.push(c == "%" ? c : encodeURI(c));
Manish Jethani 2017/06/19 14:41:06 Do not encode the percent sign because that means… [comment truncated in collapsed view]
193 } 194 }
194 } 195 }
195 196
196 return { 197 return {
197 regexp: regexp.join(""), 198 regexp: regexp.join(""),
198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 199 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
199 hostname: hostname, 200 hostname: hostname,
200 justHostname: justHostname 201 justHostname: justHostname
201 }; 202 };
202 } 203 }
(...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after
539 { 540 {
540 convertFilterAddRules(rules, filter, "block", true, 541 convertFilterAddRules(rules, filter, "block", true,
541 requestFilterExceptionDomains); 542 requestFilterExceptionDomains);
542 } 543 }
543 544
544 for (let filter of this.requestExceptions) 545 for (let filter of this.requestExceptions)
545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 546 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
546 547
547 return rules.filter(rule => !hasNonASCI(rule)); 548 return rules.filter(rule => !hasNonASCI(rule));
548 }; 549 };
OLD | NEW
« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld