Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29468575: Issue 5332 - Support Unicode characters in path (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Left Patch Set: Created June 19, 2017, 2:35 p.m.
Right Patch Set: Add comment about Unicode surrogate pairs — Created July 8, 2017, 6 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | test/abp2blocklist.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 * case, a hostname string (or undefined) and a bool 102 * case, a hostname string (or undefined) and a bool
103 * indicating if the source only contains a hostname or not: 103 * indicating if the source only contains a hostname or not:
104 * {regexp: "...", 104 * {regexp: "...",
105 * canSafelyMatchAsLowercase: true/false, 105 * canSafelyMatchAsLowercase: true/false,
106 * hostname: "...", 106 * hostname: "...",
107 * justHostname: true/false} 107 * justHostname: true/false}
108 */ 108 */
109 function parseFilterRegexpSource(text) 109 function parseFilterRegexpSource(text)
110 { 110 {
111 let regexp = []; 111 let regexp = [];
112
113 // Convert the text into an array of Unicode characters.
114 //
115 // In the case of surrogate pairs (the smiley emoji, for example), one
116 // Unicode code point is represented by two JavaScript characters together.
117 // We want to iterate over Unicode code points rather than JavaScript
118 // characters.
112 let characters = Array.from(text); 119 let characters = Array.from(text);
Manish Jethani 2017/06/19 14:41:06 We just need to convert the string into an array t… [comment truncated in collapsed view]
kzar 2017/07/07 12:18:07 Maybe add a comment about this?
Manish Jethani 2017/07/08 06:02:19 Done.
120
113 let lastIndex = characters.length - 1; 121 let lastIndex = characters.length - 1;
114 let hostname; 122 let hostname;
115 let hostnameStart = null; 123 let hostnameStart = null;
116 let hostnameFinished = false; 124 let hostnameFinished = false;
117 let justHostname = false; 125 let justHostname = false;
118 let canSafelyMatchAsLowercase = false; 126 let canSafelyMatchAsLowercase = false;
119 127
120 for (let i = 0; i < characters.length; i++) 128 for (let i = 0; i < characters.length; i++)
121 { 129 {
122 let c = characters[i]; 130 let c = characters[i];
Manish Jethani 2017/06/19 14:41:06 Note that c.length is 2 if the character is a surr[ogate pair]… [comment truncated in collapsed view]
kzar 2017/07/07 12:18:07 Acknowledged.
123 131
124 if (hostnameFinished) 132 if (hostnameFinished)
125 justHostname = false; 133 justHostname = false;
126 134
127 // If we're currently inside the hostname we have to be careful not to 135 // If we're currently inside the hostname we have to be careful not to
128 // escape any characters until after we have converted it to punycode. 136 // escape any characters until after we have converted it to punycode.
129 if (hostnameStart != null && !hostnameFinished) 137 if (hostnameStart != null && !hostnameFinished)
130 { 138 {
131 let endingChar = (c == "*" || c == "^" || 139 let endingChar = (c == "*" || c == "^" ||
132 c == "?" || c == "/" || c == "|"); 140 c == "?" || c == "/" || c == "|");
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
183 break; 191 break;
184 case ".": case "+": case "$": case "?": 192 case ".": case "+": case "$": case "?":
185 case "{": case "}": case "(": case ")": 193 case "{": case "}": case "(": case ")":
186 case "[": case "]": case "\\": 194 case "[": case "]": case "\\":
187 regexp.push("\\", c); 195 regexp.push("\\", c);
188 break; 196 break;
189 default: 197 default:
190 if (hostnameFinished && (c >= "a" && c <= "z" || 198 if (hostnameFinished && (c >= "a" && c <= "z" ||
191 c >= "A" && c <= "Z")) 199 c >= "A" && c <= "Z"))
192 canSafelyMatchAsLowercase = false; 200 canSafelyMatchAsLowercase = false;
193 regexp.push(c == "%" ? c : encodeURI(c)); 201 regexp.push(c == "%" ? c : encodeURI(c));
Manish Jethani 2017/06/19 14:41:06 Do not encode the percent sign because that means… [comment truncated in collapsed view]
194 } 202 }
195 } 203 }
196 204
197 return { 205 return {
198 regexp: regexp.join(""), 206 regexp: regexp.join(""),
199 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 207 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
200 hostname: hostname, 208 hostname: hostname,
201 justHostname: justHostname 209 justHostname: justHostname
202 }; 210 };
203 } 211 }
(...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after
540 { 548 {
541 convertFilterAddRules(rules, filter, "block", true, 549 convertFilterAddRules(rules, filter, "block", true,
542 requestFilterExceptionDomains); 550 requestFilterExceptionDomains);
543 } 551 }
544 552
545 for (let filter of this.requestExceptions) 553 for (let filter of this.requestExceptions)
546 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 554 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
547 555
548 return rules.filter(rule => !hasNonASCI(rule)); 556 return rules.filter(rule => !hasNonASCI(rule));
549 }; 557 };
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld