lib/abp2blocklist.js - Issue 29468575: Issue 5332 - Support Unicode characters in path

Side by Side Diff: lib/abp2blocklist.js

Issue 29468575: Issue 5332 - Support Unicode characters in path (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Add comment about Unicode surrogate pairs Created July 8, 2017, 6 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
102 * case, a hostname string (or undefined) and a bool	102 * case, a hostname string (or undefined) and a bool

103 * indicating if the source only contains a hostname or not:	103 * indicating if the source only contains a hostname or not:

104 * {regexp: "...",	104 * {regexp: "...",

105 * canSafelyMatchAsLowercase: true/false,	105 * canSafelyMatchAsLowercase: true/false,

106 * hostname: "...",	106 * hostname: "...",

107 * justHostname: true/false}	107 * justHostname: true/false}

108 */	108 */

109 function parseFilterRegexpSource(text)	109 function parseFilterRegexpSource(text)

110 {	110 {

111 let regexp = [];	111 let regexp = [];

112 let lastIndex = text.length - 1;	112

	113 // Convert the text into an array of Unicode characters.

	114 //

	115 // In the case of surrogate pairs (the smiley emoji, for example), one

	116 // Unicode code point is represented by two JavaScript characters together.

	117 // We want to iterate over Unicode code points rather than JavaScript

	118 // characters.

	119 let characters = Array.from(text);

	120

	121 let lastIndex = characters.length - 1;

113 let hostname;	122 let hostname;

114 let hostnameStart = null;	123 let hostnameStart = null;

115 let hostnameFinished = false;	124 let hostnameFinished = false;

116 let justHostname = false;	125 let justHostname = false;

117 let canSafelyMatchAsLowercase = false;	126 let canSafelyMatchAsLowercase = false;

118	127

119 for (let i = 0; i < text.length; i++)	128 for (let i = 0; i < characters.length; i++)

120 {	129 {

121 let c = text[i];	130 let c = characters[i];

122	131

123 if (hostnameFinished)	132 if (hostnameFinished)

124 justHostname = false;	133 justHostname = false;

125	134

126 // If we're currently inside the hostname we have to be careful not to	135 // If we're currently inside the hostname we have to be careful not to

127 // escape any characters until after we have converted it to punycode.	136 // escape any characters until after we have converted it to punycode.

128 if (hostnameStart != null && !hostnameFinished)	137 if (hostnameStart != null && !hostnameFinished)

129 {	138 {

130 let endingChar = (c == "*" || c == "^" ||	139 let endingChar = (c == "*" || c == "^" ||

131 c == "?" || c == "/" || c == "|");	140 c == "?" || c == "/" || c == "|");

132 if (!endingChar && i != lastIndex)	141 if (!endingChar && i != lastIndex)

133 continue;	142 continue;

134	143

135 hostname = punycode.toASCII(	144 hostname = punycode.toASCII(

136 text.substring(hostnameStart, endingChar ? i : i + 1)	145 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

137 );	146 );

138 hostnameFinished = justHostname = true;	147 hostnameFinished = justHostname = true;

139 regexp.push(escapeRegExp(hostname));	148 regexp.push(escapeRegExp(hostname));

140 if (!endingChar)	149 if (!endingChar)

141 break;	150 break;

142 }	151 }

143	152

144 switch (c)	153 switch (c)

145 {	154 {

146 case "*":	155 case "*":

147 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	156 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

148 regexp.push(".*");	157 regexp.push(".*");

149 break;	158 break;

150 case "^":	159 case "^":

151 if (i < lastIndex)	160 if (i < lastIndex)

152 regexp.push(".");	161 regexp.push(".");

153 break;	162 break;

154 case "|":	163 case "|":

155 if (i == 0)	164 if (i == 0)

156 {	165 {

157 regexp.push("^");	166 regexp.push("^");

158 break;	167 break;

159 }	168 }

160 if (i == lastIndex)	169 if (i == lastIndex)

161 {	170 {

162 regexp.push("$");	171 regexp.push("$");

163 break;	172 break;

164 }	173 }

165 if (i == 1 && text[0] == "|")	174 if (i == 1 && characters[0] == "|")

166 {	175 {

167 hostnameStart = i + 1;	176 hostnameStart = i + 1;

168 canSafelyMatchAsLowercase = true;	177 canSafelyMatchAsLowercase = true;

169 regexp.push("https?://([^/]+\\.)?");	178 regexp.push("https?://([^/]+\\.)?");

170 break;	179 break;

171 }	180 }

172 regexp.push("\\|");	181 regexp.push("\\|");

173 break;	182 break;

174 case "/":	183 case "/":

175 if (!hostnameFinished &&	184 if (!hostnameFinished &&

176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	185 characters[i - 2] == ":" && characters[i - 1] == "/")

177 {	186 {

178 hostnameStart = i + 1;	187 hostnameStart = i + 1;

179 canSafelyMatchAsLowercase = true;	188 canSafelyMatchAsLowercase = true;

180 }	189 }

181 regexp.push("/");	190 regexp.push("/");

182 break;	191 break;

183 case ".": case "+": case "$": case "?":	192 case ".": case "+": case "$": case "?":

184 case "{": case "}": case "(": case ")":	193 case "{": case "}": case "(": case ")":

185 case "[": case "]": case "\\":	194 case "[": case "]": case "\\":

186 regexp.push("\\", c);	195 regexp.push("\\", c);

187 break;	196 break;

188 default:	197 default:

189 if (hostnameFinished && (c >= "a" && c <= "z" ||	198 if (hostnameFinished && (c >= "a" && c <= "z" ||

190 c >= "A" && c <= "Z"))	199 c >= "A" && c <= "Z"))

191 canSafelyMatchAsLowercase = false;	200 canSafelyMatchAsLowercase = false;

192 regexp.push(c);	201 regexp.push(c == "%" ? c : encodeURI(c));

193 }	202 }

194 }	203 }

195	204

196 return {	205 return {

197 regexp: regexp.join(""),	206 regexp: regexp.join(""),

198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	207 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

199 hostname: hostname,	208 hostname: hostname,

200 justHostname: justHostname	209 justHostname: justHostname

201 };	210 };

202 }	211 }

(...skipping 336 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
539 {	548 {

540 convertFilterAddRules(rules, filter, "block", true,	549 convertFilterAddRules(rules, filter, "block", true,

541 requestFilterExceptionDomains);	550 requestFilterExceptionDomains);

542 }	551 }

543	552

544 for (let filter of this.requestExceptions)	553 for (let filter of this.requestExceptions)

545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);	554 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

546	555

547 return rules.filter(rule => !hasNonASCI(rule));	556 return rules.filter(rule => !hasNonASCI(rule));

548 };	557 };

OLD	NEW

« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »