lib/abp2blocklist.js - Issue 29426594: Issue 3673 - Merge closely matching rules

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Left Patch Set: Make it work in Safari Created May 5, 2017, 4:36 a.m.

Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 /** @module abp2blocklist */	18 /** @module abp2blocklist */

19	19

20 "use strict";	20 "use strict";

21	21

22 let filterClasses = require("filterClasses");	22 let filterClasses = require("filterClasses");

23 let tldjs = require("tldjs");

24 let punycode = require("punycode");	23 let punycode = require("punycode");

25	24

26 const selectorLimit = 5000;	25 const selectorLimit = 5000;

27 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

28 const whitelistableRequestTypes = (typeMap.IMAGE	27

29 \| typeMap.STYLESHEET	28 const httpRequestTypes = typeMap.IMAGE \|

30 \| typeMap.SCRIPT	29 typeMap.STYLESHEET \|

31 \| typeMap.FONT	30 typeMap.SCRIPT \|

32 \| typeMap.MEDIA	31 typeMap.FONT \|

33 \| typeMap.POPUP	32 typeMap.MEDIA \|

34 \| typeMap.OBJECT	33 typeMap.POPUP \|

35 \| typeMap.OBJECT_SUBREQUEST	34 typeMap.OBJECT \|

36 \| typeMap.XMLHTTPREQUEST	35 typeMap.OBJECT_SUBREQUEST \|

37 \| typeMap.PING	36 typeMap.XMLHTTPREQUEST \|

38 \| typeMap.SUBDOCUMENT	37 typeMap.PING \|

39 \| typeMap.OTHER);	38 typeMap.SUBDOCUMENT \|

40	39 typeMap.OTHER;

41 function pearsonHash(message)	40 const rawRequestTypes = typeMap.XMLHTTPREQUEST \|

42 {	41 typeMap.WEBSOCKET \|

43 // This is an implementation of the Pearson hashing algorithm, where we	42 typeMap.WEBRTC \|

44 // generate a 32-bit digest for a given message.	43 typeMap.OBJECT_SUBREQUEST \|

45	44 typeMap.PING \|

46 // Note that this code will only look at the lowest 8 bits of each character.	45 typeMap.OTHER;

47 // For best results, encode the input as UTF-8.	46 const whitelistableRequestTypes = httpRequestTypes \|

48	47 typeMap.WEBSOCKET \|

49 // A table of all numbers from 0 through 255 shuffled up.	48 typeMap.WEBRTC;

50 const table = [	49

51 0xe5, 0x0a, 0x9f, 0x79, 0x99, 0xad, 0x10, 0x85, 0x5d, 0x55, 0x75, 0x2e,	50 function callLater(func)

52 0x04, 0x1a, 0xb5, 0x7d, 0x96, 0xe6, 0xa3, 0xc6, 0x82, 0x87, 0xb2, 0xef,	51 {

53 0x00, 0x64, 0x70, 0x4b, 0xe4, 0x2f, 0x37, 0x52, 0x90, 0x1d, 0x08, 0x68,	52 return new Promise(resolve =>

54 0x3a, 0x26, 0x74, 0xaa, 0xc1, 0x80, 0x17, 0xc2, 0x0f, 0xec, 0xc8, 0x1f,	53 {

55 0xe2, 0x3c, 0xe1, 0xa1, 0x8f, 0xfd, 0x9d, 0x0b, 0xd5, 0xcc, 0xd4, 0xd9,	54 let call = () => resolve(func());

56 0xf0, 0x3d, 0x5e, 0x57, 0xae, 0x12, 0x46, 0xb0, 0x63, 0x94, 0x61, 0x9a,	55

57 0xbb, 0x76, 0x0c, 0x3f, 0xc4, 0x59, 0xdc, 0x5c, 0xb4, 0xc7, 0x73, 0x39,	56 // If this looks like Node.js, call process.nextTick, otherwise call

58 0x65, 0x2c, 0x2a, 0xc3, 0xed, 0x20, 0x54, 0xfe, 0xfb, 0xd0, 0xa6, 0x33,	57 // setTimeout.

59 0x4a, 0x9e, 0xe7, 0x49, 0xea, 0x58, 0xaf, 0x35, 0x30, 0x95, 0x2b, 0x56,	58 if (typeof process != "undefined")

60 0x14, 0xff, 0xb1, 0xd6, 0x27, 0x6a, 0x88, 0x89, 0x43, 0x4c, 0xca, 0xb9,	59 process.nextTick(call);

61 0x21, 0x8a, 0x78, 0xf1, 0x18, 0x1e, 0xd1, 0xe0, 0x60, 0xf8, 0x3e, 0xdd,	60 else

62 0x25, 0x16, 0xde, 0xc0, 0x98, 0x28, 0x7a, 0x3b, 0x1b, 0x45, 0xa5, 0xb3,	61 setTimeout(call, 0);

63 0xe3, 0x84, 0xd3, 0xb8, 0xbd, 0x47, 0xe9, 0xfa, 0xc9, 0xb6, 0xbe, 0x6c,	62 });

64 0x9c, 0xac, 0xda, 0xfc, 0x41, 0x0d, 0x07, 0x91, 0x6b, 0x6f, 0x03, 0xcb,	63 }

65 0xbc, 0x8d, 0x06, 0x01, 0xd2, 0x8c, 0x19, 0x5a, 0x02, 0xba, 0x4f, 0x6e,	64

66 0x2d, 0xf4, 0xcf, 0x7e, 0x6d, 0x42, 0x93, 0x31, 0xbf, 0xdf, 0x5f, 0x67,	65 function async(callees, mapFunction)

67 0x53, 0xf6, 0x38, 0x9b, 0xa7, 0xe8, 0xee, 0x5b, 0x0e, 0x22, 0xdb, 0x51,	66 {

68 0x8e, 0x69, 0x97, 0x32, 0x36, 0xce, 0x77, 0x4d, 0xa4, 0xf2, 0x23, 0xc5,	67 if (!(Symbol.iterator in callees))

69 0x11, 0x05, 0xab, 0xf9, 0x13, 0xd8, 0x7b, 0xa8, 0x40, 0x66, 0xd7, 0x24,	68 callees = [callees];

70 0x86, 0xa0, 0xeb, 0xf3, 0x81, 0x4e, 0x50, 0xf7, 0xb7, 0x7f, 0x83, 0xa2,	69

71 0xa9, 0x09, 0xcd, 0x62, 0x7c, 0x92, 0x8b, 0x71, 0x44, 0xf5, 0x72, 0x1c,	70 let lastPause = Date.now();

72 0x29, 0x48, 0x34, 0x15	71 let index = 0;

73 ];	72

74	73 let promise = Promise.resolve();

75 let digest = 0;	74

76	75 for (let next of callees)

77 for (let i = 0; i < 4; i++)	76 {

78 {	77 let currentIndex = index;

79 let d = table[(message.charCodeAt(0) + i) % 256];	78

80 for (let j = 1; j < message.length; j++)	79 promise = promise.then(() =>

81 d = table[d ^ message.charCodeAt(j)];	80 {

82 digest \|= (d & 0xff) << i * 8;	81 if (mapFunction)

83 }	82 next = mapFunction(next, currentIndex);

84	83

85 return digest;	84 // If it has been 100ms or longer since the last call, take a pause. This

	85 // keeps the browser from freezing up.

	86 let now = Date.now();

	87 if (now - lastPause >= 100)

	88 {

	89 lastPause = now;

	90 return callLater(next);

	91 }

	92

	93 return next();

	94 });

	95

	96 index++;

	97 }

	98

	99 return promise;

86 }	100 }

87	101

88 function parseDomains(domains, included, excluded)	102 function parseDomains(domains, included, excluded)

89 {	103 {

90 for (let domain in domains)	104 for (let domain in domains)

91 {	105 {

92 if (domain != "")	106 if (domain != "")

93 {	107 {

94 let enabled = domains[domain];	108 let enabled = domains[domain];

95 domain = punycode.toASCII(domain.toLowerCase());	109 domain = punycode.toASCII(domain.toLowerCase());

96	110

97 if (!enabled)	111 if (!enabled)

98 excluded.push(domain);	112 excluded.push(domain);

99 else if (!domains[""])	113 else if (!domains[""])

100 included.push(domain);	114 included.push(domain);

101 }	115 }

102 }	116 }

103 }	117 }

104	118

105 function escapeRegExp(s)	119 function escapeRegExp(s)

106 {	120 {

107 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	121 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

108 }	122 }

109	123

110 function matchDomain(domain)	124 function matchDomain(domain)

111 {	125 {

	126 if (!domain)

	127 return "^https?://";

	128

112 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

	130 }

	131

	132 function getURLSchemes(contentType)

	133 {

	134 // If the given content type includes all supported URL schemes, simply

	135 // return a single generic URL scheme pattern. This minimizes the size of the

	136 // generated rule set. The downside to this is that it will also match

	137 // schemes that we do not want to match (e.g. "ftp://"), but this can be

	138 // mitigated by adding exceptions for those schemes.

	139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

	140 contentType & httpRequestTypes)

	141 return ["[^:]+:(//)?"];

	142

	143 let urlSchemes = [];

	144

	145 if (contentType & typeMap.WEBSOCKET)

	146 urlSchemes.push("wss?://");

	147

	148 if (contentType & typeMap.WEBRTC)

	149 urlSchemes.push("stuns?:", "turns?:");

	150

	151 if (contentType & httpRequestTypes)

	152 urlSchemes.push("https?://");

	153

	154 return urlSchemes;

	155 }

	156

	157 function findSubdomainsInList(domain, list)

	158 {

	159 let subdomains = [];

	160 let suffixLength = domain.length + 1;

	161

	162 for (let name of list)

	163 {

	164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

	165 subdomains.push(name.slice(0, -suffixLength));

	166 }

	167

	168 return subdomains;

	169 }

	170

	171 function extractFilterDomains(filters)

	172 {

	173 let domains = new Set();

	174 for (let filter of filters)

	175 {

	176 let parsed = parseFilterRegexpSource(filter.regexpSource);

	177 if (parsed.justHostname)

	178 domains.add(parsed.hostname);

	179 }

	180 return domains;

113 }	181 }

114	182

115 function convertElemHideFilter(filter, elemhideSelectorExceptions)	183 function convertElemHideFilter(filter, elemhideSelectorExceptions)

116 {	184 {

117 let included = [];	185 let included = [];

118 let excluded = [];	186 let excluded = [];

119 let rules = [];

120	187

121 parseDomains(filter.domains, included, excluded);	188 parseDomains(filter.domains, included, excluded);

122	189

123 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

124 return {matchDomains: included.map(matchDomain), selector: filter.selector};	191 return {matchDomains: included, selector: filter.selector};

125 }	192 }

126	193

127 /**	194 /**

128 * Parse the given filter "regexpSource" string. Producing a regular expression,	195 * Parse the given filter "regexpSource" string. Producing a regular expression,

129 * extracting the hostname (if any), deciding if the regular expression is safe	196 * extracting the hostname (if any), deciding if the regular expression is safe

130 * to be converted + matched as lower case and noting if the source contains	197 * to be converted + matched as lower case and noting if the source contains

131 * anything after the hostname.)	198 * anything after the hostname.)

132 *	199 *

133 * @param {string} text regexpSource property of a filter	200 * @param {string} text regexpSource property of a filter

	201 * @param {string} urlScheme The URL scheme to use in the regular expression

134 * @returns {object} An object containing a regular expression string, a bool	202 * @returns {object} An object containing a regular expression string, a bool

135 * indicating if the filter can be safely matched as lower	203 * indicating if the filter can be safely matched as lower

136 * case, a hostname string (or undefined) and a bool	204 * case, a hostname string (or undefined) and a bool

137 * indicating if the source only contains a hostname or not:	205 * indicating if the source only contains a hostname or not:

138 * {regexp: "...",	206 * {regexp: "...",

139 * canSafelyMatchAsLowercase: true/false,	207 * canSafelyMatchAsLowercase: true/false,

140 * hostname: "...",	208 * hostname: "...",

141 * justHostname: true/false}	209 * justHostname: true/false}

142 */	210 */

143 function parseFilterRegexpSource(text)	211 function parseFilterRegexpSource(text, urlScheme)

144 {	212 {

145 let regexp = [];	213 let regexp = [];

146 let lastIndex = text.length - 1;	214

	215 // Convert the text into an array of Unicode characters.

	216 //

	217 // In the case of surrogate pairs (the smiley emoji, for example), one

	218 // Unicode code point is represented by two JavaScript characters together.

	219 // We want to iterate over Unicode code points rather than JavaScript

	220 // characters.

	221 let characters = Array.from(text);

	222

	223 let lastIndex = characters.length - 1;

147 let hostname;	224 let hostname;

148 let hostnameStart = null;	225 let hostnameStart = null;

149 let hostnameFinished = false;	226 let hostnameFinished = false;

150 let justHostname = false;	227 let justHostname = false;

151 let canSafelyMatchAsLowercase = false;	228 let canSafelyMatchAsLowercase = false;

152	229

153 for (let i = 0; i < text.length; i++)	230 if (!urlScheme)

154 {	231 urlScheme = getURLSchemes()[0];

155 let c = text[i];	232

	233 for (let i = 0; i < characters.length; i++)

	234 {

	235 let c = characters[i];

156	236

157 if (hostnameFinished)	237 if (hostnameFinished)

158 justHostname = false;	238 justHostname = false;

159	239

160 // If we're currently inside the hostname we have to be careful not to	240 // If we're currently inside the hostname we have to be careful not to

161 // escape any characters until after we have converted it to punycode.	241 // escape any characters until after we have converted it to punycode.

162 if (hostnameStart != null && !hostnameFinished)	242 if (hostnameStart != null && !hostnameFinished)

163 {	243 {

164 let endingChar = (c == "*" \|\| c == "^" \|\|	244 let endingChar = (c == "*" \|\| c == "^" \|\|

165 c == "?" \|\| c == "/" \|\| c == "\|");	245 c == "?" \|\| c == "/" \|\| c == "\|");

166 if (!endingChar && i != lastIndex)	246 if (!endingChar && i != lastIndex)

167 continue;	247 continue;

168	248

169 hostname = punycode.toASCII(	249 hostname = punycode.toASCII(

170 text.substring(hostnameStart, endingChar ? i : i + 1)	250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

	251 .toLowerCase()

171 );	252 );

172 hostnameFinished = justHostname = true;	253 hostnameFinished = justHostname = true;

173 regexp.push(escapeRegExp(hostname));	254 regexp.push(escapeRegExp(hostname));

174 if (!endingChar)	255 if (!endingChar)

175 break;	256 break;

176 }	257 }

177	258

178 switch (c)	259 switch (c)

179 {	260 {

180 case "*":	261 case "*":

181 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

182 regexp.push(".*");	263 regexp.push(".*");

183 break;	264 break;

184 case "^":	265 case "^":

185 if (i < lastIndex)	266 let alphabet = "a-z";

186 regexp.push(".");	267 // If justHostname is true and we've encountered a "^", it means we're

	268 // still in the hostname part of the URL. Since hostnames are always

	269 // lower case (Punycode), there's no need to include "A-Z" in the

	270 // pattern. Further, subsequent code may lower-case the entire regular

	271 // expression (if the URL contains only the hostname part), leaving us

	272 // with "a-za-z", which would be redundant.

	273 if (!justHostname)

	274 alphabet = "A-Z" + alphabet;

	275 let digits = "0-9";

	276 // Note that the "-" must appear first here in order to retain its

	277 // literal meaning within the brackets.

	278 let specialCharacters = "-_.%";

	279 let separator = "[^" + specialCharacters + alphabet + digits + "]";

	280 if (i == 0)

	281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");

	282 else if (i == lastIndex)

	283 regexp.push("(" + separator + ".*)?$");

	284 else

	285 regexp.push(separator);

187 break;	286 break;

188 case "\|":	287 case "\|":

189 if (i == 0)	288 if (i == 0)

190 {	289 {

191 regexp.push("^");	290 regexp.push("^");

192 break;	291 break;

193 }	292 }

194 if (i == lastIndex)	293 if (i == lastIndex)

195 {	294 {

196 regexp.push("$");	295 regexp.push("$");

197 break;	296 break;

198 }	297 }

199 if (i == 1 && text[0] == "\|")	298 if (i == 1 && characters[0] == "\|")

200 {	299 {

201 hostnameStart = i + 1;	300 hostnameStart = i + 1;

202 canSafelyMatchAsLowercase = true;	301 canSafelyMatchAsLowercase = true;

203 regexp.push("https?://([^/]+\\.)?");	302 regexp.push(urlScheme + "([^/]+\\.)?");

204 break;	303 break;

205 }	304 }

206 regexp.push("\\\|");	305 regexp.push("\\\|");

207 break;	306 break;

208 case "/":	307 case "/":

209 if (!hostnameFinished &&	308 if (!hostnameFinished &&

210 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	309 characters[i - 2] == ":" && characters[i - 1] == "/")

211 {	310 {

212 hostnameStart = i + 1;	311 hostnameStart = i + 1;

213 canSafelyMatchAsLowercase = true;	312 canSafelyMatchAsLowercase = true;

214 }	313 }

215 regexp.push("/");	314 regexp.push("/");

216 break;	315 break;

217 case ".": case "+": case "$": case "?":	316 case ".": case "+": case "$": case "?":

218 case "{": case "}": case "(": case ")":	317 case "{": case "}": case "(": case ")":

219 case "[": case "]": case "\\":	318 case "[": case "]": case "\\":

220 regexp.push("\\", c);	319 regexp.push("\\", c);

221 break;	320 break;

222 default:	321 default:

223 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	322 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

224 c >= "A" && c <= "Z"))	323 c >= "A" && c <= "Z"))

225 canSafelyMatchAsLowercase = false;	324 canSafelyMatchAsLowercase = false;

226 regexp.push(c);	325 regexp.push(c == "%" ? c : encodeURI(c));

227 }	326 }

228 }	327 }

229	328

230 return {	329 return {

231 regexp: regexp.join(""),	330 regexp: regexp.join(""),

232 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

233 hostname: hostname,	332 hostname: hostname,

234 justHostname: justHostname	333 justHostname: justHostname

235 };	334 };

236 }	335 }

237	336

238 function getResourceTypes(filter)	337 function getResourceTypes(contentType)

239 {	338 {

240 let types = [];	339 let types = [];

241	340

242 if (filter.contentType & typeMap.IMAGE)	341 if (contentType & typeMap.IMAGE)

243 types.push("image");	342 types.push("image");

244 if (filter.contentType & typeMap.STYLESHEET)	343 if (contentType & typeMap.STYLESHEET)

245 types.push("style-sheet");	344 types.push("style-sheet");

246 if (filter.contentType & typeMap.SCRIPT)	345 if (contentType & typeMap.SCRIPT)

247 types.push("script");	346 types.push("script");

248 if (filter.contentType & typeMap.FONT)	347 if (contentType & typeMap.FONT)

249 types.push("font");	348 types.push("font");

250 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	349 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

251 types.push("media");	350 types.push("media");

252 if (filter.contentType & typeMap.POPUP)	351 if (contentType & typeMap.POPUP)

253 types.push("popup");	352 types.push("popup");

254 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	353 if (contentType & rawRequestTypes)

255 typeMap.OBJECT_SUBREQUEST \|

256 typeMap.PING \|

257 typeMap.OTHER))

258 types.push("raw");	354 types.push("raw");

259 if (filter.contentType & typeMap.SUBDOCUMENT)	355 if (contentType & typeMap.SUBDOCUMENT)

260 types.push("document");	356 types.push("document");

261	357

262 return types;	358 return types;

263 }	359 }

264	360

265 function addDomainPrefix(domains)	361 function makeRuleCopies(trigger, action, urlSchemes)

266 {	362 {

267 let result = [];	363 let copies = [];

268	364

269 for (let domain of domains)	365 // Always make a deep copy of the rule, since rules may have to be

270 {	366 // manipulated individually at a later stage.

271 result.push(domain);	367 let stringifiedTrigger = JSON.stringify(trigger);

272	368

273 if (tldjs.getDomain(domain) == domain)	369 let filterPattern = trigger["url-filter"].substring(1);

274 result.push("www." + domain);	370 let startIndex = 0;

275 }	371

276	372 // If the URL filter already begins with the first URL scheme pattern, skip

277 return result;	373 // it.

278 }	374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

279	375 {

280 function convertFilterAddRules(rules, filter, action, withResourceTypes)	376 filterPattern = filterPattern.substring(urlSchemes[0].length);

281 {	377 startIndex = 1;

282 let parsed = parseFilterRegexpSource(filter.regexpSource);	378 }

	379 else

	380 {

	381 filterPattern = ".*" + filterPattern;

	382 }

	383

	384 for (let i = startIndex; i < urlSchemes.length; i++)

	385 {

	386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	387 "url-filter": "^" + urlSchemes[i] + filterPattern

	388 });

	389 copies.push({trigger: copyTrigger, action});

	390 }

	391

	392 return copies;

	393 }

	394

	395 function excludeTopURLFromTrigger(trigger)

	396 {

	397 trigger["unless-top-url"] = [trigger["url-filter"]];

	398 if (trigger["url-filter-is-case-sensitive"])

	399 trigger["top-url-filter-is-case-sensitive"] = true;

	400 }

	401

	402 function convertFilterAddRules(rules, filter, action, withResourceTypes,

	403 exceptionDomains, contentType)

	404 {

	405 if (!contentType)

	406 contentType = filter.contentType;

	407

	408 // If WebSocket or WebRTC are given along with other options but not

	409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,

	410 // we must generate multiple rules. For example, for the filter

	411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

	412 // and another rule with "^https?://" and "image". If we merge the two, we

	413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)

	414 // inadvertently.

	415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

	416 !(contentType & typeMap.WEBRTC &&

	417 contentType & rawRequestTypes & httpRequestTypes)) \|\|

	418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

	419 !(contentType & typeMap.WEBSOCKET &&

	420 contentType & rawRequestTypes & httpRequestTypes)))

	421 {

	422 if (contentType & typeMap.WEBSOCKET)

	423 {

	424 convertFilterAddRules(rules, filter, action, withResourceTypes,

	425 exceptionDomains, typeMap.WEBSOCKET);

	426 }

	427

	428 if (contentType & typeMap.WEBRTC)

	429 {

	430 convertFilterAddRules(rules, filter, action, withResourceTypes,

	431 exceptionDomains, typeMap.WEBRTC);

	432 }

	433

	434 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	435

	436 if (!contentType)

	437 return;

	438 }

	439

	440 let urlSchemes = getURLSchemes(contentType);

	441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

283	442

284 // For the special case of $document whitelisting filters with just a domain	443 // For the special case of $document whitelisting filters with just a domain

285 // we can generate an equivalent blocking rule exception using if-domain.	444 // we can generate an equivalent blocking rule exception using if-domain.

286 if (filter instanceof filterClasses.WhitelistFilter &&	445 if (filter instanceof filterClasses.WhitelistFilter &&

287 filter.contentType & typeMap.DOCUMENT &&	446 contentType & typeMap.DOCUMENT &&

288 parsed.justHostname)	447 parsed.justHostname)

289 {	448 {

290 rules.push({	449 rules.push({

291 trigger: {	450 trigger: {

292 "url-filter": ".*",	451 "url-filter": ".*",

293 "if-domain": addDomainPrefix([parsed.hostname])	452 "if-domain": ["*" + parsed.hostname]

294 },	453 },

295 action: {type: "ignore-previous-rules"}	454 action: {type: "ignore-previous-rules"}

296 });	455 });

297 // If the filter contains other supported options we'll need to generate	456 // If the filter contains other supported options we'll need to generate

298 // further rules for it, but if not we can simply return now.	457 // further rules for it, but if not we can simply return now.

299 if (!(filter.contentType & whitelistableRequestTypes))	458 if (!(contentType & whitelistableRequestTypes))

300 return;	459 return;

301 }	460 }

302	461

303 let trigger = {"url-filter": parsed.regexp};	462 let trigger = {"url-filter": parsed.regexp};

304	463

305 // Limit rules to HTTP(S) URLs	464 // If the URL filter begins with one of the URL schemes for this content

306 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	465 // type, we generate additional rules for all the URL scheme patterns;

307 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	466 // otherwise, if the start of the URL filter literally matches the first URL

	467 // scheme pattern, we just generate additional rules for the remaining URL

	468 // scheme patterns.

	469 //

	470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	471 // in front of this and generate two additional rules for

	472 // "^stuns?:.stun:foo" and "^turns?:.stun:foo". On the other hand,

	473 // "\|\|foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	474 // "^turns?:([^/]+\\.)?foo" in addition.

	475 //

	476 // Note that the filter can be already anchored to the beginning

	477 // (e.g. "\|stun:foo$webrtc"), in which case we do not generate any additional

	478 // rules.

	479 let needAltRules = trigger["url-filter"][0] != "^" \|\|

	480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	481

	482 if (trigger["url-filter"][0] != "^")

	483 {

	484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	485 .test(trigger["url-filter"])))

	486 {

	487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	488 }

	489

	490 trigger["url-filter"] = "^" + trigger["url-filter"];

	491 }

308	492

309 // For rules containing only a hostname we know that we're matching against	493 // For rules containing only a hostname we know that we're matching against

310 // a lowercase string unless the matchCase option was passed.	494 // a lowercase string unless the matchCase option was passed.

311 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

312 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

313	497

314 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	498 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

315 trigger["url-filter-is-case-sensitive"] = true;	499 trigger["url-filter-is-case-sensitive"] = true;

316	500

317 let included = [];	501 let included = [];

318 let excluded = [];	502 let excluded = [];

319	503

320 parseDomains(filter.domains, included, excluded);	504 parseDomains(filter.domains, included, excluded);

321	505

	506 if (exceptionDomains)

	507 excluded = excluded.concat(exceptionDomains);

	508

322 if (withResourceTypes)	509 if (withResourceTypes)

323 {	510 {

324 trigger["resource-type"] = getResourceTypes(filter);	511 let resourceTypes = getResourceTypes(contentType);

325	512

326 if (trigger["resource-type"].length == 0)	513 // Content blocker rules can't differentiate between sub-document requests

	514 // (iframes) and top-level document requests. To avoid too many false

	515 // positives, we prevent rules with no hostname part from blocking document

	516 // requests.

	517 //

	518 // Once Safari 11 becomes our minimum supported version, we could change

	519 // our approach here to use the new "unless-top-url" property instead.

	520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)

	521 resourceTypes = resourceTypes.filter(type => type != "document");

	522

	523 if (resourceTypes.length == 0)

327 return;	524 return;

	525

	526 trigger["resource-type"] = resourceTypes;

328 }	527 }

329	528

330 if (filter.thirdParty != null)	529 if (filter.thirdParty != null)

331 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

332	531

	532 let addTopLevelException = false;

	533

333 if (included.length > 0)	534 if (included.length > 0)

334 trigger["if-domain"] = addDomainPrefix(included);	535 {

	536 trigger["if-domain"] = [];

	537

	538 for (let name of included)

	539 {

	540 // If this is a blocking filter or an element hiding filter, add the

	541 // subdomain wildcard only if no subdomains have been excluded.

	542 let notSubdomains = null;

	543 if ((filter instanceof filterClasses.BlockingFilter \|\|

	544 filter instanceof filterClasses.ElemHideFilter) &&

	545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)

	546 {

	547 trigger["if-domain"].push(name);

	548

	549 // Add the "www" prefix but only if it hasn't been excluded.

	550 if (!notSubdomains.includes("www"))

	551 trigger["if-domain"].push("www." + name);

	552 }

	553 else

	554 {

	555 trigger["if-domain"].push("*" + name);

	556 }

	557 }

	558 }

335 else if (excluded.length > 0)	559 else if (excluded.length > 0)

336 trigger["unless-domain"] = addDomainPrefix(excluded);	560 {

	561 trigger["unless-domain"] = excluded.map(name => "*" + name);

	562 }

	563 else if (filter instanceof filterClasses.BlockingFilter &&

	564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)

	565 {

	566 // Rules with a hostname part are still allowed to block document requests,

	567 // but we add an exception for top-level documents.

	568 //

	569 // Note that we can only do this if there's no "unless-domain" property for

	570 // now. This also only works in Safari 11 onwards, while older versions

	571 // simply ignore this property. Once Safari 11 becomes our minimum

	572 // supported version, we can merge "unless-domain" into "unless-top-url".

	573 addTopLevelException = true;

	574 excludeTopURLFromTrigger(trigger);

	575 }

337	576

338 rules.push({trigger: trigger, action: {type: action}});	577 rules.push({trigger: trigger, action: {type: action}});

339 }	578

340	579 if (needAltRules)

341 function hasNonASCI(obj)	580 {

342 {	581 // Generate additional rules for any alternative URL schemes.

343 if (typeof obj == "string")	582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

344 {	583 {

345 if (/[^\x00-\x7F]/.test(obj))	584 if (addTopLevelException)

346 return true;	585 excludeTopURLFromTrigger(altRule.trigger);

347 }	586

348	587 rules.push(altRule);

349 if (typeof obj == "object")	588 }

350 {	589 }

351 if (obj instanceof Array)

352 for (let item of obj)

353 if (hasNonASCI(item))

354 return true;

355

356 let names = Object.getOwnPropertyNames(obj);

357 for (let name of names)

358 if (hasNonASCI(obj[name]))

359 return true;

360 }

361

362 return false;

363 }	590 }

364	591

365 function convertIDSelectorsToAttributeSelectors(selector)	592 function convertIDSelectorsToAttributeSelectors(selector)

366 {	593 {

367 // First we figure out where all the IDs are	594 // First we figure out where all the IDs are

368 let sep = "";	595 let sep = "";

369 let start = null;	596 let start = null;

370 let positions = [];	597 let positions = [];

371 for (let i = 0; i < selector.length; i++)	598 for (let i = 0; i < selector.length; i++)

372 {	599 {

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
406 {	633 {

407 newSelector.push(selector.substring(i, pos.start));	634 newSelector.push(selector.substring(i, pos.start));

408 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');	635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');

409 i = pos.end;	636 i = pos.end;

410 }	637 }

411 newSelector.push(selector.substring(i));	638 newSelector.push(selector.substring(i));

412	639

413 return newSelector.join("");	640 return newSelector.join("");

414 }	641 }

415	642

416 function closeMatch(s, t, singleCharacterOnly)	643 function addCSSRules(rules, selectors, domain, exceptionDomains)

417 {	644 {

418 // This function returns an edit operation, one of "substitute", "delete",	645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;

419 // and "insert", along with an index in the source string where the edit must	646

420 // occur in order to arrive at the target string. If the strings are not a	647 exceptionDomains.forEach(name =>

421 // close match, it returns null.	648 {

422	649 // For domain-specific filters, include the exception domains only if

423 // If singleCharacterOnly is false, deletions or insertions of a contiguous	650 // they're subdomains of the given domain.

424 // range of characters from one string into the other, at the same index, are	651 if (!domain \|\| name.substr(-domain.length - 1) == "." + domain)

425 // treated as a single edit. For example, "internal" and "international" are	652 unlessDomain.push("*" + name);

426 // considered to be one edit apart, inserting the substring "tiona" from the	653 });

427 // latter into the former.	654

428	655 while (selectors.length)

429 // A few things to note:	656 {

430 //	657 let selector = selectors.splice(0, selectorLimit).join(", ");

431 // 1) This function does not care about how the input strings are treated	658

432 // by the caller. It only treats them as raw strings. For example, the	659 // As of Safari 9.0 element IDs are matched as lowercase. We work around

433 // caller may treat them as regular expressions, where "[ab]" and "[bc]"	660 // this by converting to the attribute format [id="elementID"]

434 // could be considered to have an edit distance of 1, since the order	661 selector = convertIDSelectorsToAttributeSelectors(selector);

435 // within the brackets does not matter. This function will still return	662

436 // null for this set of inputs since they are two edits apart.	663 let rule = {

437 //	664 trigger: {"url-filter": matchDomain(domain),

438 // 2) To be friendly to calling code that might be passing in regular	665 "url-filter-is-case-sensitive": true},

439 // expressions anyway, this function will simply return null if it	666 action: {type: "css-display-none",

440 // encounters a special character (e.g. "\", "?", "+", "*", etc.) in the	667 selector: selector}

441 // delta. For example, given "Hello" and "Hello, how are you?", it will	668 };

442 // return null instead of "{type: 'insert', index: 5, endIndex: 19}".	669

443 //	670 if (unlessDomain)

444 // 3) The calling code within this file does indeed pass in regular	671 rule.trigger["unless-domain"] = unlessDomain;

445 // expressions (the strict subset of JavaScript regular expressions	672

446 // supported by WebKit for content blockers), making the important	673 rules.push(rule);

447 // assumption that the parts where two such regular expressions may	674 }

448 // differ can always be treated as normal strings.	675 }

449 //	676

450 // For example, "^https?://./ads" and "^https?://./adv" differ only in	677 /**

451 // the last character, therefore the regular expressions can safely be	678 * Check if two strings are a close match

452 // merged into "^https?://.*/ad[sv]". If, for example, the characters in	679 *

453 // the delta were to appear within square brackets originally in the	680 * This function returns an edit operation, one of "substitute", "delete", and

454 // input strings (e.g. "^https?://./ad[sx]" and "^https?://./ad[vx]"),	681 * "insert", along with an index in the source string where the edit must occur

455 // the calling code would have to do extra work to merge the two regular	682 * in order to arrive at the target string. If the strings are not a close

456 // expressions correctly. The calling code within this file assumes that	683 * match, it returns null.

457 // this is never the case.	684 *

458	685 * Two strings are considered to be a close match if they are one edit

	686 * operation apart.

	687 *

	688 * Deletions or insertions of a contiguous range of characters from one string

	689 * into the other, at the same index, are treated as a single edit. For

	690 * example, "internal" and "international" are considered to be one edit apart

	691 * and therefore a close match.

	692 *

	693 * A few things to note:

	694 *

	695 * 1) This function does not care about the format of the input strings. For

	696 * example, the caller may pass in regular expressions, where "[ab]" and

	697 * "[bc]" could be considered to be a close match, since the order within the

	698 * brackets doesn't matter. This function will still return null for this set

	699 * of inputs since they are two edits apart.

	700 *

	701 * 2) To be friendly to calling code that might be passing in regular

	702 * expressions, this function will simply return null if it encounters a

	703 * special character (e.g. "\", "?", "+", etc.) in the delta. For example,

	704 * given "Hello" and "Hello, how are you?", it will return null.

	705 *

	706 * 3) If the caller does indeed pass in regular expressions, it must make the

	707 * important assumption that the parts where two such regular expressions may

	708 * differ can always be treated as normal strings. For example,

	709 * "^https?://example.com/ads" and "^https?://example.com/adv" differ only in

	710 * the last character, therefore the regular expressions can safely be merged

	711 * into "^https?://example.com/ad[sv]".

	712 *

	713 * @param {string} s The source string

	714 * @param {string} t The target string

	715 *

	716 * @returns {object} An object describing the single edit operation that must

	717 * occur in the source string in order to arrive at the

	718 * target string

	719 */

	720 function closeMatch(s, t)

	721 {

459 let diff = s.length - t.length;	722 let diff = s.length - t.length;

460

461 // If the string lengths differ by more than one character, we cannot arrive

462 // at target from source in a single edit operation.

463 if (singleCharacterOnly && (diff < -1 \|\| diff > 1))

464 return null;

465	723

466 // If target is longer than source, swap them for the purpose of our	724 // If target is longer than source, swap them for the purpose of our

467 // calculation.	725 // calculation.

468 if (diff < 0)	726 if (diff < 0)

469 {	727 {

470 let tmp = s;	728 let tmp = s;

471 s = t;	729 s = t;

472 t = tmp;	730 t = tmp;

473 }	731 }

474	732

475 let edit = null;	733 let edit = null;

476	734

477 // If the string lengths differ by only one character at most, use the simple	735 let i = 0;

478 // algorithm to find a single character edit.	736 let j = 0;

479 if (diff == 0 \|\| diff == 1 \|\| diff == -1)	737

480 {	738 // Start from the beginning and keep going until we hit a character that

481 for (let i = 0, j = 0; i < s.length; i++)	739 // doesn't match.

482 {	740 for (; i < s.length; i++)

483 if (s[i] == t[j])	741 {

484 {	742 if (s[i] != t[i])

485 j++;	743 break;

	744 }

	745

	746 // Now do exactly the same from the end, but also stop if we reach the

	747 // position where we terminated the previous loop.

	748 for (; j < t.length; j++)

	749 {

	750 if (t.length - j == i \|\| s[s.length - j - 1] != t[t.length - j - 1])

	751 break;

	752 }

	753

	754 if (diff == 0)

	755 {

	756 // If the strings are equal in length and the delta isn't exactly one

	757 // character, it's not a close match.

	758 if (t.length - j - i != 1)

	759 return null;

	760 }

	761 else if (i != t.length - j)

	762 {

	763 // For strings of unequal length, if we haven't found a match for every

	764 // single character in the shorter string counting from both the beginning

	765 // and the end, it's not a close match.

	766 return null;

	767 }

	768

	769 for (let k = i; k < s.length - j; k++)

	770 {

	771 // If the delta contains any special characters, it's not a close match.

	772 if (s[k] == "." \|\| s[k] == "+" \|\| s[k] == "$" \|\| s[k] == "?" \|\|

	773 s[k] == "{" \|\| s[k] == "}" \|\| s[k] == "(" \|\| s[k] == ")" \|\|

	774 s[k] == "[" \|\| s[k] == "]" \|\| s[k] == "\\")

	775 return null;

	776 }

	777

	778 if (diff == 0)

	779 {

	780 edit = {type: "substitute", index: i};

	781 }

	782 else if (diff > 0)

	783 {

	784 edit = {type: "delete", index: i};

	785

	786 if (diff > 1)

	787 edit.endIndex = s.length - j;

	788 }

	789 else

	790 {

	791 edit = {type: "insert", index: i};

	792

	793 if (diff < -1)

	794 edit.endIndex = s.length - j;

	795 }

	796

	797 return edit;

	798 }

	799

	800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)

	801 {

	802 const heuristicRange = 1000;

	803

	804 let ol = rulesInfo.length;

	805

	806 // Throw out obviously redundant rules.

	807 return async(rulesInfo, (ruleInfo, index) => () =>

	808 {

	809 // If this rule is already marked as redundant, don't bother comparing it

	810 // with other rules.

	811 if (rulesInfo[index].redundant)

	812 return;

	813

	814 let limit = exhaustive ? rulesInfo.length :

	815 Math.min(index + heuristicRange, rulesInfo.length);

	816

	817 for (let i = index, j = i + 1; j < limit; j++)

	818 {

	819 if (rulesInfo[j].redundant)

	820 continue;

	821

	822 let source = rulesInfo[i].rule.trigger["url-filter"];

	823 let target = rulesInfo[j].rule.trigger["url-filter"];

	824

	825 if (source.length >= target.length)

	826 {

	827 // If one URL filter is a substring of the other starting at the

	828 // beginning, the other one is clearly redundant.

	829 if (source.substring(0, target.length) == target)

	830 {

	831 rulesInfo[i].redundant = true;

	832 break;

	833 }

486 }	834 }

487 else if (edit)	835 else if (target.substring(0, source.length) == source)

488 {	836 {

489 // Since we want one and only one edit operation, we must bail here.	837 rulesInfo[j].redundant = true;

490 return null;

491 }	838 }

492 else if ((s[i] == "." \|\| s[i] == "+" \|\| s[i] == "$" \|\| s[i] == "?" \|\|	839 }

493 s[i] == "{" \|\| s[i] == "}" \|\| s[i] == "(" \|\| s[i] == ")" \|\|	840 })

494 s[i] == "[" \|\| s[i] == "]" \|\| s[i] == "\\") \|\|	841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));

495 (t[j] == "." \|\| t[j] == "+" \|\| t[j] == "$" \|\| t[j] == "?" \|\|	842 }

496 t[j] == "{" \|\| t[j] == "}" \|\| t[j] == "(" \|\| t[j] == ")" \|\|	843

497 t[j] == "[" \|\| t[j] == "]" \|\| t[j] == "\\"))	844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

498 {	845 {

499 // We don't deal with special characters for now.	846 // Closely matching rules are likely to be within a certain range. We only

500 return null;	847 // look for matches within this range by default. If we increase this value,

501 }	848 // it can give us more matches and a smaller resulting rule set, but possibly

502 else if (diff == 0)	849 // at a significant performance cost.

503 {	850 //

504 // If both strings are equal in length, this is a substitution.	851 // If the exhaustive option is true, we simply ignore this value and look for

505 edit = {type: "substitute", index: i};	852 // matches throughout the rule set.

506 j++;	853 const heuristicRange = 1000;

507 }	854

508 else if (diff > 0)	855 let limit = exhaustive ? rulesInfo.length :

509 {	856 Math.min(index + heuristicRange, rulesInfo.length);

510 // If the source string is longer, this is a deletion.	857

511 edit = {type: "delete", index: i};	858 for (let i = index, j = i + 1; j < limit; j++)

	859 {

	860 let source = rulesInfo[i].rule.trigger["url-filter"];

	861 let target = rulesInfo[j].rule.trigger["url-filter"];

	862

	863 let edit = closeMatch(source, target);

	864

	865 if (edit)

	866 {

	867 let urlFilter, ruleInfo, match = {edit};

	868

	869 if (edit.type == "insert")

	870 {

	871 // Convert the insertion into a deletion and stick it on the target

	872 // rule instead. We can only group deletions and substitutions;

	873 // therefore insertions must be treated as deletions on the target

	874 // rule.

	875 urlFilter = target;

	876 ruleInfo = rulesInfo[j];

	877 match.index = i;

	878 edit.type = "delete";

512 }	879 }

513 else	880 else

514 {	881 {

515 edit = {type: "insert", index: i};	882 urlFilter = source;

	883 ruleInfo = rulesInfo[i];

	884 match.index = j;

516 }	885 }

517 }	886

518 }	887 // If the edit has an end index, it represents a multiple character

519 else if (!singleCharacterOnly)	888 // edit.

520 {	889 let multiEdit = !!edit.endIndex;

521 // Try another algorithm to find a multiple character deletion or	890

522 // insertion.	891 if (multiEdit)

523	892 {

524 let i = 0, j = 0;	893 // We only care about a single multiple character edit because the

525	894 // number of characters for such a match doesn't matter, we can

526 for (; i < s.length; i++)	895 // only merge with one other rule.

527 {	896 if (!ruleInfo.multiEditMatch)

528 if (s[i] != t[i])	897 ruleInfo.multiEditMatch = match;

529 break;

530 }

531

532 for (; j < t.length; j++)

533 {

534 if (t.length - j == i \|\|

535 s[s.length - j - 1] != t[t.length - j - 1])

536 break;

537 }

538

539 if (i != t.length - j)

540 return null;

541

542 for (let k = i; k < s.length - j; k++)

543 {

544 // If there are any special characters in the delta, bail.

545 if (s[k] == "." \|\| s[k] == "+" \|\| s[k] == "$" \|\| s[k] == "?" \|\|

546 s[k] == "{" \|\| s[k] == "}" \|\| s[k] == "(" \|\| s[k] == ")" \|\|

547 s[k] == "[" \|\| s[k] == "]" \|\| s[k] == "\\")

548 return null;

549 }

550

551 if (diff > 0)

552 {

553 edit = {type: "delete", index: i, endIndex: s.length - j};

554 }

555 else

556 {

557 edit = {type: "insert", index: i, endIndex: s.length - j};

558 }

559 }

560

561 return edit;

562 }

563

564 function mergeCloselyMatchingRules(rules, options)

565 {

566 const defaultOptions = {advanced: false, exhaustive: false};

567

568 options = Object.assign({}, defaultOptions, options);

569

570 // Closely matching rules are likely to be within a certain range. We only

571 // look for matches within this range. If we increase this value, it can give

572 // us more matches and a smaller resulting rule set, but possibly at a

573 // significant performance cost.

574 const heuristicRange = 100;

575

576 let rulesInfo = new Array(rules.length);

577

578 rules.forEach((rule, index) =>

579 {

580 rulesInfo[index] = {rule};

581

582 if (rule.action.type == "ignore-previous-rules")

583 {

584 rulesInfo[index].skip = true;

585 }

586 else

587 {

588 // Save a hash of the rule but without the URL filter. We use this for

589 // comparison later.

590 let copy = {

591 trigger: Object.assign({}, rule.trigger),

592 action: Object.assign({}, rule.action)

593 };

594

595 delete copy.trigger["url-filter"];

596

597 let stringified = JSON.stringify(copy);

598

599 if (options.exhaustive)

600 {

601 // The Pearson hash function expects all characters to be within the

602 // 8-bit range.

603 stringified = encodeURIComponent(stringified);

604

605 rulesInfo[index].ruleHash = pearsonHash(stringified);

606 }	898 }

607 else	899 else

608 {	900 {

609 rulesInfo[index].ruleHash = stringified;	901 // For single character edits, multiple rules can be merged into

	902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

	903 if (!ruleInfo.matches)

	904 ruleInfo.matches = new Array(urlFilter.length);

	905

	906 // Matches at a particular index. For example, for a source string

	907 // "ads", both target strings "ad" (deletion) and "adv"

	908 // (substitution) match at index 2, hence they are grouped together

	909 // to possibly be merged later into "ad[sv]?".

	910 let matchesForIndex = ruleInfo.matches[edit.index];

	911

	912 if (matchesForIndex)

	913 {

	914 matchesForIndex.push(match);

	915 }

	916 else

	917 {

	918 matchesForIndex = [match];

	919 ruleInfo.matches[edit.index] = matchesForIndex;

	920 }

	921

	922 // Keep track of the best set of matches. We later sort by this to

	923 // get best results.

	924 if (!ruleInfo.bestMatches \|\|

	925 matchesForIndex.length > ruleInfo.bestMatches.length)

	926 ruleInfo.bestMatches = matchesForIndex;

610 }	927 }

611 }	928 }

612 });	929 }

613	930 }

614 for (let i = 0; i < rules.length; i++)	931

615 {	932 function mergeCandidateRulesByURLFilter(rulesInfo)

616 if (rulesInfo[i].skip)	933 {

617 continue;

618

619 let limit = options.exhaustive ? rules.length :

620 Math.min(i + heuristicRange, rules.length);

621

622 for (let j = i + 1; j < limit; j++)

623 {

624 if (rulesInfo[j].skip)

625 continue;

626

627 // Check if the rules are identical except for the URL filter.

628 if (rulesInfo[i].ruleHash == rulesInfo[j].ruleHash)

629 {

630 let source = rules[i].trigger["url-filter"];

631 let target = rules[j].trigger["url-filter"];

632

633 let edit = closeMatch(source, target, !options.advanced);

634

635 if (edit)

636 {

637 let urlFilter, ruleInfo, match = {edit};

638

639 if (edit.type == "insert")

640 {

641 // Convert the insertion into a deletion and stick it on the target

642 // rule instead. We can only group deletions and substitutions;

643 // therefore insertions must be treated as deletions on the target

644 // rule.

645 urlFilter = target;

646 ruleInfo = rulesInfo[j];

647 match.index = i;

648 edit.type = "delete";

649 }

650 else

651 {

652 urlFilter = source;

653 ruleInfo = rulesInfo[i];

654 match.index = j;

655 }

656

657 // If the edit has an end index, it represents a multiple character

658 // edit.

659 let multiEdit = !!edit.endIndex;

660

661 if (multiEdit)

662 {

663 // We only care about a single multiple character edit because the

664 // number of characters for such a match doesn't matter, we can

665 // only merge with one other rule.

666 if (!ruleInfo.multiEditMatch)

667 ruleInfo.multiEditMatch = match;

668 }

669 else

670 {

671 // For single character edits, multiple rules can be merged into

672 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

673 if (!ruleInfo.matches)

674 ruleInfo.matches = new Array(urlFilter.length + 1);

675

676 // Matches at a particular index. For example, for a source string

677 // "ads", both target strings "ad" (deletion) and "adv"

678 // (substitution) match at index 2, hence they are grouped together

679 // to possibly be merged later into "ad[sv]?".

680 let matchesForIndex = ruleInfo.matches[edit.index];

681

682 if (matchesForIndex)

683 {

684 matchesForIndex.push(match);

685 }

686 else

687 {

688 matchesForIndex = [match];

689 ruleInfo.matches[edit.index] = matchesForIndex;

690 }

691

692 // Keep track of the best set of matches. We later sort by this to

693 // get best results.

694 if (!ruleInfo.bestMatches \|\|

695 matchesForIndex.length > ruleInfo.bestMatches.length)

696 ruleInfo.bestMatches = matchesForIndex;

697 }

698 }

699 }

700 }

701 }

702

703 // Filter out rules that have no matches at all.	934 // Filter out rules that have no matches at all.

704 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>	935 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>

705 {	936 {

706 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch	937 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch

707 });	938 });

708	939

709 // For best results, we have to sort the candidates by the largest set of	940 // For best results, we have to sort the candidates by the largest set of

710 // matches.	941 // matches.

711 //	942 //

712 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to	943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
761 if (best.length > 0)	992 if (best.length > 0)

762 {	993 {

763 let urlFilter = rule.trigger["url-filter"];	994 let urlFilter = rule.trigger["url-filter"];

764	995

765 let editIndex = best[0].edit.index;	996 let editIndex = best[0].edit.index;

766	997

767 if (!multiEdit)	998 if (!multiEdit)

768 {	999 {

769 // Merge all the matching rules into this one.	1000 // Merge all the matching rules into this one.

770	1001

771 let characters = [];	1002 let characters = [urlFilter[editIndex]];

772 let quantifier = "";	1003 let quantifier = "";

773	1004

774 for (let match of best)	1005 for (let match of best)

775 {	1006 {

776 if (match.edit.type == "delete")	1007 if (match.edit.type == "delete")

777 {	1008 {

778 quantifier = "?";	1009 quantifier = "?";

779 }	1010 }

780 else	1011 else

781 {	1012 {

782 let character = rules[match.index].trigger["url-filter"][editIndex];	1013 let character = rulesInfo[match.index].rule

783 characters.push(character);	1014 .trigger["url-filter"][editIndex];

	1015

	1016 // Insert any hyphen at the beginning so it gets interpreted as a

	1017 // literal hyphen.

	1018 if (character == "-")

	1019 characters.unshift(character);

	1020 else

	1021 characters.push(character);

784 }	1022 }

785	1023

786 // Mark the target rule as merged so other rules don't try to merge	1024 // Mark the target rule as merged so other rules don't try to merge

787 // it again.	1025 // it again.

788 rulesInfo[match.index].merged = true;	1026 rulesInfo[match.index].merged = true;

789 }	1027 }

790	1028

791 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +	1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +

792 urlFilter.substring(editIndex + 1);	1030 urlFilter.substring(editIndex + 1);

793 if (characters.length > 0)	1031 if (characters.length > 1)

794 {	1032 {

795 urlFilter = urlFilter.substring(0, editIndex) + "[" +	1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +

796 urlFilter[editIndex] + characters.join("") + "]" +	1034 characters.join("") + "]" +

797 urlFilter.substring(editIndex + 1);	1035 urlFilter.substring(editIndex + 1);

798 }	1036 }

799 }	1037 }

800 else	1038 else

801 {	1039 {

802 let editEndIndex = best[0].edit.endIndex;	1040 let editEndIndex = best[0].edit.endIndex;

803	1041

804 // Mark the target rule as merged so other rules don't try to merge it	1042 // Mark the target rule as merged so other rules don't try to merge it

805 // again.	1043 // again.

806 rulesInfo[best[0].index].merged = true;	1044 rulesInfo[best[0].index].merged = true;

807	1045

808 urlFilter = urlFilter.substring(0, editIndex) + "(" +	1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +

809 urlFilter.substring(editIndex, editEndIndex) + ")?" +	1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +

810 urlFilter.substring(editEndIndex);	1048 urlFilter.substring(editEndIndex);

811 }	1049 }

812	1050

813 rule.trigger["url-filter"] = urlFilter;	1051 rule.trigger["url-filter"] = urlFilter;

814	1052

815 // Mark this rule as one that has had other rules merged into it.	1053 // Mark this rule as one that has had other rules merged into it.

816 ruleInfo.mergedInto = true;	1054 ruleInfo.mergedInto = true;

817 }	1055 }

818 }	1056 }

819	1057 }

820 // Filter out rules that have been merged into other rules.	1058

821 return rulesInfo.filter(ruleInfo => !ruleInfo.merged)	1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)

822 .map(ruleInfo => ruleInfo.rule);	1060 {

	1061 return async(rulesInfo, (ruleInfo, index) => () =>

	1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

	1063 )

	1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));

	1065 }

	1066

	1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)

	1068 {

	1069 if (rulesInfo.length <= 1)

	1070 return;

	1071

	1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);

	1073

	1074 for (let i = 1; i < rulesInfo.length; i++)

	1075 {

	1076 for (let value of rulesInfo[i].rule[propertyType][property] \|\| [])

	1077 valueSet.add(value);

	1078

	1079 rulesInfo[i].merged = true;

	1080 }

	1081

	1082 if (valueSet.size > 0)

	1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);

	1084

	1085 rulesInfo[0].mergedInto = true;

	1086 }

	1087

	1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)

	1089 {

	1090 let mergeableRulesInfoByGroup = new Map();

	1091

	1092 for (let ruleInfo of rulesInfo)

	1093 {

	1094 let copy = {

	1095 trigger: Object.assign({}, ruleInfo.rule.trigger),

	1096 action: Object.assign({}, ruleInfo.rule.action)

	1097 };

	1098

	1099 delete copy[propertyType][property];

	1100

	1101 let groupKey = JSON.stringify(copy);

	1102

	1103 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey);

	1104

	1105 if (mergeableRulesInfo)

	1106 mergeableRulesInfo.push(ruleInfo);

	1107 else

	1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);

	1109 }

	1110

	1111 return mergeableRulesInfoByGroup;

	1112 }

	1113

	1114 function mergeRules(rules, exhaustive)

	1115 {

	1116 let rulesInfo = rules.map(rule => ({rule}));

	1117

	1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];

	1119

	1120 return async(() =>

	1121 {

	1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");

	1123 return async(map.values(), mergeableRulesInfo => () =>

	1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)

	1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))

	1126 )

	1127 .then(() =>

	1128 {

	1129 // Filter out rules that are redundant or have been merged into other

	1130 // rules.

	1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&

	1132 !ruleInfo.merged);

	1133 });

	1134 })

	1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>

	1136 {

	1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",

	1138 arrayProperty);

	1139 return async(map.values(), mergeableRulesInfo => () =>

	1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)

	1141 )

	1142 .then(() =>

	1143 {

	1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);

	1145 });

	1146 }))

	1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));

823 }	1148 }

824	1149

825 let ContentBlockerList =	1150 let ContentBlockerList =

826 /**	1151 /**

827 * Create a new Adblock Plus filter to content blocker list converter	1152 * Create a new Adblock Plus filter to content blocker list converter

828 *	1153 *

	1154 * @param {object} options Options for content blocker list generation

	1155 *

829 * @constructor	1156 * @constructor

830 */	1157 */

831 exports.ContentBlockerList = function ()	1158 exports.ContentBlockerList = function (options)

832 {	1159 {

	1160 const defaultOptions = {

	1161 merge: "auto"

	1162 };

	1163

	1164 this.options = Object.assign({}, defaultOptions, options);

	1165

833 this.requestFilters = [];	1166 this.requestFilters = [];

834 this.requestExceptions = [];	1167 this.requestExceptions = [];

835 this.elemhideFilters = [];	1168 this.elemhideFilters = [];

836 this.elemhideExceptions = [];	1169 this.elemhideExceptions = [];

	1170 this.genericblockExceptions = [];

	1171 this.generichideExceptions = [];

837 this.elemhideSelectorExceptions = new Map();	1172 this.elemhideSelectorExceptions = new Map();

838 };	1173 };

839	1174

840 /**	1175 /**

841 * Add Adblock Plus filter to be converted	1176 * Add Adblock Plus filter to be converted

842 *	1177 *

843 * @param {Filter} filter Filter to convert	1178 * @param {Filter} filter Filter to convert

844 */	1179 */

845 ContentBlockerList.prototype.addFilter = function(filter)	1180 ContentBlockerList.prototype.addFilter = function(filter)

846 {	1181 {

847 if (filter.sitekeys)	1182 if (filter.sitekeys)

848 return;	1183 return;

849 if (filter instanceof filterClasses.RegExpFilter &&	1184 if (filter instanceof filterClasses.RegExpFilter &&

850 filter.regexpSource == null)	1185 filter.regexpSource == null)

851 return;	1186 return;

852	1187

853 if (filter instanceof filterClasses.BlockingFilter)	1188 if (filter instanceof filterClasses.BlockingFilter)

854 this.requestFilters.push(filter);	1189 this.requestFilters.push(filter);

855	1190

856 if (filter instanceof filterClasses.WhitelistFilter)	1191 if (filter instanceof filterClasses.WhitelistFilter)

857 {	1192 {

858 if (filter.contentType & (typeMap.DOCUMENT \| whitelistableRequestTypes))	1193 if (filter.contentType & (typeMap.DOCUMENT \| whitelistableRequestTypes))

859 this.requestExceptions.push(filter);	1194 this.requestExceptions.push(filter);

860	1195

861 if (filter.contentType & typeMap.ELEMHIDE)	1196 if (filter.contentType & typeMap.GENERICBLOCK)

862 this.elemhideExceptions.push(filter);	1197 this.genericblockExceptions.push(filter);

	1198

	1199 if (filter.contentType & typeMap.ELEMHIDE)

	1200 this.elemhideExceptions.push(filter);

	1201 else if (filter.contentType & typeMap.GENERICHIDE)

	1202 this.generichideExceptions.push(filter);

863 }	1203 }

864	1204

865 if (filter instanceof filterClasses.ElemHideFilter)	1205 if (filter instanceof filterClasses.ElemHideFilter)

866 this.elemhideFilters.push(filter);	1206 this.elemhideFilters.push(filter);

867	1207

868 if (filter instanceof filterClasses.ElemHideException)	1208 if (filter instanceof filterClasses.ElemHideException)

869 {	1209 {

870 let domains = this.elemhideSelectorExceptions[filter.selector];	1210 let domains = this.elemhideSelectorExceptions[filter.selector];

871 if (!domains)	1211 if (!domains)

872 domains = this.elemhideSelectorExceptions[filter.selector] = [];	1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];

873	1213

874 parseDomains(filter.domains, domains, []);	1214 parseDomains(filter.domains, domains, []);

875 }	1215 }

876 };	1216 };

877	1217

878 /**	1218 /**

879 * Generate content blocker list for all filters that were added	1219 * Generate content blocker list for all filters that were added

880 *

881 * @returns {Filter} filter Filter to convert

882 */	1220 */

883 ContentBlockerList.prototype.generateRules = function(options)	1221 ContentBlockerList.prototype.generateRules = function()

884 {	1222 {

885 const defaultOptions = {	1223 let cssRules = [];

886 merge: false,	1224 let cssExceptionRules = [];

887 fastMerge: true,	1225 let blockingRules = [];

888 advancedMerge: null,	1226 let blockingExceptionRules = [];

889 exhaustiveMerge: null	1227

890 };	1228 let ruleGroups = [cssRules, cssExceptionRules,

891	1229 blockingRules, blockingExceptionRules];

892 options = Object.assign({}, defaultOptions, options);	1230

893	1231 let genericSelectors = [];

894 let rules = [];

895

896 let groupedElemhideFilters = new Map();	1232 let groupedElemhideFilters = new Map();

	1233

897 for (let filter of this.elemhideFilters)	1234 for (let filter of this.elemhideFilters)

898 {	1235 {

899 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);	1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);

900 if (!result)	1237 if (!result)

901 continue;	1238 continue;

902	1239

903 if (result.matchDomains.length == 0)	1240 if (result.matchDomains.length == 0)

904 result.matchDomains = ["^https?://"];	1241 {

905	1242 genericSelectors.push(result.selector);

906 for (let matchDomain of result.matchDomains)	1243 }

907 {	1244 else

908 let group = groupedElemhideFilters.get(matchDomain) \|\| [];	1245 {

909 group.push(result.selector);	1246 for (let matchDomain of result.matchDomains)

910 groupedElemhideFilters.set(matchDomain, group);	1247 {

911 }	1248 let group = groupedElemhideFilters.get(matchDomain) \|\| [];

912 }	1249 group.push(result.selector);

	1250 groupedElemhideFilters.set(matchDomain, group);

	1251 }

	1252 }

	1253 }

	1254

	1255 // Separate out the element hiding exceptions that have only a hostname part

	1256 // from the rest. This allows us to implement a workaround for issue #5345

	1257 // (WebKit bug #167423), but as a bonus it also reduces the number of

	1258 // generated rules. The downside is that the exception will only apply to the

	1259 // top-level document, not to iframes. We have to live with this until the

	1260 // WebKit bug is fixed in all supported versions of Safari.

	1261 // https://bugs.webkit.org/show_bug.cgi?id=167423

	1262 //

	1263 // Note that as a result of this workaround we end up with a huge rule set in

	1264 // terms of the amount of memory used. This can cause Node.js to throw

	1265 // "JavaScript heap out of memory". To avoid this, call Node.js with

	1266 // --max_old_space_size=4096

	1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);

	1268

	1269 let genericSelectorExceptionDomains =

	1270 extractFilterDomains(this.generichideExceptions);

	1271 elemhideExceptionDomains.forEach(name =>

	1272 {

	1273 genericSelectorExceptionDomains.add(name);

	1274 });

	1275

	1276 addCSSRules(cssRules, genericSelectors, null,

	1277 genericSelectorExceptionDomains);

	1278

	1279 // Filter out whitelisted domains.

	1280 elemhideExceptionDomains.forEach(domain =>

	1281 groupedElemhideFilters.delete(domain));

913	1282

914 groupedElemhideFilters.forEach((selectors, matchDomain) =>	1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>

915 {	1284 {

916 while (selectors.length)	1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);

917 {	1286 });

918 let selector = selectors.splice(0, selectorLimit).join(", ");	1287

919	1288 let requestFilterExceptionDomains = [];

920 // As of Safari 9.0 element IDs are matched as lowercase. We work around	1289 for (let filter of this.genericblockExceptions)

921 // this by converting to the attribute format [id="elementID"]	1290 {

922 selector = convertIDSelectorsToAttributeSelectors(selector);	1291 let parsed = parseFilterRegexpSource(filter.regexpSource);

923	1292 if (parsed.hostname)

924 rules.push({	1293 requestFilterExceptionDomains.push(parsed.hostname);

925 trigger: {"url-filter": matchDomain,	1294 }

926 "url-filter-is-case-sensitive": true},	1295

927 action: {type: "css-display-none",	1296 for (let filter of this.requestFilters)

928 selector: selector}	1297 {

	1298 convertFilterAddRules(blockingRules, filter, "block", true,

	1299 requestFilterExceptionDomains);

	1300 }

	1301

	1302 for (let filter of this.requestExceptions)

	1303 {

	1304 convertFilterAddRules(blockingExceptionRules, filter,

	1305 "ignore-previous-rules", true);

	1306 }

	1307

	1308 return async(ruleGroups, (group, index) => () =>

	1309 {

	1310 let next = () =>

	1311 {

	1312 if (index == ruleGroups.length - 1)

	1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);

	1314 };

	1315

	1316 if (this.options.merge == "all" \|\|

	1317 (this.options.merge == "auto" &&

	1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))

	1319 {

	1320 return mergeRules(ruleGroups[index], this.options.merge == "all")

	1321 .then(rules =>

	1322 {

	1323 ruleGroups[index] = rules;

	1324 return next();

929 });	1325 });

930 }	1326 }

	1327

	1328 return next();

931 });	1329 });

932

933 for (let filter of this.elemhideExceptions)

934 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);

935 for (let filter of this.requestFilters)

936 convertFilterAddRules(rules, filter, "block", true);

937 for (let filter of this.requestExceptions)

938 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

939

940 rules = rules.filter(rule => !hasNonASCI(rule));

941

942 if (options.merge)

943 {

944 // If the more specific options are specified (e.g. "advanced" and

945 // "exhaustive"), they override the more general options (e.g. "fast").

946 let mergeOptions = {

947 advanced: options.advancedMerge \|\|

948 (!options.fastMerge && options.advancedMerge != false),

949 exhaustive: options.exhaustiveMerge \|\|

950 (!options.fastMerge && options.exhaustiveMerge != false)

951 };

952

953 rules = mergeCloselyMatchingRules(rules, mergeOptions);

954 }

955

956 return rules;

957 };	1330 };

LEFT	RIGHT