Rietveld Code Review Tool

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Left Patch Set: Make generateRules asynchronous Created May 23, 2017, 4:22 p.m.
Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 /** @module abp2blocklist */ 18 /** @module abp2blocklist */
19 19
20 "use strict"; 20 "use strict";
21 21
22 let filterClasses = require("filterClasses"); 22 let filterClasses = require("filterClasses");
23 let tldjs = require("tldjs");
24 let punycode = require("punycode"); 23 let punycode = require("punycode");
25 24
26 const selectorLimit = 5000; 25 const selectorLimit = 5000;
27 const typeMap = filterClasses.RegExpFilter.typeMap; 26 const typeMap = filterClasses.RegExpFilter.typeMap;
28 const whitelistableRequestTypes = (typeMap.IMAGE 27
29 | typeMap.STYLESHEET 28 const httpRequestTypes = typeMap.IMAGE |
30 | typeMap.SCRIPT 29 typeMap.STYLESHEET |
31 | typeMap.FONT 30 typeMap.SCRIPT |
32 | typeMap.MEDIA 31 typeMap.FONT |
33 | typeMap.POPUP 32 typeMap.MEDIA |
34 | typeMap.OBJECT 33 typeMap.POPUP |
35 | typeMap.OBJECT_SUBREQUEST 34 typeMap.OBJECT |
36 | typeMap.XMLHTTPREQUEST 35 typeMap.OBJECT_SUBREQUEST |
37 | typeMap.PING 36 typeMap.XMLHTTPREQUEST |
38 | typeMap.SUBDOCUMENT 37 typeMap.PING |
39 | typeMap.OTHER); 38 typeMap.SUBDOCUMENT |
39 typeMap.OTHER;
40 const rawRequestTypes = typeMap.XMLHTTPREQUEST |
41 typeMap.WEBSOCKET |
42 typeMap.WEBRTC |
43 typeMap.OBJECT_SUBREQUEST |
44 typeMap.PING |
45 typeMap.OTHER;
46 const whitelistableRequestTypes = httpRequestTypes |
47 typeMap.WEBSOCKET |
48 typeMap.WEBRTC;
40 49
41 function callLater(func) 50 function callLater(func)
42 { 51 {
43 return new Promise(resolve => 52 return new Promise(resolve =>
44 { 53 {
45 let call = () => resolve(func()); 54 let call = () => resolve(func());
46 55
47 // If this looks like Node.js, call process.nextTick, otherwise call 56 // If this looks like Node.js, call process.nextTick, otherwise call
48 // setTimeout. 57 // setTimeout.
49 if (typeof process != "undefined") 58 if (typeof process != "undefined")
50 process.nextTick(call); 59 process.nextTick(call);
51 else 60 else
52 setTimeout(call, 0); 61 setTimeout(call, 0);
53 }); 62 });
54 } 63 }
55 64
56 function async(funcs) 65 function async(callees, mapFunction)
57 { 66 {
58 if (!Array.isArray(funcs)) 67 if (!(Symbol.iterator in callees))
59 funcs = Array.from(arguments); 68 callees = [callees];
60 69
61 let lastPause = Date.now(); 70 let lastPause = Date.now();
62 71 let index = 0;
63 return funcs.reduce((promise, next) => promise.then(() => 72
64 { 73 let promise = Promise.resolve();
65 // If it has been 100ms or longer since the last call, take a pause. This 74
66 // keeps the browser from freezing up. 75 for (let next of callees)
67 let now = Date.now(); 76 {
68 if (now - lastPause >= 100) 77 let currentIndex = index;
69 { 78
70 lastPause = now; 79 promise = promise.then(() =>
71 return callLater(next); 80 {
72 } 81 if (mapFunction)
73 82 next = mapFunction(next, currentIndex);
74 return next(); 83
75 }), 84 // If it has been 100ms or longer since the last call, take a pause. This
76 Promise.resolve()); 85 // keeps the browser from freezing up.
86 let now = Date.now();
87 if (now - lastPause >= 100)
88 {
89 lastPause = now;
90 return callLater(next);
91 }
92
93 return next();
94 });
95
96 index++;
97 }
98
99 return promise;
77 } 100 }
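
A minimal usage sketch of the reworked async() helper above (illustrative only; the step names and doStep are hypothetical): it accepts any iterable plus an optional mapFunction that turns each element into a zero-argument function, chains the calls on a promise, and yields via callLater() whenever 100ms have passed since the last pause.

  // Hypothetical usage inside this module.
  let steps = ["parse", "convert", "merge"];
  let doStep = (name, index) => { /* some synchronous work for this step */ };

  async(steps, (name, index) => () => doStep(name, index))
    .then(() => { /* all steps done, event loop was never blocked for long */ });
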
78 101
79 function parseDomains(domains, included, excluded) 102 function parseDomains(domains, included, excluded)
80 { 103 {
81 for (let domain in domains) 104 for (let domain in domains)
82 { 105 {
83 if (domain != "") 106 if (domain != "")
84 { 107 {
85 let enabled = domains[domain]; 108 let enabled = domains[domain];
86 domain = punycode.toASCII(domain.toLowerCase()); 109 domain = punycode.toASCII(domain.toLowerCase());
87 110
88 if (!enabled) 111 if (!enabled)
89 excluded.push(domain); 112 excluded.push(domain);
90 else if (!domains[""]) 113 else if (!domains[""])
91 included.push(domain); 114 included.push(domain);
92 } 115 }
93 } 116 }
94 } 117 }
95 118
96 function escapeRegExp(s) 119 function escapeRegExp(s)
97 { 120 {
98 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 121 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
99 } 122 }
100 123
101 function matchDomain(domain) 124 function matchDomain(domain)
102 { 125 {
126 if (!domain)
127 return "^https?://";
128
103 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; 129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
130 }
131
132 function getURLSchemes(contentType)
133 {
134 // If the given content type includes all supported URL schemes, simply
135 // return a single generic URL scheme pattern. This minimizes the size of the
136 // generated rule set. The downside to this is that it will also match
137 // schemes that we do not want to match (e.g. "ftp://"), but this can be
138 // mitigated by adding exceptions for those schemes.
139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&
140 contentType & httpRequestTypes)
141 return ["[^:]+:(//)?"];
142
143 let urlSchemes = [];
144
145 if (contentType & typeMap.WEBSOCKET)
146 urlSchemes.push("wss?://");
147
148 if (contentType & typeMap.WEBRTC)
149 urlSchemes.push("stuns?:", "turns?:");
150
151 if (contentType & httpRequestTypes)
152 urlSchemes.push("https?://");
153
154 return urlSchemes;
155 }
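
For illustration, the values getURLSchemes() returns for a few content-type masks, derived from the branches above (assuming typeMap from this module):

  // getURLSchemes(typeMap.WEBSOCKET)                 -> ["wss?://"]
  // getURLSchemes(typeMap.WEBRTC)                    -> ["stuns?:", "turns?:"]
  // getURLSchemes(typeMap.IMAGE)                     -> ["https?://"]
  // getURLSchemes(typeMap.IMAGE | typeMap.WEBSOCKET |
  //               typeMap.WEBRTC)                    -> ["[^:]+:(//)?"]
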
156
157 function findSubdomainsInList(domain, list)
158 {
159 let subdomains = [];
160 let suffixLength = domain.length + 1;
161
162 for (let name of list)
163 {
164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
165 subdomains.push(name.slice(0, -suffixLength));
166 }
167
168 return subdomains;
169 }
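
For example (hypothetical inputs), findSubdomainsInList() returns the subdomain labels of the list entries that fall under the given domain:

  findSubdomainsInList("example.com",
                       ["www.example.com", "ads.example.com", "example.org"]);
  // -> ["www", "ads"]
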
170
171 function extractFilterDomains(filters)
172 {
173 let domains = new Set();
174 for (let filter of filters)
175 {
176 let parsed = parseFilterRegexpSource(filter.regexpSource);
177 if (parsed.justHostname)
178 domains.add(parsed.hostname);
179 }
180 return domains;
104 } 181 }
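
A sketch of what extractFilterDomains() yields for hostname-only filters (hypothetical filter objects carrying only the regexpSource property the function reads):

  extractFilterDomains([{regexpSource: "||example.com"},
                        {regexpSource: "||example.org/path"}]);
  // -> Set {"example.com"}   (the second filter is not hostname-only)
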
105 182
106 function convertElemHideFilter(filter, elemhideSelectorExceptions) 183 function convertElemHideFilter(filter, elemhideSelectorExceptions)
107 { 184 {
108 let included = []; 185 let included = [];
109 let excluded = []; 186 let excluded = [];
110 let rules = [];
111 187
112 parseDomains(filter.domains, included, excluded); 188 parseDomains(filter.domains, included, excluded);
113 189
114 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
115 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 191 return {matchDomains: included, selector: filter.selector};
116 } 192 }
117 193
118 /** 194 /**
119 * Parse the given filter "regexpSource" string. Producing a regular expression, 195 * Parse the given filter "regexpSource" string. Producing a regular expression,
120 * extracting the hostname (if any), deciding if the regular expression is safe 196 * extracting the hostname (if any), deciding if the regular expression is safe
121 * to be converted + matched as lower case and noting if the source contains 197 * to be converted + matched as lower case and noting if the source contains
122 * anything after the hostname.) 198 * anything after the hostname.)
123 * 199 *
124 * @param {string} text regexpSource property of a filter 200 * @param {string} text regexpSource property of a filter
201 * @param {string} urlScheme The URL scheme to use in the regular expression
125 * @returns {object} An object containing a regular expression string, a bool 202 * @returns {object} An object containing a regular expression string, a bool
126 * indicating if the filter can be safely matched as lower 203 * indicating if the filter can be safely matched as lower
127 * case, a hostname string (or undefined) and a bool 204 * case, a hostname string (or undefined) and a bool
128 * indicating if the source only contains a hostname or not: 205 * indicating if the source only contains a hostname or not:
129 * {regexp: "...", 206 * {regexp: "...",
130 * canSafelyMatchAsLowercase: true/false, 207 * canSafelyMatchAsLowercase: true/false,
131 * hostname: "...", 208 * hostname: "...",
132 * justHostname: true/false} 209 * justHostname: true/false}
133 */ 210 */
134 function parseFilterRegexpSource(text) 211 function parseFilterRegexpSource(text, urlScheme)
135 { 212 {
136 let regexp = []; 213 let regexp = [];
137 let lastIndex = text.length - 1; 214
215 // Convert the text into an array of Unicode characters.
216 //
217 // In the case of surrogate pairs (the smiley emoji, for example), one
218 // Unicode code point is represented by two JavaScript characters together.
219 // We want to iterate over Unicode code points rather than JavaScript
220 // characters.
221 let characters = Array.from(text);
222
223 let lastIndex = characters.length - 1;
138 let hostname; 224 let hostname;
139 let hostnameStart = null; 225 let hostnameStart = null;
140 let hostnameFinished = false; 226 let hostnameFinished = false;
141 let justHostname = false; 227 let justHostname = false;
142 let canSafelyMatchAsLowercase = false; 228 let canSafelyMatchAsLowercase = false;
143 229
144 for (let i = 0; i < text.length; i++) 230 if (!urlScheme)
145 { 231 urlScheme = getURLSchemes()[0];
146 let c = text[i]; 232
233 for (let i = 0; i < characters.length; i++)
234 {
235 let c = characters[i];
147 236
148 if (hostnameFinished) 237 if (hostnameFinished)
149 justHostname = false; 238 justHostname = false;
150 239
151 // If we're currently inside the hostname we have to be careful not to 240 // If we're currently inside the hostname we have to be careful not to
152 // escape any characters until after we have converted it to punycode. 241 // escape any characters until after we have converted it to punycode.
153 if (hostnameStart != null && !hostnameFinished) 242 if (hostnameStart != null && !hostnameFinished)
154 { 243 {
155 let endingChar = (c == "*" || c == "^" || 244 let endingChar = (c == "*" || c == "^" ||
156 c == "?" || c == "/" || c == "|"); 245 c == "?" || c == "/" || c == "|");
157 if (!endingChar && i != lastIndex) 246 if (!endingChar && i != lastIndex)
158 continue; 247 continue;
159 248
160 hostname = punycode.toASCII( 249 hostname = punycode.toASCII(
161 text.substring(hostnameStart, endingChar ? i : i + 1) 250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")
251 .toLowerCase()
162 ); 252 );
163 hostnameFinished = justHostname = true; 253 hostnameFinished = justHostname = true;
164 regexp.push(escapeRegExp(hostname)); 254 regexp.push(escapeRegExp(hostname));
165 if (!endingChar) 255 if (!endingChar)
166 break; 256 break;
167 } 257 }
168 258
169 switch (c) 259 switch (c)
170 { 260 {
171 case "*": 261 case "*":
172 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") 262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")
173 regexp.push(".*"); 263 regexp.push(".*");
174 break; 264 break;
175 case "^": 265 case "^":
176 if (i < lastIndex) 266 let alphabet = "a-z";
177 regexp.push("."); 267 // If justHostname is true and we've encountered a "^", it means we're
268 // still in the hostname part of the URL. Since hostnames are always
269 // lower case (Punycode), there's no need to include "A-Z" in the
270 // pattern. Further, subsequent code may lower-case the entire regular
271 // expression (if the URL contains only the hostname part), leaving us
272 // with "a-za-z", which would be redundant.
273 if (!justHostname)
274 alphabet = "A-Z" + alphabet;
275 let digits = "0-9";
276 // Note that the "-" must appear first here in order to retain its
277 // literal meaning within the brackets.
278 let specialCharacters = "-_.%";
279 let separator = "[^" + specialCharacters + alphabet + digits + "]";
280 if (i == 0)
281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");
282 else if (i == lastIndex)
283 regexp.push("(" + separator + ".*)?$");
284 else
285 regexp.push(separator);
178 break; 286 break;
179 case "|": 287 case "|":
180 if (i == 0) 288 if (i == 0)
181 { 289 {
182 regexp.push("^"); 290 regexp.push("^");
183 break; 291 break;
184 } 292 }
185 if (i == lastIndex) 293 if (i == lastIndex)
186 { 294 {
187 regexp.push("$"); 295 regexp.push("$");
188 break; 296 break;
189 } 297 }
190 if (i == 1 && text[0] == "|") 298 if (i == 1 && characters[0] == "|")
191 { 299 {
192 hostnameStart = i + 1; 300 hostnameStart = i + 1;
193 canSafelyMatchAsLowercase = true; 301 canSafelyMatchAsLowercase = true;
194 regexp.push("https?://([^/]+\\.)?"); 302 regexp.push(urlScheme + "([^/]+\\.)?");
195 break; 303 break;
196 } 304 }
197 regexp.push("\\|"); 305 regexp.push("\\|");
198 break; 306 break;
199 case "/": 307 case "/":
200 if (!hostnameFinished && 308 if (!hostnameFinished &&
201 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 309 characters[i - 2] == ":" && characters[i - 1] == "/")
202 { 310 {
203 hostnameStart = i + 1; 311 hostnameStart = i + 1;
204 canSafelyMatchAsLowercase = true; 312 canSafelyMatchAsLowercase = true;
205 } 313 }
206 regexp.push("/"); 314 regexp.push("/");
207 break; 315 break;
208 case ".": case "+": case "$": case "?": 316 case ".": case "+": case "$": case "?":
209 case "{": case "}": case "(": case ")": 317 case "{": case "}": case "(": case ")":
210 case "[": case "]": case "\\": 318 case "[": case "]": case "\\":
211 regexp.push("\\", c); 319 regexp.push("\\", c);
212 break; 320 break;
213 default: 321 default:
214 if (hostnameFinished && (c >= "a" && c <= "z" || 322 if (hostnameFinished && (c >= "a" && c <= "z" ||
215 c >= "A" && c <= "Z")) 323 c >= "A" && c <= "Z"))
216 canSafelyMatchAsLowercase = false; 324 canSafelyMatchAsLowercase = false;
217 regexp.push(c); 325 regexp.push(c == "%" ? c : encodeURI(c));
218 } 326 }
219 } 327 }
220 328
221 return { 329 return {
222 regexp: regexp.join(""), 330 regexp: regexp.join(""),
223 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
224 hostname: hostname, 332 hostname: hostname,
225 justHostname: justHostname 333 justHostname: justHostname
226 }; 334 };
227 } 335 }
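
To make the parse result concrete, a rough example of what the updated function returns for a hostname-anchored pattern (illustrative, with the HTTP scheme pattern passed explicitly):

  parseFilterRegexpSource("||example.com", "https?://");
  // -> {
  //      regexp: "^https?://([^/]+\\.)?example\\.com",
  //      canSafelyMatchAsLowercase: true,
  //      hostname: "example.com",
  //      justHostname: true
  //    }
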
228 336
229 function getResourceTypes(filter) 337 function getResourceTypes(contentType)
230 { 338 {
231 let types = []; 339 let types = [];
232 340
233 if (filter.contentType & typeMap.IMAGE) 341 if (contentType & typeMap.IMAGE)
234 types.push("image"); 342 types.push("image");
235 if (filter.contentType & typeMap.STYLESHEET) 343 if (contentType & typeMap.STYLESHEET)
236 types.push("style-sheet"); 344 types.push("style-sheet");
237 if (filter.contentType & typeMap.SCRIPT) 345 if (contentType & typeMap.SCRIPT)
238 types.push("script"); 346 types.push("script");
239 if (filter.contentType & typeMap.FONT) 347 if (contentType & typeMap.FONT)
240 types.push("font"); 348 types.push("font");
241 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) 349 if (contentType & (typeMap.MEDIA | typeMap.OBJECT))
242 types.push("media"); 350 types.push("media");
243 if (filter.contentType & typeMap.POPUP) 351 if (contentType & typeMap.POPUP)
244 types.push("popup"); 352 types.push("popup");
245 if (filter.contentType & (typeMap.XMLHTTPREQUEST | 353 if (contentType & rawRequestTypes)
246 typeMap.OBJECT_SUBREQUEST |
247 typeMap.PING |
248 typeMap.OTHER))
249 types.push("raw"); 354 types.push("raw");
250 if (filter.contentType & typeMap.SUBDOCUMENT) 355 if (contentType & typeMap.SUBDOCUMENT)
251 types.push("document"); 356 types.push("document");
252 357
253 return types; 358 return types;
254 } 359 }
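
For instance (illustrative), with the refactored signature that takes a content-type mask directly:

  getResourceTypes(typeMap.IMAGE | typeMap.XMLHTTPREQUEST);
  // -> ["image", "raw"]   (XMLHTTPREQUEST is part of rawRequestTypes)
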
255 360
256 function addDomainPrefix(domains) 361 function makeRuleCopies(trigger, action, urlSchemes)
257 { 362 {
258 let result = []; 363 let copies = [];
259 364
260 for (let domain of domains) 365 // Always make a deep copy of the rule, since rules may have to be
261 { 366 // manipulated individually at a later stage.
262 result.push(domain); 367 let stringifiedTrigger = JSON.stringify(trigger);
263 368
264 if (tldjs.getDomain(domain) == domain) 369 let filterPattern = trigger["url-filter"].substring(1);
265 result.push("www." + domain); 370 let startIndex = 0;
266 } 371
267 372 // If the URL filter already begins with the first URL scheme pattern, skip
268 return result; 373 // it.
269 } 374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))
270 375 {
271 function convertFilterAddRules(rules, filter, action, withResourceTypes) 376 filterPattern = filterPattern.substring(urlSchemes[0].length);
272 { 377 startIndex = 1;
273 let parsed = parseFilterRegexpSource(filter.regexpSource); 378 }
379 else
380 {
381 filterPattern = ".*" + filterPattern;
382 }
383
384 for (let i = startIndex; i < urlSchemes.length; i++)
385 {
386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
387 "url-filter": "^" + urlSchemes[i] + filterPattern
388 });
389 copies.push({trigger: copyTrigger, action});
390 }
391
392 return copies;
393 }
394
395 function excludeTopURLFromTrigger(trigger)
396 {
397 trigger["unless-top-url"] = [trigger["url-filter"]];
398 if (trigger["url-filter-is-case-sensitive"])
399 trigger["top-url-filter-is-case-sensitive"] = true;
400 }
401
402 function convertFilterAddRules(rules, filter, action, withResourceTypes,
403 exceptionDomains, contentType)
404 {
405 if (!contentType)
406 contentType = filter.contentType;
407
408 // If WebSocket or WebRTC are given along with other options but not
409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,
410 // we must generate multiple rules. For example, for the filter
411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"
412 // and another rule with "^https?://" and "image". If we merge the two, we
413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)
414 // inadvertently.
415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&
416 !(contentType & typeMap.WEBRTC &&
417 contentType & rawRequestTypes & httpRequestTypes)) ||
418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&
419 !(contentType & typeMap.WEBSOCKET &&
420 contentType & rawRequestTypes & httpRequestTypes)))
421 {
422 if (contentType & typeMap.WEBSOCKET)
423 {
424 convertFilterAddRules(rules, filter, action, withResourceTypes,
425 exceptionDomains, typeMap.WEBSOCKET);
426 }
427
428 if (contentType & typeMap.WEBRTC)
429 {
430 convertFilterAddRules(rules, filter, action, withResourceTypes,
431 exceptionDomains, typeMap.WEBRTC);
432 }
433
434 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);
435
436 if (!contentType)
437 return;
438 }
439
440 let urlSchemes = getURLSchemes(contentType);
441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);
274 442
275 // For the special case of $document whitelisting filters with just a domain 443 // For the special case of $document whitelisting filters with just a domain
276 // we can generate an equivalent blocking rule exception using if-domain. 444 // we can generate an equivalent blocking rule exception using if-domain.
277 if (filter instanceof filterClasses.WhitelistFilter && 445 if (filter instanceof filterClasses.WhitelistFilter &&
278 filter.contentType & typeMap.DOCUMENT && 446 contentType & typeMap.DOCUMENT &&
279 parsed.justHostname) 447 parsed.justHostname)
280 { 448 {
281 rules.push({ 449 rules.push({
282 trigger: { 450 trigger: {
283 "url-filter": ".*", 451 "url-filter": ".*",
284 "if-domain": addDomainPrefix([parsed.hostname]) 452 "if-domain": ["*" + parsed.hostname]
285 }, 453 },
286 action: {type: "ignore-previous-rules"} 454 action: {type: "ignore-previous-rules"}
287 }); 455 });
288 // If the filter contains other supported options we'll need to generate 456 // If the filter contains other supported options we'll need to generate
289 // further rules for it, but if not we can simply return now. 457 // further rules for it, but if not we can simply return now.
290 if (!(filter.contentType & whitelistableRequestTypes)) 458 if (!(contentType & whitelistableRequestTypes))
291 return; 459 return;
292 } 460 }
293 461
294 let trigger = {"url-filter": parsed.regexp}; 462 let trigger = {"url-filter": parsed.regexp};
295 463
296 // Limit rules to HTTP(S) URLs 464 // If the URL filter begins with one of the URL schemes for this content
297 if (!/^(\^|http)/i.test(trigger["url-filter"])) 465 // type, we generate additional rules for all the URL scheme patterns;
298 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; 466 // otherwise, if the start of the URL filter literally matches the first URL
467 // scheme pattern, we just generate additional rules for the remaining URL
468 // scheme patterns.
469 //
470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"
471 // in front of this and generate two additional rules for
472 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,
473 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate
474 // "^turns?:([^/]+\\.)?foo" in addition.
475 //
476 // Note that the filter can be already anchored to the beginning
477 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional
478 // rules.
479 let needAltRules = trigger["url-filter"][0] != "^" ||
480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);
481
482 if (trigger["url-filter"][0] != "^")
483 {
484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)
485 .test(trigger["url-filter"])))
486 {
487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];
488 }
489
490 trigger["url-filter"] = "^" + trigger["url-filter"];
491 }
299 492
300 // For rules containing only a hostname we know that we're matching against 493 // For rules containing only a hostname we know that we're matching against
301 // a lowercase string unless the matchCase option was passed. 494 // a lowercase string unless the matchCase option was passed.
302 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) 495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
303 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
304 497
305 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) 498 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
306 trigger["url-filter-is-case-sensitive"] = true; 499 trigger["url-filter-is-case-sensitive"] = true;
307 500
308 let included = []; 501 let included = [];
309 let excluded = []; 502 let excluded = [];
310 503
311 parseDomains(filter.domains, included, excluded); 504 parseDomains(filter.domains, included, excluded);
312 505
506 if (exceptionDomains)
507 excluded = excluded.concat(exceptionDomains);
508
313 if (withResourceTypes) 509 if (withResourceTypes)
314 { 510 {
315 trigger["resource-type"] = getResourceTypes(filter); 511 let resourceTypes = getResourceTypes(contentType);
316 512
317 if (trigger["resource-type"].length == 0) 513 // Content blocker rules can't differentiate between sub-document requests
514 // (iframes) and top-level document requests. To avoid too many false
515 // positives, we prevent rules with no hostname part from blocking document
516 // requests.
517 //
518 // Once Safari 11 becomes our minimum supported version, we could change
519 // our approach here to use the new "unless-top-url" property instead.
520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)
521 resourceTypes = resourceTypes.filter(type => type != "document");
522
523 if (resourceTypes.length == 0)
318 return; 524 return;
525
526 trigger["resource-type"] = resourceTypes;
319 } 527 }
320 528
321 if (filter.thirdParty != null) 529 if (filter.thirdParty != null)
322 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
323 531
532 let addTopLevelException = false;
533
324 if (included.length > 0) 534 if (included.length > 0)
325 trigger["if-domain"] = addDomainPrefix(included); 535 {
536 trigger["if-domain"] = [];
537
538 for (let name of included)
539 {
540 // If this is a blocking filter or an element hiding filter, add the
541 // subdomain wildcard only if no subdomains have been excluded.
542 let notSubdomains = null;
543 if ((filter instanceof filterClasses.BlockingFilter ||
544 filter instanceof filterClasses.ElemHideFilter) &&
545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)
546 {
547 trigger["if-domain"].push(name);
548
549 // Add the "www" prefix but only if it hasn't been excluded.
550 if (!notSubdomains.includes("www"))
551 trigger["if-domain"].push("www." + name);
552 }
553 else
554 {
555 trigger["if-domain"].push("*" + name);
556 }
557 }
558 }
326 else if (excluded.length > 0) 559 else if (excluded.length > 0)
327 trigger["unless-domain"] = addDomainPrefix(excluded); 560 {
561 trigger["unless-domain"] = excluded.map(name => "*" + name);
562 }
563 else if (filter instanceof filterClasses.BlockingFilter &&
564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)
565 {
566 // Rules with a hostname part are still allowed to block document requests,
567 // but we add an exception for top-level documents.
568 //
569 // Note that we can only do this if there's no "unless-domain" property for
570 // now. This also only works in Safari 11 onwards, while older versions
571 // simply ignore this property. Once Safari 11 becomes our minimum
572 // supported version, we can merge "unless-domain" into "unless-top-url".
573 addTopLevelException = true;
574 excludeTopURLFromTrigger(trigger);
575 }
328 576
329 rules.push({trigger: trigger, action: {type: action}}); 577 rules.push({trigger: trigger, action: {type: action}});
330 } 578
331 579 if (needAltRules)
332 function hasNonASCI(obj) 580 {
333 { 581 // Generate additional rules for any alternative URL schemes.
334 if (typeof obj == "string") 582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))
335 { 583 {
336 if (/[^\x00-\x7F]/.test(obj)) 584 if (addTopLevelException)
337 return true; 585 excludeTopURLFromTrigger(altRule.trigger);
338 } 586
339 587 rules.push(altRule);
340 if (typeof obj == "object") 588 }
341 { 589 }
342 if (obj instanceof Array)
343 for (let item of obj)
344 if (hasNonASCI(item))
345 return true;
346
347 let names = Object.getOwnPropertyNames(obj);
348 for (let name of names)
349 if (hasNonASCI(obj[name]))
350 return true;
351 }
352
353 return false;
354 } 590 }
355 591
356 function convertIDSelectorsToAttributeSelectors(selector) 592 function convertIDSelectorsToAttributeSelectors(selector)
357 { 593 {
358 // First we figure out where all the IDs are 594 // First we figure out where all the IDs are
359 let sep = ""; 595 let sep = "";
360 let start = null; 596 let start = null;
361 let positions = []; 597 let positions = [];
362 for (let i = 0; i < selector.length; i++) 598 for (let i = 0; i < selector.length; i++)
363 { 599 {
(...skipping 33 matching lines...)
397 { 633 {
398 newSelector.push(selector.substring(i, pos.start)); 634 newSelector.push(selector.substring(i, pos.start));
399 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
400 i = pos.end; 636 i = pos.end;
401 } 637 }
402 newSelector.push(selector.substring(i)); 638 newSelector.push(selector.substring(i));
403 639
404 return newSelector.join(""); 640 return newSelector.join("");
405 } 641 }
406 642
643 function addCSSRules(rules, selectors, domain, exceptionDomains)
644 {
645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;
646
647 exceptionDomains.forEach(name =>
648 {
649 // For domain-specific filters, include the exception domains only if
650 // they're subdomains of the given domain.
651 if (!domain || name.substr(-domain.length - 1) == "." + domain)
652 unlessDomain.push("*" + name);
653 });
654
655 while (selectors.length)
656 {
657 let selector = selectors.splice(0, selectorLimit).join(", ");
658
659 // As of Safari 9.0 element IDs are matched as lowercase. We work around
660 // this by converting to the attribute format [id="elementID"]
661 selector = convertIDSelectorsToAttributeSelectors(selector);
662
663 let rule = {
664 trigger: {"url-filter": matchDomain(domain),
665 "url-filter-is-case-sensitive": true},
666 action: {type: "css-display-none",
667 selector: selector}
668 };
669
670 if (unlessDomain)
671 rule.trigger["unless-domain"] = unlessDomain;
672
673 rules.push(rule);
674 }
675 }
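
A minimal sketch of a rule produced by the new addCSSRules() helper, assuming a single class selector, a specific domain and no exception domains (hypothetical values):

  let rules = [];
  addCSSRules(rules, [".ad-banner"], "example.com", new Set());
  // rules[0] is now roughly:
  // {
  //   trigger: {"url-filter": "^https?://([^/:]*\\.)?example\\.com[/:]",
  //             "url-filter-is-case-sensitive": true},
  //   action: {type: "css-display-none", selector: ".ad-banner"}
  // }
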
676
407 /** 677 /**
408 * Check if two strings are a close match 678 * Check if two strings are a close match
409 * 679 *
410 * This function returns an edit operation, one of "substitute", "delete", and 680 * This function returns an edit operation, one of "substitute", "delete", and
411 * "insert", along with an index in the source string where the edit must occur 681 * "insert", along with an index in the source string where the edit must occur
412 * in order to arrive at the target string. If the strings are not a close 682 * in order to arrive at the target string. If the strings are not a close
413 * match, it returns null. 683 * match, it returns null.
414 * 684 *
415 * Two strings are considered to be a close match if they are one edit 685 * Two strings are considered to be a close match if they are one edit
416 * operation apart. 686 * operation apart.
(...skipping 38 matching lines...)
455 // calculation. 725 // calculation.
456 if (diff < 0) 726 if (diff < 0)
457 { 727 {
458 let tmp = s; 728 let tmp = s;
459 s = t; 729 s = t;
460 t = tmp; 730 t = tmp;
461 } 731 }
462 732
463 let edit = null; 733 let edit = null;
464 734
465 let i = 0, j = 0; 735 let i = 0;
736 let j = 0;
466 737
467 // Start from the beginning and keep going until we hit a character that 738 // Start from the beginning and keep going until we hit a character that
468 // doesn't match. 739 // doesn't match.
469 for (; i < s.length; i++) 740 for (; i < s.length; i++)
470 { 741 {
471 if (s[i] != t[i]) 742 if (s[i] != t[i])
472 break; 743 break;
473 } 744 }
474 745
475 // Now do exactly the same from the end, but also stop if we reach the 746 // Now do exactly the same from the end, but also stop if we reach the
(...skipping 43 matching lines...)
519 { 790 {
520 edit = {type: "insert", index: i}; 791 edit = {type: "insert", index: i};
521 792
522 if (diff < -1) 793 if (diff < -1)
523 edit.endIndex = s.length - j; 794 edit.endIndex = s.length - j;
524 } 795 }
525 796
526 return edit; 797 return edit;
527 } 798 }
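
Based on the documentation above (the function body is partly elided in this delta), the edits closeMatch() reports look roughly like this, illustrative values only:

  // closeMatch("ads", "adv")     -> {type: "substitute", index: 2}
  // closeMatch("ads", "ad")      -> {type: "delete", index: 2}
  // closeMatch("ad", "ads")      -> {type: "insert", index: 2}
  // closeMatch("ad", "adserver") -> an "insert" edit carrying an endIndex,
  //                                 i.e. a multiple character insertion
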
528 799
529 function eliminateRedundantRulesByURLFilter(rulesInfo) 800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)
530 { 801 {
531 for (let i = 0; i < rulesInfo.length; i++) 802 const heuristicRange = 1000;
803
804 let ol = rulesInfo.length;
805
806 // Throw out obviously redundant rules.
807 return async(rulesInfo, (ruleInfo, index) => () =>
532 { 808 {
533 // If this rule is already marked as redundant, don't bother comparing it 809 // If this rule is already marked as redundant, don't bother comparing it
534 // with other rules. 810 // with other rules.
535 if (rulesInfo[i].redundant) 811 if (rulesInfo[index].redundant)
536 continue; 812 return;
537 813
538 for (let j = i + 1; j < rulesInfo.length; j++) 814 let limit = exhaustive ? rulesInfo.length :
815 Math.min(index + heuristicRange, rulesInfo.length);
816
817 for (let i = index, j = i + 1; j < limit; j++)
539 { 818 {
540 if (rulesInfo[j].redundant) 819 if (rulesInfo[j].redundant)
541 continue; 820 continue;
542 821
543 let source = rulesInfo[i].rule.trigger["url-filter"]; 822 let source = rulesInfo[i].rule.trigger["url-filter"];
544 let target = rulesInfo[j].rule.trigger["url-filter"]; 823 let target = rulesInfo[j].rule.trigger["url-filter"];
545 824
546 if (source.length >= target.length) 825 if (source.length >= target.length)
547 { 826 {
548 // If one URL filter is a substring of the other starting at the 827 // If one URL filter is a substring of the other starting at the
549 // beginning, the other one is clearly redundant. 828 // beginning, the other one is clearly redundant.
550 if (source.substring(0, target.length) == target) 829 if (source.substring(0, target.length) == target)
551 { 830 {
552 rulesInfo[i].redundant = true; 831 rulesInfo[i].redundant = true;
553 break; 832 break;
554 } 833 }
555 } 834 }
556 else if (target.substring(0, source.length) == source) 835 else if (target.substring(0, source.length) == source)
557 { 836 {
558 rulesInfo[j].redundant = true; 837 rulesInfo[j].redundant = true;
559 } 838 }
560 } 839 }
561 } 840 })
562 841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));
563 return rulesInfo.filter(ruleInfo => !ruleInfo.redundant); 842 }
564 } 843
565 844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
566 function mergeRulesByURLFilter(rulesInfo, exhaustive)
567 { 845 {
568 // Closely matching rules are likely to be within a certain range. We only 846 // Closely matching rules are likely to be within a certain range. We only
569 // look for matches within this range by default. If we increase this value, 847 // look for matches within this range by default. If we increase this value,
570 // it can give us more matches and a smaller resulting rule set, but possibly 848 // it can give us more matches and a smaller resulting rule set, but possibly
571 // at a significant performance cost. 849 // at a significant performance cost.
572 // 850 //
573 // If the exhaustive option is true, we simply ignore this value and look for 851 // If the exhaustive option is true, we simply ignore this value and look for
574 // matches throughout the rule set. 852 // matches throughout the rule set.
575 const heuristicRange = 10; 853 const heuristicRange = 1000;
576 854
577 return async(() => 855 let limit = exhaustive ? rulesInfo.length :
578 { 856 Math.min(index + heuristicRange, rulesInfo.length);
579 if (exhaustive) 857
580 { 858 for (let i = index, j = i + 1; j < limit; j++)
581 // Throw out obviously redundant rules. 859 {
582 rulesInfo = eliminateRedundantRulesByURLFilter(rulesInfo); 860 let source = rulesInfo[i].rule.trigger["url-filter"];
583 } 861 let target = rulesInfo[j].rule.trigger["url-filter"];
584 }) 862
585 .then(() => 863 let edit = closeMatch(source, target);
586 { 864
587 if (rulesInfo.length <= 1) 865 if (edit)
588 return; 866 {
589 867 let urlFilter, ruleInfo, match = {edit};
590 return async(rulesInfo.map((_, i) => () => 868
591 { 869 if (edit.type == "insert")
592 let limit = exhaustive ? rulesInfo.length : 870 {
593 Math.min(i + heuristicRange, rulesInfo.length); 871 // Convert the insertion into a deletion and stick it on the target
594 872 // rule instead. We can only group deletions and substitutions;
595 for (let j = i + 1; j < limit; j++) 873 // therefore insertions must be treated as deletions on the target
596 { 874 // rule.
597 let source = rulesInfo[i].rule.trigger["url-filter"]; 875 urlFilter = target;
598 let target = rulesInfo[j].rule.trigger["url-filter"]; 876 ruleInfo = rulesInfo[j];
599 877 match.index = i;
600 let edit = closeMatch(source, target); 878 edit.type = "delete";
601 879 }
602 if (edit) 880 else
881 {
882 urlFilter = source;
883 ruleInfo = rulesInfo[i];
884 match.index = j;
885 }
886
887 // If the edit has an end index, it represents a multiple character
888 // edit.
889 let multiEdit = !!edit.endIndex;
890
891 if (multiEdit)
892 {
893 // We only care about a single multiple character edit because the
894 // number of characters for such a match doesn't matter, we can
895 // only merge with one other rule.
896 if (!ruleInfo.multiEditMatch)
897 ruleInfo.multiEditMatch = match;
898 }
899 else
900 {
901 // For single character edits, multiple rules can be merged into
902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".
903 if (!ruleInfo.matches)
904 ruleInfo.matches = new Array(urlFilter.length);
905
906 // Matches at a particular index. For example, for a source string
907 // "ads", both target strings "ad" (deletion) and "adv"
908 // (substitution) match at index 2, hence they are grouped together
909 // to possibly be merged later into "ad[sv]?".
910 let matchesForIndex = ruleInfo.matches[edit.index];
911
912 if (matchesForIndex)
603 { 913 {
604 let urlFilter, ruleInfo, match = {edit}; 914 matchesForIndex.push(match);
605 915 }
606 if (edit.type == "insert") 916 else
917 {
918 matchesForIndex = [match];
919 ruleInfo.matches[edit.index] = matchesForIndex;
920 }
921
922 // Keep track of the best set of matches. We later sort by this to
923 // get best results.
924 if (!ruleInfo.bestMatches ||
925 matchesForIndex.length > ruleInfo.bestMatches.length)
926 ruleInfo.bestMatches = matchesForIndex;
927 }
928 }
929 }
930 }
931
932 function mergeCandidateRulesByURLFilter(rulesInfo)
933 {
934 // Filter out rules that have no matches at all.
935 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>
936 {
937 return ruleInfo.bestMatches || ruleInfo.multiEditMatch
938 });
939
940 // For best results, we have to sort the candidates by the largest set of
941 // matches.
942 //
943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to
944 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and
945 // "[ab]dx" (3 rules).
946 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
947 {
948 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :
949 ruleInfo1.multiEditMatch ? 1 : 0;
950 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :
951 ruleInfo2.multiEditMatch ? 1 : 0;
952
953 return weight2 - weight1;
954 });
955
956 for (let ruleInfo of candidateRulesInfo)
957 {
958 let rule = ruleInfo.rule;
959
960 // If this rule has already been merged into another rule, we skip it.
961 if (ruleInfo.merged)
962 continue;
963
964 // Find the best set of rules to group, which is simply the largest set.
965 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
966 {
967 matchesForIndex = (matchesForIndex || []).filter(match =>
968 {
969 // Filter out rules that have either already been merged into other
970 // rules or have had other rules merged into them.
971 return !rulesInfo[match.index].merged &&
972 !rulesInfo[match.index].mergedInto;
973 });
974
975 return matchesForIndex.length > best.length ? matchesForIndex : best;
976 },
977 []);
978
979 let multiEdit = false;
980
981 // If we couldn't find a single rule to merge with, let's see if we have a
982 // multiple character edit. e.g. we could merge "ad" and "adserver" into
983 // "ad(server)?".
984 if (best.length == 0 && ruleInfo.multiEditMatch &&
985 !rulesInfo[ruleInfo.multiEditMatch.index].merged &&
986 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
987 {
988 best = [ruleInfo.multiEditMatch];
989 multiEdit = true;
990 }
991
992 if (best.length > 0)
993 {
994 let urlFilter = rule.trigger["url-filter"];
995
996 let editIndex = best[0].edit.index;
997
998 if (!multiEdit)
999 {
1000 // Merge all the matching rules into this one.
1001
1002 let characters = [urlFilter[editIndex]];
1003 let quantifier = "";
1004
1005 for (let match of best)
1006 {
1007 if (match.edit.type == "delete")
607 { 1008 {
608 // Convert the insertion into a deletion and stick it on the target 1009 quantifier = "?";
609 // rule instead. We can only group deletions and substitutions;
610 // therefore insertions must be treated as deletions on the target
611 // rule.
612 urlFilter = target;
613 ruleInfo = rulesInfo[j];
614 match.index = i;
615 edit.type = "delete";
616 } 1010 }
617 else 1011 else
618 { 1012 {
619 urlFilter = source; 1013 let character = rulesInfo[match.index].rule
620 ruleInfo = rulesInfo[i]; 1014 .trigger["url-filter"][editIndex];
621 match.index = j; 1015
1016 // Insert any hyphen at the beginning so it gets interpreted as a
1017 // literal hyphen.
1018 if (character == "-")
1019 characters.unshift(character);
1020 else
1021 characters.push(character);
622 } 1022 }
623 1023
624 // If the edit has an end index, it represents a multiple character 1024 // Mark the target rule as merged so other rules don't try to merge
625 // edit. 1025 // it again.
626 let multiEdit = !!edit.endIndex; 1026 rulesInfo[match.index].merged = true;
627 1027 }
628 if (multiEdit) 1028
629 { 1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +
630 // We only care about a single multiple character edit because the 1030 urlFilter.substring(editIndex + 1);
631 // number of characters for such a match doesn't matter, we can 1031 if (characters.length > 1)
632 // only merge with one other rule. 1032 {
633 if (!ruleInfo.multiEditMatch) 1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +
634 ruleInfo.multiEditMatch = match; 1034 characters.join("") + "]" +
635 } 1035 urlFilter.substring(editIndex + 1);
636 else
637 {
638 // For single character edits, multiple rules can be merged into
639 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".
640 if (!ruleInfo.matches)
641 ruleInfo.matches = new Array(urlFilter.length);
642
643 // Matches at a particular index. For example, for a source string
644 // "ads", both target strings "ad" (deletion) and "adv"
645 // (substitution) match at index 2, hence they are grouped together
646 // to possibly be merged later into "ad[sv]?".
647 let matchesForIndex = ruleInfo.matches[edit.index];
648
649 if (matchesForIndex)
650 {
651 matchesForIndex.push(match);
652 }
653 else
654 {
655 matchesForIndex = [match];
656 ruleInfo.matches[edit.index] = matchesForIndex;
657 }
658
659 // Keep track of the best set of matches. We later sort by this to
660 // get best results.
661 if (!ruleInfo.bestMatches ||
662 matchesForIndex.length > ruleInfo.bestMatches.length)
663 ruleInfo.bestMatches = matchesForIndex;
664 }
665 } 1036 }
666 } 1037 }
667 })); 1038 else
668 }) 1039 {
669 .then(() => 1040 let editEndIndex = best[0].edit.endIndex;
670 { 1041
671 // Filter out rules that have no matches at all. 1042 // Mark the target rule as merged so other rules don't try to merge it
672 let candidateRulesInfo = rulesInfo.filter(ruleInfo => 1043 // again.
673 { 1044 rulesInfo[best[0].index].merged = true;
674 return ruleInfo.bestMatches || ruleInfo.multiEditMatch 1045
675 }); 1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +
676 1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +
677 // For best results, we have to sort the candidates by the largest set of 1048 urlFilter.substring(editEndIndex);
678 // matches.
679 //
680 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to
681 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and
682 // "[ab]dx" (3 rules).
683 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
684 {
685 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :
686 ruleInfo1.multiEditMatch ? 1 : 0;
687 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :
688 ruleInfo2.multiEditMatch ? 1 : 0;
689
690 return weight2 - weight1;
691 });
692
693 for (let ruleInfo of candidateRulesInfo)
694 {
695 let rule = ruleInfo.rule;
696
697 // If this rule has already been merged into another rule, we skip it.
698 if (ruleInfo.merged)
699 continue;
700
701 // Find the best set of rules to group, which is simply the largest set.
702 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
703 {
704 matchesForIndex = (matchesForIndex || []).filter(match =>
705 {
706 // Filter out rules that have either already been merged into other
707 // rules or have had other rules merged into them.
708 return !rulesInfo[match.index].merged &&
709 !rulesInfo[match.index].mergedInto;
710 });
711
712 return matchesForIndex.length > best.length ? matchesForIndex : best;
713 },
714 []);
715
716 let multiEdit = false;
717
718 // If we couldn't find a single rule to merge with, let's see if we have a
719 // multiple character edit. e.g. we could merge "ad" and "adserver" into
720 // "ad(server)?".
721 if (best.length == 0 && ruleInfo.multiEditMatch &&
722 !rulesInfo[ruleInfo.multiEditMatch.index].merged &&
723 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
724 {
725 best = [ruleInfo.multiEditMatch];
726 multiEdit = true;
727 } 1049 }
728 1050
729 if (best.length > 0) 1051 rule.trigger["url-filter"] = urlFilter;
730 { 1052
731 let urlFilter = rule.trigger["url-filter"]; 1053 // Mark this rule as one that has had other rules merged into it.
732 1054 ruleInfo.mergedInto = true;
733 let editIndex = best[0].edit.index; 1055 }
734 1056 }
735 if (!multiEdit) 1057 }
736 { 1058
737 // Merge all the matching rules into this one. 1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)
738 1060 {
739 let characters = []; 1061 return async(rulesInfo, (ruleInfo, index) => () =>
740 let quantifier = ""; 1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)
741 1063 )
742 for (let match of best) 1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));
743 {
744 if (match.edit.type == "delete")
745 {
746 quantifier = "?";
747 }
748 else
749 {
750 let character = rulesInfo[match.index].rule
751 .trigger["url-filter"][editIndex];
752 characters.push(character);
753 }
754
755 // Mark the target rule as merged so other rules don't try to merge
756 // it again.
757 rulesInfo[match.index].merged = true;
758 }
759
760 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +
761 urlFilter.substring(editIndex + 1);
762 if (characters.length > 0)
763 {
764 urlFilter = urlFilter.substring(0, editIndex) + "[" +
765 urlFilter[editIndex] + characters.join("") + "]" +
766 urlFilter.substring(editIndex + 1);
767 }
768 }
769 else
770 {
771 let editEndIndex = best[0].edit.endIndex;
772
773 // Mark the target rule as merged so other rules don't try to merge it
774 // again.
775 rulesInfo[best[0].index].merged = true;
776
777 urlFilter = urlFilter.substring(0, editIndex) + "(" +
778 urlFilter.substring(editIndex, editEndIndex) + ")?" +
779 urlFilter.substring(editEndIndex);
780 }
781
782 rule.trigger["url-filter"] = urlFilter;
783
784 // Mark this rule as one that has had other rules merged into it.
785 ruleInfo.mergedInto = true;
786 }
787 }
788 });
789 } 1065 }
790 1066
791 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) 1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)
792 { 1068 {
793 if (rulesInfo.length <= 1) 1069 if (rulesInfo.length <= 1)
794 return; 1070 return;
795 1071
796 let oneRuleInfo = rulesInfo.shift(); 1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);
797 let valueSet = new Set(oneRuleInfo.rule[propertyType][property]); 1073
798 1074 for (let i = 1; i < rulesInfo.length; i++)
799 for (let ruleInfo of rulesInfo) 1075 {
800 { 1076 for (let value of rulesInfo[i].rule[propertyType][property] || [])
801 if (ruleInfo.rule[propertyType][property]) 1077 valueSet.add(value);
802 { 1078
803 for (let value of ruleInfo.rule[propertyType][property]) 1079 rulesInfo[i].merged = true;
804 valueSet.add(value);
805 }
806
807 ruleInfo.merged = true;
808 } 1080 }
809 1081
810 if (valueSet.size > 0) 1082 if (valueSet.size > 0)
811 oneRuleInfo.rule[propertyType][property] = Array.from(valueSet); 1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);
812 1084
813 oneRuleInfo.mergedInto = true; 1085 rulesInfo[0].mergedInto = true;
814 } 1086 }
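
A sketch of the rewritten mergeRulesByArrayProperty() in action, merging the "resource-type" arrays of two otherwise identical rules (hypothetical rulesInfo entries):

  let rulesInfo = [
    {rule: {trigger: {"url-filter": "^https?://ads\\.",
                      "resource-type": ["image"]},
            action: {type: "block"}}},
    {rule: {trigger: {"url-filter": "^https?://ads\\.",
                      "resource-type": ["script"]},
            action: {type: "block"}}}
  ];

  mergeRulesByArrayProperty(rulesInfo, "trigger", "resource-type");
  // rulesInfo[0].rule.trigger["resource-type"] -> ["image", "script"]
  // rulesInfo[1].merged                        -> true
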
815 1087
816 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) 1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)
817 { 1089 {
818 let mergeableRulesInfoByGroup = new Map(); 1090 let mergeableRulesInfoByGroup = new Map();
819 1091
820 for (let ruleInfo of rulesInfo) 1092 for (let ruleInfo of rulesInfo)
821 { 1093 {
822 let copy = { 1094 let copy = {
823 trigger: Object.assign({}, ruleInfo.rule.trigger), 1095 trigger: Object.assign({}, ruleInfo.rule.trigger),
(...skipping 12 matching lines...)
836 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]); 1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);
837 } 1109 }
838 1110
839 return mergeableRulesInfoByGroup; 1111 return mergeableRulesInfoByGroup;
840 } 1112 }
841 1113
842 function mergeRules(rules, exhaustive) 1114 function mergeRules(rules, exhaustive)
843 { 1115 {
844 let rulesInfo = rules.map(rule => ({rule})); 1116 let rulesInfo = rules.map(rule => ({rule}));
845 1117
1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];
1119
846 return async(() => 1120 return async(() =>
847 { 1121 {
848 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter"); 1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");
849 return async(Array.from(map.values()).map(mergeableRulesInfo => () => 1123 return async(map.values(), mergeableRulesInfo => () =>
850 { 1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)
851 if (mergeableRulesInfo.length > 1) 1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))
852 return mergeRulesByURLFilter(mergeableRulesInfo, exhaustive); 1126 )
853 })); 1127 .then(() =>
1128 {
1129 // Filter out rules that are redundant or have been merged into other
1130 // rules.
1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&
1132 !ruleInfo.merged);
1133 });
854 }) 1134 })
855 .then(() => 1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>
856 {
857 // Filter out rules that are redundant or have been merged into other rules.
858 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&
859 !ruleInfo.merged);
860 })
861 .then(() => async(["resource-type", "if-domain"].map(arrayProperty => () =>
862 { 1136 {
863 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", 1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",
864 arrayProperty); 1138 arrayProperty);
865 return async(Array.from(map.values()).map(mergeableRulesInfo => () => 1139 return async(map.values(), mergeableRulesInfo => () =>
866 { 1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)
867 if (mergeableRulesInfo.length > 1) 1141 )
868 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty);
869 }))
870 .then(() => 1142 .then(() =>
871 { 1143 {
872 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); 1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);
873 }); 1145 });
874 }))) 1146 }))
875 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule)); 1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));
876 } 1148 }
877 1149
878 let ContentBlockerList = 1150 let ContentBlockerList =
879 /** 1151 /**
880 * Create a new Adblock Plus filter to content blocker list converter 1152 * Create a new Adblock Plus filter to content blocker list converter
881 * 1153 *
882 * @param {object} options Options for content blocker list generation 1154 * @param {object} options Options for content blocker list generation
883 * 1155 *
884 * @constructor 1156 * @constructor
885 */ 1157 */
886 exports.ContentBlockerList = function(options) 1158 exports.ContentBlockerList = function (options)
887 { 1159 {
888 const defaultOptions = { 1160 const defaultOptions = {
889 merge: false, 1161 merge: "auto"
890 exhaustiveMerge: false
891 }; 1162 };
892 1163
893 this.options = Object.assign({}, defaultOptions, options); 1164 this.options = Object.assign({}, defaultOptions, options);
894 1165
895 this.requestFilters = []; 1166 this.requestFilters = [];
896 this.requestExceptions = []; 1167 this.requestExceptions = [];
897 this.elemhideFilters = []; 1168 this.elemhideFilters = [];
898 this.elemhideExceptions = []; 1169 this.elemhideExceptions = [];
1170 this.genericblockExceptions = [];
1171 this.generichideExceptions = [];
899 this.elemhideSelectorExceptions = new Map(); 1172 this.elemhideSelectorExceptions = new Map();
900 }; 1173 };
901 1174
902 /** 1175 /**
903 * Add Adblock Plus filter to be converted 1176 * Add Adblock Plus filter to be converted
904 * 1177 *
905 * @param {Filter} filter Filter to convert 1178 * @param {Filter} filter Filter to convert
906 */ 1179 */
907 ContentBlockerList.prototype.addFilter = function(filter) 1180 ContentBlockerList.prototype.addFilter = function(filter)
908 { 1181 {
909 if (filter.sitekeys) 1182 if (filter.sitekeys)
910 return; 1183 return;
911 if (filter instanceof filterClasses.RegExpFilter && 1184 if (filter instanceof filterClasses.RegExpFilter &&
912 filter.regexpSource == null) 1185 filter.regexpSource == null)
913 return; 1186 return;
914 1187
915 if (filter instanceof filterClasses.BlockingFilter) 1188 if (filter instanceof filterClasses.BlockingFilter)
916 this.requestFilters.push(filter); 1189 this.requestFilters.push(filter);
917 1190
918 if (filter instanceof filterClasses.WhitelistFilter) 1191 if (filter instanceof filterClasses.WhitelistFilter)
919 { 1192 {
920 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes)) 1193 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))
921 this.requestExceptions.push(filter); 1194 this.requestExceptions.push(filter);
922 1195
923 if (filter.contentType & typeMap.ELEMHIDE) 1196 if (filter.contentType & typeMap.GENERICBLOCK)
924 this.elemhideExceptions.push(filter); 1197 this.genericblockExceptions.push(filter);
1198
1199 if (filter.contentType & typeMap.ELEMHIDE)
1200 this.elemhideExceptions.push(filter);
1201 else if (filter.contentType & typeMap.GENERICHIDE)
1202 this.generichideExceptions.push(filter);
925 } 1203 }
926 1204
927 if (filter instanceof filterClasses.ElemHideFilter) 1205 if (filter instanceof filterClasses.ElemHideFilter)
928 this.elemhideFilters.push(filter); 1206 this.elemhideFilters.push(filter);
929 1207
930 if (filter instanceof filterClasses.ElemHideException) 1208 if (filter instanceof filterClasses.ElemHideException)
931 { 1209 {
932 let domains = this.elemhideSelectorExceptions[filter.selector]; 1210 let domains = this.elemhideSelectorExceptions[filter.selector];
933 if (!domains) 1211 if (!domains)
934 domains = this.elemhideSelectorExceptions[filter.selector] = []; 1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];
935 1213
936 parseDomains(filter.domains, domains, []); 1214 parseDomains(filter.domains, domains, []);
937 } 1215 }
938 }; 1216 };
939 1217
940 /** 1218 /**
941 * Generate content blocker list for all filters that were added 1219 * Generate content blocker list for all filters that were added
942 */ 1220 */
943 ContentBlockerList.prototype.generateRules = function() 1221 ContentBlockerList.prototype.generateRules = function()
944 { 1222 {
945 let cssRules = []; 1223 let cssRules = [];
946 let cssExceptionRules = []; 1224 let cssExceptionRules = [];
947 let blockingRules = []; 1225 let blockingRules = [];
948 let blockingExceptionRules = []; 1226 let blockingExceptionRules = [];
949 1227
950 let ruleGroups = [cssRules, cssExceptionRules, 1228 let ruleGroups = [cssRules, cssExceptionRules,
951 blockingRules, blockingExceptionRules]; 1229 blockingRules, blockingExceptionRules];
952 1230
1231 let genericSelectors = [];
953 let groupedElemhideFilters = new Map(); 1232 let groupedElemhideFilters = new Map();
1233
954 for (let filter of this.elemhideFilters) 1234 for (let filter of this.elemhideFilters)
955 { 1235 {
956 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); 1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);
957 if (!result) 1237 if (!result)
958 continue; 1238 continue;
959 1239
960 if (result.matchDomains.length == 0) 1240 if (result.matchDomains.length == 0)
961 result.matchDomains = ["^https?://"]; 1241 {
962 1242 genericSelectors.push(result.selector);
963 for (let matchDomain of result.matchDomains) 1243 }
964 { 1244 else
965 let group = groupedElemhideFilters.get(matchDomain) || []; 1245 {
966 group.push(result.selector); 1246 for (let matchDomain of result.matchDomains)
967 groupedElemhideFilters.set(matchDomain, group); 1247 {
968 } 1248 let group = groupedElemhideFilters.get(matchDomain) || [];
969 } 1249 group.push(result.selector);
1250 groupedElemhideFilters.set(matchDomain, group);
1251 }
1252 }
1253 }
1254
1255 // Separate out the element hiding exceptions that have only a hostname part
1256 // from the rest. This allows us to implement a workaround for issue #5345
1257 // (WebKit bug #167423), but as a bonus it also reduces the number of
1258 // generated rules. The downside is that the exception will only apply to the
1259 // top-level document, not to iframes. We have to live with this until the
1260 // WebKit bug is fixed in all supported versions of Safari.
1261 // https://bugs.webkit.org/show_bug.cgi?id=167423
1262 //
1263 // Note that as a result of this workaround we end up with a huge rule set in
1264 // terms of the amount of memory used. This can cause Node.js to throw
1265 // "JavaScript heap out of memory". To avoid this, call Node.js with
1266 // --max_old_space_size=4096
1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);
1268
1269 let genericSelectorExceptionDomains =
1270 extractFilterDomains(this.generichideExceptions);
1271 elemhideExceptionDomains.forEach(name =>
1272 {
1273 genericSelectorExceptionDomains.add(name);
1274 });
1275
1276 addCSSRules(cssRules, genericSelectors, null,
1277 genericSelectorExceptionDomains);
1278
1279 // Filter out whitelisted domains.
1280 elemhideExceptionDomains.forEach(domain =>
1281 groupedElemhideFilters.delete(domain));
970 1282
971 groupedElemhideFilters.forEach((selectors, matchDomain) => 1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>
972 { 1284 {
973 while (selectors.length) 1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);
974 {
975 let selector = selectors.splice(0, selectorLimit).join(", ");
976
977 // As of Safari 9.0 element IDs are matched as lowercase. We work around
978 // this by converting to the attribute format [id="elementID"]
979 selector = convertIDSelectorsToAttributeSelectors(selector);
980
981 cssRules.push({
982 trigger: {"url-filter": matchDomain,
983 "url-filter-is-case-sensitive": true},
984 action: {type: "css-display-none",
985 selector: selector}
986 });
987 }
988 }); 1286 });
989 1287
990 for (let filter of this.elemhideExceptions) 1288 let requestFilterExceptionDomains = [];
991 { 1289 for (let filter of this.genericblockExceptions)
992 convertFilterAddRules(cssExceptionRules, filter, 1290 {
993 "ignore-previous-rules", false); 1291 let parsed = parseFilterRegexpSource(filter.regexpSource);
1292 if (parsed.hostname)
1293 requestFilterExceptionDomains.push(parsed.hostname);
994 } 1294 }
995 1295
996 for (let filter of this.requestFilters) 1296 for (let filter of this.requestFilters)
997 convertFilterAddRules(blockingRules, filter, "block", true); 1297 {
1298 convertFilterAddRules(blockingRules, filter, "block", true,
1299 requestFilterExceptionDomains);
1300 }
998 1301
999 for (let filter of this.requestExceptions) 1302 for (let filter of this.requestExceptions)
1000 { 1303 {
1001 convertFilterAddRules(blockingExceptionRules, filter, 1304 convertFilterAddRules(blockingExceptionRules, filter,
1002 "ignore-previous-rules", true); 1305 "ignore-previous-rules", true);
1003 } 1306 }
1004 1307
1005 return async(ruleGroups.map((group, index) => () => 1308 return async(ruleGroups, (group, index) => () =>
1006 { 1309 {
1007 let next = () => 1310 let next = () =>
1008 { 1311 {
1009 if (index == ruleGroups.length - 1) 1312 if (index == ruleGroups.length - 1)
1010 return ruleGroups.reduce((all, rules) => all.concat(rules), []); 1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);
1011 }; 1314 };
1012 1315
1013 ruleGroups[index] = ruleGroups[index].filter(rule => !hasNonASCI(rule)); 1316 if (this.options.merge == "all" ||
1014 1317 (this.options.merge == "auto" &&
1015 if (this.options.merge) 1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))
1016 { 1319 {
1017 return mergeRules(ruleGroups[index], this.options.exhaustiveMerge) 1320 return mergeRules(ruleGroups[index], this.options.merge == "all")
1018 .then(rules => 1321 .then(rules =>
1019 { 1322 {
1020 ruleGroups[index] = rules; 1323 ruleGroups[index] = rules;
1021 return next(); 1324 return next();
1022 }); 1325 });
1023 } 1326 }
1024 1327
1025 return next(); 1328 return next();
1026 })); 1329 });
1027 }; 1330 };