lib/abp2blocklist.js - Issue 29426594: Issue 3673 - Merge closely matching rules

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Left Patch Set: Move merge options to constructor and update comments Created May 8, 2017, 11:09 p.m.

Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 /** @module abp2blocklist */	18 /** @module abp2blocklist */

19	19

20 "use strict";	20 "use strict";

21	21

22 let filterClasses = require("filterClasses");	22 let filterClasses = require("filterClasses");

23 let tldjs = require("tldjs");

24 let punycode = require("punycode");	23 let punycode = require("punycode");

25	24

26 const selectorLimit = 5000;	25 const selectorLimit = 5000;

27 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

28 const whitelistableRequestTypes = (typeMap.IMAGE	27

29 \| typeMap.STYLESHEET	28 const httpRequestTypes = typeMap.IMAGE \|

30 \| typeMap.SCRIPT	29 typeMap.STYLESHEET \|

31 \| typeMap.FONT	30 typeMap.SCRIPT \|

32 \| typeMap.MEDIA	31 typeMap.FONT \|

33 \| typeMap.POPUP	32 typeMap.MEDIA \|

34 \| typeMap.OBJECT	33 typeMap.POPUP \|

35 \| typeMap.OBJECT_SUBREQUEST	34 typeMap.OBJECT \|

36 \| typeMap.XMLHTTPREQUEST	35 typeMap.OBJECT_SUBREQUEST \|

37 \| typeMap.PING	36 typeMap.XMLHTTPREQUEST \|

38 \| typeMap.SUBDOCUMENT	37 typeMap.PING \|

39 \| typeMap.OTHER);	38 typeMap.SUBDOCUMENT \|

	39 typeMap.OTHER;

	40 const rawRequestTypes = typeMap.XMLHTTPREQUEST \|

	41 typeMap.WEBSOCKET \|

	42 typeMap.WEBRTC \|

	43 typeMap.OBJECT_SUBREQUEST \|

	44 typeMap.PING \|

	45 typeMap.OTHER;

	46 const whitelistableRequestTypes = httpRequestTypes \|

	47 typeMap.WEBSOCKET \|

	48 typeMap.WEBRTC;

	49

	50 function callLater(func)

	51 {

	52 return new Promise(resolve =>

	53 {

	54 let call = () => resolve(func());

	55

	56 // If this looks like Node.js, call process.nextTick, otherwise call

	57 // setTimeout.

	58 if (typeof process != "undefined")

	59 process.nextTick(call);

	60 else

	61 setTimeout(call, 0);

	62 });

	63 }

	64

	65 function async(callees, mapFunction)

	66 {

	67 if (!(Symbol.iterator in callees))

	68 callees = [callees];

	69

	70 let lastPause = Date.now();

	71 let index = 0;

	72

	73 let promise = Promise.resolve();

	74

	75 for (let next of callees)

	76 {

	77 let currentIndex = index;

	78

	79 promise = promise.then(() =>

	80 {

	81 if (mapFunction)

	82 next = mapFunction(next, currentIndex);

	83

	84 // If it has been 100ms or longer since the last call, take a pause. This

	85 // keeps the browser from freezing up.

	86 let now = Date.now();

	87 if (now - lastPause >= 100)

	88 {

	89 lastPause = now;

	90 return callLater(next);

	91 }

	92

	93 return next();

	94 });

	95

	96 index++;

	97 }

	98

	99 return promise;

	100 }

40	101

41 function parseDomains(domains, included, excluded)	102 function parseDomains(domains, included, excluded)

42 {	103 {

43 for (let domain in domains)	104 for (let domain in domains)

44 {	105 {

45 if (domain != "")	106 if (domain != "")

46 {	107 {

47 let enabled = domains[domain];	108 let enabled = domains[domain];

48 domain = punycode.toASCII(domain.toLowerCase());	109 domain = punycode.toASCII(domain.toLowerCase());

49	110

50 if (!enabled)	111 if (!enabled)

51 excluded.push(domain);	112 excluded.push(domain);

52 else if (!domains[""])	113 else if (!domains[""])

53 included.push(domain);	114 included.push(domain);

54 }	115 }

55 }	116 }

56 }	117 }

57	118

58 function escapeRegExp(s)	119 function escapeRegExp(s)

59 {	120 {

60 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	121 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

61 }	122 }

62	123

63 function matchDomain(domain)	124 function matchDomain(domain)

64 {	125 {

	126 if (!domain)

	127 return "^https?://";

	128

65 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

	130 }

	131

	132 function getURLSchemes(contentType)

	133 {

	134 // If the given content type includes all supported URL schemes, simply

	135 // return a single generic URL scheme pattern. This minimizes the size of the

	136 // generated rule set. The downside to this is that it will also match

	137 // schemes that we do not want to match (e.g. "ftp://"), but this can be

	138 // mitigated by adding exceptions for those schemes.

	139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

	140 contentType & httpRequestTypes)

	141 return ["[^:]+:(//)?"];

	142

	143 let urlSchemes = [];

	144

	145 if (contentType & typeMap.WEBSOCKET)

	146 urlSchemes.push("wss?://");

	147

	148 if (contentType & typeMap.WEBRTC)

	149 urlSchemes.push("stuns?:", "turns?:");

	150

	151 if (contentType & httpRequestTypes)

	152 urlSchemes.push("https?://");

	153

	154 return urlSchemes;

	155 }

	156

	157 function findSubdomainsInList(domain, list)

	158 {

	159 let subdomains = [];

	160 let suffixLength = domain.length + 1;

	161

	162 for (let name of list)

	163 {

	164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

	165 subdomains.push(name.slice(0, -suffixLength));

	166 }

	167

	168 return subdomains;

	169 }

	170

	171 function extractFilterDomains(filters)

	172 {

	173 let domains = new Set();

	174 for (let filter of filters)

	175 {

	176 let parsed = parseFilterRegexpSource(filter.regexpSource);

	177 if (parsed.justHostname)

	178 domains.add(parsed.hostname);

	179 }

	180 return domains;

66 }	181 }

67	182

68 function convertElemHideFilter(filter, elemhideSelectorExceptions)	183 function convertElemHideFilter(filter, elemhideSelectorExceptions)

69 {	184 {

70 let included = [];	185 let included = [];

71 let excluded = [];	186 let excluded = [];

72 let rules = [];

73	187

74 parseDomains(filter.domains, included, excluded);	188 parseDomains(filter.domains, included, excluded);

75	189

76 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

77 return {matchDomains: included.map(matchDomain), selector: filter.selector};	191 return {matchDomains: included, selector: filter.selector};

78 }	192 }

79	193

80 /**	194 /**

81 * Parse the given filter "regexpSource" string, producing a regular expression,	195 * Parse the given filter "regexpSource" string, producing a regular expression,

82 * extracting the hostname (if any), deciding if the regular expression is safe	196 * extracting the hostname (if any), deciding if the regular expression is safe

83 * to be converted + matched as lower case and noting if the source contains	197 * to be converted + matched as lower case and noting if the source contains

84 * anything after the hostname.	198 * anything after the hostname.

85 *	199 *

86 * @param {string} text regexpSource property of a filter	200 * @param {string} text regexpSource property of a filter

	201 * @param {string} urlScheme The URL scheme to use in the regular expression

87 * @returns {object} An object containing a regular expression string, a bool	202 * @returns {object} An object containing a regular expression string, a bool

88 * indicating if the filter can be safely matched as lower	203 * indicating if the filter can be safely matched as lower

89 * case, a hostname string (or undefined) and a bool	204 * case, a hostname string (or undefined) and a bool

90 * indicating if the source only contains a hostname or not:	205 * indicating if the source only contains a hostname or not:

91 * {regexp: "...",	206 * {regexp: "...",

92 * canSafelyMatchAsLowercase: true/false,	207 * canSafelyMatchAsLowercase: true/false,

93 * hostname: "...",	208 * hostname: "...",

94 * justHostname: true/false}	209 * justHostname: true/false}

95 */	210 */

96 function parseFilterRegexpSource(text)	211 function parseFilterRegexpSource(text, urlScheme)

97 {	212 {

98 let regexp = [];	213 let regexp = [];

99 let lastIndex = text.length - 1;	214

	215 // Convert the text into an array of Unicode characters.

	216 //

	217 // In the case of surrogate pairs (the smiley emoji, for example), one

	218 // Unicode code point is represented by two JavaScript characters together.

	219 // We want to iterate over Unicode code points rather than JavaScript

	220 // characters.

	221 let characters = Array.from(text);

	222

	223 let lastIndex = characters.length - 1;

100 let hostname;	224 let hostname;

101 let hostnameStart = null;	225 let hostnameStart = null;

102 let hostnameFinished = false;	226 let hostnameFinished = false;

103 let justHostname = false;	227 let justHostname = false;

104 let canSafelyMatchAsLowercase = false;	228 let canSafelyMatchAsLowercase = false;

105	229

106 for (let i = 0; i < text.length; i++)	230 if (!urlScheme)

107 {	231 urlScheme = getURLSchemes()[0];

108 let c = text[i];	232

	233 for (let i = 0; i < characters.length; i++)

	234 {

	235 let c = characters[i];

109	236

110 if (hostnameFinished)	237 if (hostnameFinished)

111 justHostname = false;	238 justHostname = false;

112	239

113 // If we're currently inside the hostname we have to be careful not to	240 // If we're currently inside the hostname we have to be careful not to

114 // escape any characters until after we have converted it to punycode.	241 // escape any characters until after we have converted it to punycode.

115 if (hostnameStart != null && !hostnameFinished)	242 if (hostnameStart != null && !hostnameFinished)

116 {	243 {

117 let endingChar = (c == "*" \|\| c == "^" \|\|	244 let endingChar = (c == "*" \|\| c == "^" \|\|

118 c == "?" \|\| c == "/" \|\| c == "\|");	245 c == "?" \|\| c == "/" \|\| c == "\|");

119 if (!endingChar && i != lastIndex)	246 if (!endingChar && i != lastIndex)

120 continue;	247 continue;

121	248

122 hostname = punycode.toASCII(	249 hostname = punycode.toASCII(

123 text.substring(hostnameStart, endingChar ? i : i + 1)	250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

	251 .toLowerCase()

124 );	252 );

125 hostnameFinished = justHostname = true;	253 hostnameFinished = justHostname = true;

126 regexp.push(escapeRegExp(hostname));	254 regexp.push(escapeRegExp(hostname));

127 if (!endingChar)	255 if (!endingChar)

128 break;	256 break;

129 }	257 }

130	258

131 switch (c)	259 switch (c)

132 {	260 {

133 case "*":	261 case "*":

134 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

135 regexp.push(".*");	263 regexp.push(".*");

136 break;	264 break;

137 case "^":	265 case "^":

138 if (i < lastIndex)	266 let alphabet = "a-z";

139 regexp.push(".");	267 // If justHostname is true and we've encountered a "^", it means we're

	268 // still in the hostname part of the URL. Since hostnames are always

	269 // lower case (Punycode), there's no need to include "A-Z" in the

	270 // pattern. Further, subsequent code may lower-case the entire regular

	271 // expression (if the URL contains only the hostname part), leaving us

	272 // with "a-za-z", which would be redundant.

	273 if (!justHostname)

	274 alphabet = "A-Z" + alphabet;

	275 let digits = "0-9";

	276 // Note that the "-" must appear first here in order to retain its

	277 // literal meaning within the brackets.

	278 let specialCharacters = "-_.%";

	279 let separator = "[^" + specialCharacters + alphabet + digits + "]";

	280 if (i == 0)

	281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");

	282 else if (i == lastIndex)

	283 regexp.push("(" + separator + ".*)?$");

	284 else

	285 regexp.push(separator);

140 break;	286 break;

141 case "\|":	287 case "\|":

142 if (i == 0)	288 if (i == 0)

143 {	289 {

144 regexp.push("^");	290 regexp.push("^");

145 break;	291 break;

146 }	292 }

147 if (i == lastIndex)	293 if (i == lastIndex)

148 {	294 {

149 regexp.push("$");	295 regexp.push("$");

150 break;	296 break;

151 }	297 }

152 if (i == 1 && text[0] == "\|")	298 if (i == 1 && characters[0] == "\|")

153 {	299 {

154 hostnameStart = i + 1;	300 hostnameStart = i + 1;

155 canSafelyMatchAsLowercase = true;	301 canSafelyMatchAsLowercase = true;

156 regexp.push("https?://([^/]+\\.)?");	302 regexp.push(urlScheme + "([^/]+\\.)?");

157 break;	303 break;

158 }	304 }

159 regexp.push("\\\|");	305 regexp.push("\\\|");

160 break;	306 break;

161 case "/":	307 case "/":

162 if (!hostnameFinished &&	308 if (!hostnameFinished &&

163 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	309 characters[i - 2] == ":" && characters[i - 1] == "/")

164 {	310 {

165 hostnameStart = i + 1;	311 hostnameStart = i + 1;

166 canSafelyMatchAsLowercase = true;	312 canSafelyMatchAsLowercase = true;

167 }	313 }

168 regexp.push("/");	314 regexp.push("/");

169 break;	315 break;

170 case ".": case "+": case "$": case "?":	316 case ".": case "+": case "$": case "?":

171 case "{": case "}": case "(": case ")":	317 case "{": case "}": case "(": case ")":

172 case "[": case "]": case "\\":	318 case "[": case "]": case "\\":

173 regexp.push("\\", c);	319 regexp.push("\\", c);

174 break;	320 break;

175 default:	321 default:

176 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	322 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

177 c >= "A" && c <= "Z"))	323 c >= "A" && c <= "Z"))

178 canSafelyMatchAsLowercase = false;	324 canSafelyMatchAsLowercase = false;

179 regexp.push(c);	325 regexp.push(c == "%" ? c : encodeURI(c));

180 }	326 }

181 }	327 }

182	328

183 return {	329 return {

184 regexp: regexp.join(""),	330 regexp: regexp.join(""),

185 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

186 hostname: hostname,	332 hostname: hostname,

187 justHostname: justHostname	333 justHostname: justHostname

188 };	334 };

189 }	335 }

190	336

191 function getResourceTypes(filter)	337 function getResourceTypes(contentType)

192 {	338 {

193 let types = [];	339 let types = [];

194	340

195 if (filter.contentType & typeMap.IMAGE)	341 if (contentType & typeMap.IMAGE)

196 types.push("image");	342 types.push("image");

197 if (filter.contentType & typeMap.STYLESHEET)	343 if (contentType & typeMap.STYLESHEET)

198 types.push("style-sheet");	344 types.push("style-sheet");

199 if (filter.contentType & typeMap.SCRIPT)	345 if (contentType & typeMap.SCRIPT)

200 types.push("script");	346 types.push("script");

201 if (filter.contentType & typeMap.FONT)	347 if (contentType & typeMap.FONT)

202 types.push("font");	348 types.push("font");

203 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	349 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

204 types.push("media");	350 types.push("media");

205 if (filter.contentType & typeMap.POPUP)	351 if (contentType & typeMap.POPUP)

206 types.push("popup");	352 types.push("popup");

207 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	353 if (contentType & rawRequestTypes)

208 typeMap.OBJECT_SUBREQUEST \|

209 typeMap.PING \|

210 typeMap.OTHER))

211 types.push("raw");	354 types.push("raw");

212 if (filter.contentType & typeMap.SUBDOCUMENT)	355 if (contentType & typeMap.SUBDOCUMENT)

213 types.push("document");	356 types.push("document");

214	357

215 return types;	358 return types;

216 }	359 }

217	360

218 function addDomainPrefix(domains)	361 function makeRuleCopies(trigger, action, urlSchemes)

219 {	362 {

220 let result = [];	363 let copies = [];

221	364

222 for (let domain of domains)	365 // Always make a deep copy of the rule, since rules may have to be

223 {	366 // manipulated individually at a later stage.

224 result.push(domain);	367 let stringifiedTrigger = JSON.stringify(trigger);

225	368

226 if (tldjs.getDomain(domain) == domain)	369 let filterPattern = trigger["url-filter"].substring(1);

227 result.push("www." + domain);	370 let startIndex = 0;

228 }	371

229	372 // If the URL filter already begins with the first URL scheme pattern, skip

230 return result;	373 // it.

231 }	374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

232	375 {

233 function convertFilterAddRules(rules, filter, action, withResourceTypes)	376 filterPattern = filterPattern.substring(urlSchemes[0].length);

234 {	377 startIndex = 1;

235 let parsed = parseFilterRegexpSource(filter.regexpSource);	378 }

	379 else

	380 {

	381 filterPattern = ".*" + filterPattern;

	382 }

	383

	384 for (let i = startIndex; i < urlSchemes.length; i++)

	385 {

	386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	387 "url-filter": "^" + urlSchemes[i] + filterPattern

	388 });

	389 copies.push({trigger: copyTrigger, action});

	390 }

	391

	392 return copies;

	393 }

	394

	395 function excludeTopURLFromTrigger(trigger)

	396 {

	397 trigger["unless-top-url"] = [trigger["url-filter"]];

	398 if (trigger["url-filter-is-case-sensitive"])

	399 trigger["top-url-filter-is-case-sensitive"] = true;

	400 }

	401

	402 function convertFilterAddRules(rules, filter, action, withResourceTypes,

	403 exceptionDomains, contentType)

	404 {

	405 if (!contentType)

	406 contentType = filter.contentType;

	407

	408 // If WebSocket or WebRTC are given along with other options but not

	409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,

	410 // we must generate multiple rules. For example, for the filter

	411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

	412 // and another rule with "^https?://" and "image". If we merge the two, we

	413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)

	414 // inadvertently.

	415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

	416 !(contentType & typeMap.WEBRTC &&

	417 contentType & rawRequestTypes & httpRequestTypes)) \|\|

	418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

	419 !(contentType & typeMap.WEBSOCKET &&

	420 contentType & rawRequestTypes & httpRequestTypes)))

	421 {

	422 if (contentType & typeMap.WEBSOCKET)

	423 {

	424 convertFilterAddRules(rules, filter, action, withResourceTypes,

	425 exceptionDomains, typeMap.WEBSOCKET);

	426 }

	427

	428 if (contentType & typeMap.WEBRTC)

	429 {

	430 convertFilterAddRules(rules, filter, action, withResourceTypes,

	431 exceptionDomains, typeMap.WEBRTC);

	432 }

	433

	434 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	435

	436 if (!contentType)

	437 return;

	438 }

	439

	440 let urlSchemes = getURLSchemes(contentType);

	441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

236	442

237 // For the special case of $document whitelisting filters with just a domain	443 // For the special case of $document whitelisting filters with just a domain

238 // we can generate an equivalent blocking rule exception using if-domain.	444 // we can generate an equivalent blocking rule exception using if-domain.

239 if (filter instanceof filterClasses.WhitelistFilter &&	445 if (filter instanceof filterClasses.WhitelistFilter &&

240 filter.contentType & typeMap.DOCUMENT &&	446 contentType & typeMap.DOCUMENT &&

241 parsed.justHostname)	447 parsed.justHostname)

242 {	448 {

243 rules.push({	449 rules.push({

244 trigger: {	450 trigger: {

245 "url-filter": ".*",	451 "url-filter": ".*",

246 "if-domain": addDomainPrefix([parsed.hostname])	452 "if-domain": ["*" + parsed.hostname]

247 },	453 },

248 action: {type: "ignore-previous-rules"}	454 action: {type: "ignore-previous-rules"}

249 });	455 });

250 // If the filter contains other supported options we'll need to generate	456 // If the filter contains other supported options we'll need to generate

251 // further rules for it, but if not we can simply return now.	457 // further rules for it, but if not we can simply return now.

252 if (!(filter.contentType & whitelistableRequestTypes))	458 if (!(contentType & whitelistableRequestTypes))

253 return;	459 return;

254 }	460 }

255	461

256 let trigger = {"url-filter": parsed.regexp};	462 let trigger = {"url-filter": parsed.regexp};

257	463

258 // Limit rules to HTTP(S) URLs	464 // If the URL filter begins with one of the URL schemes for this content

259 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	465 // type, we generate additional rules for all the URL scheme patterns;

260 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	466 // otherwise, if the start of the URL filter literally matches the first URL

	467 // scheme pattern, we just generate additional rules for the remaining URL

	468 // scheme patterns.

	469 //

	470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	471 // in front of this and generate two additional rules for

	472 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,

	473 // "\|\|foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	474 // "^turns?:([^/]+\\.)?foo" in addition.

	475 //

	476 // Note that the filter can be already anchored to the beginning

	477 // (e.g. "\|stun:foo$webrtc"), in which case we do not generate any additional

	478 // rules.

	479 let needAltRules = trigger["url-filter"][0] != "^" \|\|

	480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	481

	482 if (trigger["url-filter"][0] != "^")

	483 {

	484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	485 .test(trigger["url-filter"])))

	486 {

	487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	488 }

	489

	490 trigger["url-filter"] = "^" + trigger["url-filter"];

	491 }

261	492

262 // For rules containing only a hostname we know that we're matching against	493 // For rules containing only a hostname we know that we're matching against

263 // a lowercase string unless the matchCase option was passed.	494 // a lowercase string unless the matchCase option was passed.

264 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

265 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

266	497

267 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	498 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

268 trigger["url-filter-is-case-sensitive"] = true;	499 trigger["url-filter-is-case-sensitive"] = true;

269	500

270 let included = [];	501 let included = [];

271 let excluded = [];	502 let excluded = [];

272	503

273 parseDomains(filter.domains, included, excluded);	504 parseDomains(filter.domains, included, excluded);

274	505

	506 if (exceptionDomains)

	507 excluded = excluded.concat(exceptionDomains);

	508

275 if (withResourceTypes)	509 if (withResourceTypes)

276 {	510 {

277 trigger["resource-type"] = getResourceTypes(filter);	511 let resourceTypes = getResourceTypes(contentType);

278	512

279 if (trigger["resource-type"].length == 0)	513 // Content blocker rules can't differentiate between sub-document requests

	514 // (iframes) and top-level document requests. To avoid too many false

	515 // positives, we prevent rules with no hostname part from blocking document

	516 // requests.

	517 //

	518 // Once Safari 11 becomes our minimum supported version, we could change

	519 // our approach here to use the new "unless-top-url" property instead.

	520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)

	521 resourceTypes = resourceTypes.filter(type => type != "document");

	522

	523 if (resourceTypes.length == 0)

280 return;	524 return;

	525

	526 trigger["resource-type"] = resourceTypes;

281 }	527 }

282	528

283 if (filter.thirdParty != null)	529 if (filter.thirdParty != null)

284 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

285	531

	532 let addTopLevelException = false;

	533

286 if (included.length > 0)	534 if (included.length > 0)

287 trigger["if-domain"] = addDomainPrefix(included);	535 {

	536 trigger["if-domain"] = [];

	537

	538 for (let name of included)

	539 {

	540 // If this is a blocking filter or an element hiding filter, add the

	541 // subdomain wildcard only if no subdomains have been excluded.

	542 let notSubdomains = null;

	543 if ((filter instanceof filterClasses.BlockingFilter \|\|

	544 filter instanceof filterClasses.ElemHideFilter) &&

	545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)

	546 {

	547 trigger["if-domain"].push(name);

	548

	549 // Add the "www" prefix but only if it hasn't been excluded.

	550 if (!notSubdomains.includes("www"))

	551 trigger["if-domain"].push("www." + name);

	552 }

	553 else

	554 {

	555 trigger["if-domain"].push("*" + name);

	556 }

	557 }

	558 }

288 else if (excluded.length > 0)	559 else if (excluded.length > 0)

289 trigger["unless-domain"] = addDomainPrefix(excluded);	560 {

	561 trigger["unless-domain"] = excluded.map(name => "*" + name);

	562 }

	563 else if (filter instanceof filterClasses.BlockingFilter &&

	564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)

	565 {

	566 // Rules with a hostname part are still allowed to block document requests,

	567 // but we add an exception for top-level documents.

	568 //

	569 // Note that we can only do this if there's no "unless-domain" property for

	570 // now. This also only works in Safari 11 onwards, while older versions

	571 // simply ignore this property. Once Safari 11 becomes our minimum

	572 // supported version, we can merge "unless-domain" into "unless-top-url".

	573 addTopLevelException = true;

	574 excludeTopURLFromTrigger(trigger);

	575 }

290	576

291 rules.push({trigger: trigger, action: {type: action}});	577 rules.push({trigger: trigger, action: {type: action}});

292 }	578

293	579 if (needAltRules)

294 function hasNonASCI(obj)	580 {

295 {	581 // Generate additional rules for any alternative URL schemes.

296 if (typeof obj == "string")	582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

297 {	583 {

298 if (/[^\x00-\x7F]/.test(obj))	584 if (addTopLevelException)

299 return true;	585 excludeTopURLFromTrigger(altRule.trigger);

300 }	586

301	587 rules.push(altRule);

302 if (typeof obj == "object")	588 }

303 {	589 }

304 if (obj instanceof Array)

305 for (let item of obj)

306 if (hasNonASCI(item))

307 return true;

308

309 let names = Object.getOwnPropertyNames(obj);

310 for (let name of names)

311 if (hasNonASCI(obj[name]))

312 return true;

313 }

314

315 return false;

316 }	590 }

317	591

318 function convertIDSelectorsToAttributeSelectors(selector)	592 function convertIDSelectorsToAttributeSelectors(selector)

319 {	593 {

320 // First we figure out where all the IDs are	594 // First we figure out where all the IDs are

321 let sep = "";	595 let sep = "";

322 let start = null;	596 let start = null;

323 let positions = [];	597 let positions = [];

324 for (let i = 0; i < selector.length; i++)	598 for (let i = 0; i < selector.length; i++)

325 {	599 {

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
359 {	633 {

360 newSelector.push(selector.substring(i, pos.start));	634 newSelector.push(selector.substring(i, pos.start));

361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');	635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');

362 i = pos.end;	636 i = pos.end;

363 }	637 }

364 newSelector.push(selector.substring(i));	638 newSelector.push(selector.substring(i));

365	639

366 return newSelector.join("");	640 return newSelector.join("");

367 }	641 }

368	642

	643 function addCSSRules(rules, selectors, domain, exceptionDomains)

	644 {

	645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;

	646

	647 exceptionDomains.forEach(name =>

	648 {

	649 // For domain-specific filters, include the exception domains only if

	650 // they're subdomains of the given domain.

	651 if (!domain \|\| name.substr(-domain.length - 1) == "." + domain)

	652 unlessDomain.push("*" + name);

	653 });

	654

	655 while (selectors.length)

	656 {

	657 let selector = selectors.splice(0, selectorLimit).join(", ");

	658

	659 // As of Safari 9.0 element IDs are matched as lowercase. We work around

	660 // this by converting to the attribute format [id="elementID"]

	661 selector = convertIDSelectorsToAttributeSelectors(selector);

	662

	663 let rule = {

	664 trigger: {"url-filter": matchDomain(domain),

	665 "url-filter-is-case-sensitive": true},

	666 action: {type: "css-display-none",

	667 selector: selector}

	668 };

	669

	670 if (unlessDomain)

	671 rule.trigger["unless-domain"] = unlessDomain;

	672

	673 rules.push(rule);

	674 }

	675 }

	676

369 /**	677 /**

370 * Check if two strings are a close match	678 * Check if two strings are a close match

371 *	679 *

372 * This function returns an edit operation, one of "substitute", "delete", and	680 * This function returns an edit operation, one of "substitute", "delete", and

373 * "insert", along with an index in the source string where the edit must occur	681 * "insert", along with an index in the source string where the edit must occur

374 * in order to arrive at the target string. If the strings are not a close	682 * in order to arrive at the target string. If the strings are not a close

375 * match, it returns null.	683 * match, it returns null.

376 *	684 *

377 * Two strings are considered to be a close match if they are one edit	685 * Two strings are considered to be a close match if they are one edit

378 * operation apart.	686 * operation apart.

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
417 // calculation.	725 // calculation.

418 if (diff < 0)	726 if (diff < 0)

419 {	727 {

420 let tmp = s;	728 let tmp = s;

421 s = t;	729 s = t;

422 t = tmp;	730 t = tmp;

423 }	731 }

424	732

425 let edit = null;	733 let edit = null;

426	734

427 let i = 0, j = 0;	735 let i = 0;

	736 let j = 0;

428	737

429 // Start from the beginning and keep going until we hit a character that	738 // Start from the beginning and keep going until we hit a character that

430 // doesn't match.	739 // doesn't match.

431 for (; i < s.length; i++)	740 for (; i < s.length; i++)

432 {	741 {

433 if (s[i] != t[i])	742 if (s[i] != t[i])

434 break;	743 break;

435 }	744 }

436	745

437 // Now do exactly the same from the end, but also stop if we reach the	746 // Now do exactly the same from the end, but also stop if we reach the

(...skipping 43 matching lines...)
481 {	790 {

482 edit = {type: "insert", index: i};	791 edit = {type: "insert", index: i};

483	792

484 if (diff < -1)	793 if (diff < -1)

485 edit.endIndex = s.length - j;	794 edit.endIndex = s.length - j;

486 }	795 }

487	796

488 return edit;	797 return edit;

489 }	798 }

490	799

491 function eliminateRedundantRulesByURLFilter(rulesInfo)	800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)

492 {	801 {

493 for (let i = 0; i < rulesInfo.length; i++)	802 const heuristicRange = 1000;

	803

	804 let ol = rulesInfo.length;

	805

	806 // Throw out obviously redundant rules.

	807 return async(rulesInfo, (ruleInfo, index) => () =>

494 {	808 {

495 // If this rule is already marked as redundant, don't bother comparing it	809 // If this rule is already marked as redundant, don't bother comparing it

496 // with other rules.	810 // with other rules.

497 if (rulesInfo[i].redundant)	811 if (rulesInfo[index].redundant)

498 continue;	812 return;

499	813

500 for (let j = i + 1; j < rulesInfo.length; j++)	814 let limit = exhaustive ? rulesInfo.length :

	815 Math.min(index + heuristicRange, rulesInfo.length);

	816

	817 for (let i = index, j = i + 1; j < limit; j++)

501 {	818 {

502 if (rulesInfo[j].redundant)	819 if (rulesInfo[j].redundant)

503 continue;	820 continue;

504	821

505 let source = rulesInfo[i].rule.trigger["url-filter"];	822 let source = rulesInfo[i].rule.trigger["url-filter"];

506 let target = rulesInfo[j].rule.trigger["url-filter"];	823 let target = rulesInfo[j].rule.trigger["url-filter"];

507	824

508 if (source.length >= target.length)	825 if (source.length >= target.length)

509 {	826 {

510 // If one URL filter is a substring of the other starting at the	827 // If one URL filter is a substring of the other starting at the

511 // beginning, the other one is clearly redundant.	828 // beginning, the other one is clearly redundant.

512 if (source.substring(0, target.length) == target)	829 if (source.substring(0, target.length) == target)

513 {	830 {

514 rulesInfo[i].redundant = true;	831 rulesInfo[i].redundant = true;

515 break;	832 break;

516 }	833 }

517 }	834 }

518 else if (target.substring(0, source.length) == source)	835 else if (target.substring(0, source.length) == source)

519 {	836 {

520 rulesInfo[j].redundant = true;	837 rulesInfo[j].redundant = true;

521 }	838 }

522 }	839 }

523 }	840 })

524	841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));

525 return rulesInfo.filter(ruleInfo => !ruleInfo.redundant);	842 }

526 }	843

527	844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

528 function mergeRulesByURLFilter(rulesInfo, exhaustive)

529 {	845 {

530 // Closely matching rules are likely to be within a certain range. We only	846 // Closely matching rules are likely to be within a certain range. We only

531 // look for matches within this range by default. If we increase this value,	847 // look for matches within this range by default. If we increase this value,

532 // it can give us more matches and a smaller resulting rule set, but possibly	848 // it can give us more matches and a smaller resulting rule set, but possibly

533 // at a significant performance cost.	849 // at a significant performance cost.

534 //	850 //

535 // If the exhaustive option is true, we simply ignore this value and look for	851 // If the exhaustive option is true, we simply ignore this value and look for

536 // matches throughout the rule set.	852 // matches throughout the rule set.

537 const heuristicRange = 10;	853 const heuristicRange = 1000;

538	854

539 if (exhaustive)	855 let limit = exhaustive ? rulesInfo.length :

540 {	856 Math.min(index + heuristicRange, rulesInfo.length);

541 // Throw out obviously redundant rules.	857

542 rulesInfo = eliminateRedundantRulesByURLFilter(rulesInfo);	858 for (let i = index, j = i + 1; j < limit; j++)

543 }	859 {

544	860 let source = rulesInfo[i].rule.trigger["url-filter"];

545 if (rulesInfo.length <= 1)	861 let target = rulesInfo[j].rule.trigger["url-filter"];

546 return;	862

547	863 let edit = closeMatch(source, target);

548 for (let i = 0; i < rulesInfo.length; i++)	864

549 {	865 if (edit)

550 let limit = exhaustive ? rulesInfo.length :	866 {

551 Math.min(i + heuristicRange, rulesInfo.length);	867 let urlFilter, ruleInfo, match = {edit};

552	868

553 for (let j = i + 1; j < limit; j++)	869 if (edit.type == "insert")

554 {	870 {

555 let source = rulesInfo[i].rule.trigger["url-filter"];	871 // Convert the insertion into a deletion and stick it on the target

556 let target = rulesInfo[j].rule.trigger["url-filter"];	872 // rule instead. We can only group deletions and substitutions;

557	873 // therefore insertions must be treated as deletions on the target

558 let edit = closeMatch(source, target);	874 // rule.

559	875 urlFilter = target;

560 if (edit)	876 ruleInfo = rulesInfo[j];

561 {	877 match.index = i;

562 let urlFilter, ruleInfo, match = {edit};	878 edit.type = "delete";

563	879 }

564 if (edit.type == "insert")	880 else

	881 {

	882 urlFilter = source;

	883 ruleInfo = rulesInfo[i];

	884 match.index = j;

	885 }

	886

	887 // If the edit has an end index, it represents a multiple character

	888 // edit.

	889 let multiEdit = !!edit.endIndex;

	890

	891 if (multiEdit)

	892 {

	893 // We only care about a single multiple character edit because the

	894 // number of characters for such a match doesn't matter, we can

	895 // only merge with one other rule.

	896 if (!ruleInfo.multiEditMatch)

	897 ruleInfo.multiEditMatch = match;

	898 }

	899 else

	900 {

	901 // For single character edits, multiple rules can be merged into

	902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

	903 if (!ruleInfo.matches)

	904 ruleInfo.matches = new Array(urlFilter.length);

	905

	906 // Matches at a particular index. For example, for a source string

	907 // "ads", both target strings "ad" (deletion) and "adv"

	908 // (substitution) match at index 2, hence they are grouped together

	909 // to possibly be merged later into "ad[sv]?".

	910 let matchesForIndex = ruleInfo.matches[edit.index];

	911

	912 if (matchesForIndex)

565 {	913 {

566 // Convert the insertion into a deletion and stick it on the target	914 matchesForIndex.push(match);

567 // rule instead. We can only group deletions and substitutions;

568 // therefore insertions must be treated as deletions on the target

569 // rule.

570 urlFilter = target;

571 ruleInfo = rulesInfo[j];

572 match.index = i;

573 edit.type = "delete";

574 }	915 }

575 else	916 else

576 {	917 {

577 urlFilter = source;	918 matchesForIndex = [match];

578 ruleInfo = rulesInfo[i];	919 ruleInfo.matches[edit.index] = matchesForIndex;

579 match.index = j;

580 }	920 }

581	921

582 // If the edit has an end index, it represents a multiple character	922 // Keep track of the best set of matches. We later sort by this to

583 // edit.	923 // get best results.

584 let multiEdit = !!edit.endIndex;	924 if (!ruleInfo.bestMatches \|\|

585	925 matchesForIndex.length > ruleInfo.bestMatches.length)

586 if (multiEdit)	926 ruleInfo.bestMatches = matchesForIndex;

587 {

588 // We only care about a single multiple character edit because the

589 // number of characters for such a match doesn't matter, we can

590 // only merge with one other rule.

591 if (!ruleInfo.multiEditMatch)

592 ruleInfo.multiEditMatch = match;

593 }

594 else

595 {

596 // For single character edits, multiple rules can be merged into

597 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

598 if (!ruleInfo.matches)

599 ruleInfo.matches = new Array(urlFilter.length);

600

601 // Matches at a particular index. For example, for a source string

602 // "ads", both target strings "ad" (deletion) and "adv"

603 // (substitution) match at index 2, hence they are grouped together

604 // to possibly be merged later into "ad[sv]?".

605 let matchesForIndex = ruleInfo.matches[edit.index];

606

607 if (matchesForIndex)

608 {

609 matchesForIndex.push(match);

610 }

611 else

612 {

613 matchesForIndex = [match];

614 ruleInfo.matches[edit.index] = matchesForIndex;

615 }

616

617 // Keep track of the best set of matches. We later sort by this to

618 // get best results.

619 if (!ruleInfo.bestMatches \|\|

620 matchesForIndex.length > ruleInfo.bestMatches.length)

621 ruleInfo.bestMatches = matchesForIndex;

622 }

623 }	927 }

624 }	928 }

625 }	929 }

626	930 }

	931

	932 function mergeCandidateRulesByURLFilter(rulesInfo)

	933 {

627 // Filter out rules that have no matches at all.	934 // Filter out rules that have no matches at all.

628 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>	935 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>

629 {	936 {

630 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch	937 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch

631 });	938 });

632	939

633 // For best results, we have to sort the candidates by the largest set of	940 // For best results, we have to sort the candidates by the largest set of

634 // matches.	941 // matches.

635 //	942 //

636 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to	943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to

(...skipping 48 matching lines...)
685 if (best.length > 0)	992 if (best.length > 0)

686 {	993 {

687 let urlFilter = rule.trigger["url-filter"];	994 let urlFilter = rule.trigger["url-filter"];

688	995

689 let editIndex = best[0].edit.index;	996 let editIndex = best[0].edit.index;

690	997

691 if (!multiEdit)	998 if (!multiEdit)

692 {	999 {

693 // Merge all the matching rules into this one.	1000 // Merge all the matching rules into this one.

694	1001

695 let characters = [];	1002 let characters = [urlFilter[editIndex]];

696 let quantifier = "";	1003 let quantifier = "";

697	1004

698 for (let match of best)	1005 for (let match of best)

699 {	1006 {

700 if (match.edit.type == "delete")	1007 if (match.edit.type == "delete")

701 {	1008 {

702 quantifier = "?";	1009 quantifier = "?";

703 }	1010 }

704 else	1011 else

705 {	1012 {

706 let character = rulesInfo[match.index].rule	1013 let character = rulesInfo[match.index].rule

707 .trigger["url-filter"][editIndex];	1014 .trigger["url-filter"][editIndex];

708 characters.push(character);	1015

	1016 // Insert any hyphen at the beginning so it gets interpreted as a

	1017 // literal hyphen.

	1018 if (character == "-")

	1019 characters.unshift(character);

	1020 else

	1021 characters.push(character);

709 }	1022 }

710	1023

711 // Mark the target rule as merged so other rules don't try to merge	1024 // Mark the target rule as merged so other rules don't try to merge

712 // it again.	1025 // it again.

713 rulesInfo[match.index].merged = true;	1026 rulesInfo[match.index].merged = true;

714 }	1027 }

715	1028

716 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +	1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +

717 urlFilter.substring(editIndex + 1);	1030 urlFilter.substring(editIndex + 1);

718 if (characters.length > 0)	1031 if (characters.length > 1)

719 {	1032 {

720 urlFilter = urlFilter.substring(0, editIndex) + "[" +	1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +

721 urlFilter[editIndex] + characters.join("") + "]" +	1034 characters.join("") + "]" +

722 urlFilter.substring(editIndex + 1);	1035 urlFilter.substring(editIndex + 1);

723 }	1036 }

724 }	1037 }

725 else	1038 else

726 {	1039 {

727 let editEndIndex = best[0].edit.endIndex;	1040 let editEndIndex = best[0].edit.endIndex;

728	1041

729 // Mark the target rule as merged so other rules don't try to merge it	1042 // Mark the target rule as merged so other rules don't try to merge it

730 // again.	1043 // again.

731 rulesInfo[best[0].index].merged = true;	1044 rulesInfo[best[0].index].merged = true;

732	1045

733 urlFilter = urlFilter.substring(0, editIndex) + "(" +	1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +

734 urlFilter.substring(editIndex, editEndIndex) + ")?" +	1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +

735 urlFilter.substring(editEndIndex);	1048 urlFilter.substring(editEndIndex);

736 }	1049 }

737	1050

738 rule.trigger["url-filter"] = urlFilter;	1051 rule.trigger["url-filter"] = urlFilter;

739	1052

740 // Mark this rule as one that has had other rules merged into it.	1053 // Mark this rule as one that has had other rules merged into it.

741 ruleInfo.mergedInto = true;	1054 ruleInfo.mergedInto = true;

742 }	1055 }

743 }	1056 }

744 }	1057 }

745	1058

	1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)

	1060 {

	1061 return async(rulesInfo, (ruleInfo, index) => () =>

	1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

	1063 )

	1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));

	1065 }

	1066

746 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)	1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)

747 {	1068 {

748 if (rulesInfo.length <= 1)	1069 if (rulesInfo.length <= 1)

749 return;	1070 return;

750	1071

751 let set = new Set();	1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);

752	1073

753 rulesInfo.forEach((ruleInfo, index) =>	1074 for (let i = 1; i < rulesInfo.length; i++)

754 {	1075 {

755 if (ruleInfo.rule[propertyType][property])	1076 for (let value of rulesInfo[i].rule[propertyType][property] \|\| [])

756 {	1077 valueSet.add(value);

757 for (let value of ruleInfo.rule[propertyType][property])	1078

758 set.add(value);	1079 rulesInfo[i].merged = true;

759 }	1080 }

760	1081

761 if (index > 0)	1082 if (valueSet.size > 0)

762 ruleInfo.merged = true;	1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);

763 });

764

765 if (set.size > 0)

766 rulesInfo[0].rule[propertyType][property] = Array.from(set);

767	1084

768 rulesInfo[0].mergedInto = true;	1085 rulesInfo[0].mergedInto = true;

769 }	1086 }

770	1087

771 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)	1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)

772 {	1089 {

773 let mergeableRulesInfoByGroup = new Map();	1090 let mergeableRulesInfoByGroup = new Map();

774	1091

775 rulesInfo.forEach(ruleInfo =>	1092 for (let ruleInfo of rulesInfo)

776 {	1093 {

777 let copy = {	1094 let copy = {

778 trigger: Object.assign({}, ruleInfo.rule.trigger),	1095 trigger: Object.assign({}, ruleInfo.rule.trigger),

779 action: Object.assign({}, ruleInfo.rule.action)	1096 action: Object.assign({}, ruleInfo.rule.action)

780 };	1097 };

781	1098

782 delete copy[propertyType][property];	1099 delete copy[propertyType][property];

783	1100

784 let groupKey = JSON.stringify(copy);	1101 let groupKey = JSON.stringify(copy);

785	1102

786 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey);	1103 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey);

787	1104

788 if (mergeableRulesInfo)	1105 if (mergeableRulesInfo)

789 mergeableRulesInfo.push(ruleInfo);	1106 mergeableRulesInfo.push(ruleInfo);

790 else	1107 else

791 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);	1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);

792 });	1109 }

793	1110

794 return mergeableRulesInfoByGroup;	1111 return mergeableRulesInfoByGroup;

795 }	1112 }

796	1113

797 function mergeRules(rules, options)	1114 function mergeRules(rules, exhaustive)

798 {	1115 {

799 const defaultOptions = {exhaustive: false};
kzar 2017/05/09 10:05:47:
Have defaultOptions be a property on ContentBlockerList.prototype so you can use it both here and in the constructor?

Manish Jethani 2017/05/09 15:52:46:
On 2017/05/09 10:05:47, kzar wrote:
> Have defaultOptions be a property on ContentBlockerList.prototype so you can use
> it both here and in the constructor?
Actually the options for the ContentBlockerList constructor are different than the options for the mergeRules function, which is not part of ContentBlockerList at all. For what it's worth I think it might even be a good idea to move the merging code out into a separate module. I don't know if this would be better or worse for performance.
800

801 options = Object.assign({}, defaultOptions, options);

802

803 let rulesInfo = rules.map(rule => ({rule}));	1116 let rulesInfo = rules.map(rule => ({rule}));

804	1117

805 groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter")	1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];

806 .forEach(mergeableRulesInfo =>	1119

807 {	1120 return async(() =>

808 if (mergeableRulesInfo.length > 1)	1121 {

809 mergeRulesByURLFilter(mergeableRulesInfo, options.exhaustive);	1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");

810 });	1123 return async(map.values(), mergeableRulesInfo => () =>

811	1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)

812 // Filter out rules that are redundant or have been merged into other rules.	1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))

813 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&	1126 )

814 !ruleInfo.merged);	1127 .then(() =>

815	1128 {

816 for (let arrayProperty of ["resource-type", "if-domain"])	1129 // Filter out rules that are redundant or have been merged into other

817 {	1130 // rules.

818 groupRulesByMergeableProperty(rulesInfo, "trigger", arrayProperty)	1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&

819 .forEach(mergeableRulesInfo =>	1132 !ruleInfo.merged);

820 {

821 if (mergeableRulesInfo.length > 1)

822 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty);

823 });	1133 });

824	1134 })

825 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);	1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>

826 }	1136 {

827	1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",

828 return rulesInfo.map(ruleInfo => ruleInfo.rule);	1138 arrayProperty);

	1139 return async(map.values(), mergeableRulesInfo => () =>

	1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)

	1141 )

	1142 .then(() =>

	1143 {

	1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);

	1145 });

	1146 }))

	1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));

829 }	1148 }

830	1149

831 let ContentBlockerList =	1150 let ContentBlockerList =

832 /**	1151 /**

833 * Create a new Adblock Plus filter to content blocker list converter	1152 * Create a new Adblock Plus filter to content blocker list converter

834 *	1153 *

835 * @param {object} options Options for content blocker list generation	1154 * @param {object} options Options for content blocker list generation

836 *	1155 *

837 * @constructor	1156 * @constructor

838 */	1157 */

839 exports.ContentBlockerList = function(options)	1158 exports.ContentBlockerList = function (options)

840 {	1159 {

841 const defaultOptions = {	1160 const defaultOptions = {

842 merge: false,	1161 merge: "auto"

843 exhaustiveMerge: false

844 };	1162 };

845	1163

846 this.options = Object.assign({}, defaultOptions, options);	1164 this.options = Object.assign({}, defaultOptions, options);

847	1165

848 this.requestFilters = [];	1166 this.requestFilters = [];

849 this.requestExceptions = [];	1167 this.requestExceptions = [];

850 this.elemhideFilters = [];	1168 this.elemhideFilters = [];

851 this.elemhideExceptions = [];	1169 this.elemhideExceptions = [];

	1170 this.genericblockExceptions = [];

	1171 this.generichideExceptions = [];

852 this.elemhideSelectorExceptions = new Map();	1172 this.elemhideSelectorExceptions = new Map();

853 };	1173 };

854	1174

855 /**	1175 /**

856 * Add Adblock Plus filter to be converted	1176 * Add Adblock Plus filter to be converted

857 *	1177 *

858 * @param {Filter} filter Filter to convert	1178 * @param {Filter} filter Filter to convert

859 */	1179 */

860 ContentBlockerList.prototype.addFilter = function(filter)	1180 ContentBlockerList.prototype.addFilter = function(filter)

861 {	1181 {

862 if (filter.sitekeys)	1182 if (filter.sitekeys)

863 return;	1183 return;

864 if (filter instanceof filterClasses.RegExpFilter &&	1184 if (filter instanceof filterClasses.RegExpFilter &&

865 filter.regexpSource == null)	1185 filter.regexpSource == null)

866 return;	1186 return;

867	1187

868 if (filter instanceof filterClasses.BlockingFilter)	1188 if (filter instanceof filterClasses.BlockingFilter)

869 this.requestFilters.push(filter);	1189 this.requestFilters.push(filter);

870	1190

871 if (filter instanceof filterClasses.WhitelistFilter)	1191 if (filter instanceof filterClasses.WhitelistFilter)

872 {	1192 {

873 if (filter.contentType & (typeMap.DOCUMENT \| whitelistableRequestTypes))	1193 if (filter.contentType & (typeMap.DOCUMENT \| whitelistableRequestTypes))

874 this.requestExceptions.push(filter);	1194 this.requestExceptions.push(filter);

875	1195

876 if (filter.contentType & typeMap.ELEMHIDE)	1196 if (filter.contentType & typeMap.GENERICBLOCK)

877 this.elemhideExceptions.push(filter);	1197 this.genericblockExceptions.push(filter);

	1198

	1199 if (filter.contentType & typeMap.ELEMHIDE)

	1200 this.elemhideExceptions.push(filter);

	1201 else if (filter.contentType & typeMap.GENERICHIDE)

	1202 this.generichideExceptions.push(filter);

878 }	1203 }

879	1204

880 if (filter instanceof filterClasses.ElemHideFilter)	1205 if (filter instanceof filterClasses.ElemHideFilter)

881 this.elemhideFilters.push(filter);	1206 this.elemhideFilters.push(filter);

882	1207

883 if (filter instanceof filterClasses.ElemHideException)	1208 if (filter instanceof filterClasses.ElemHideException)

884 {	1209 {

885 let domains = this.elemhideSelectorExceptions[filter.selector];	1210 let domains = this.elemhideSelectorExceptions[filter.selector];

886 if (!domains)	1211 if (!domains)

887 domains = this.elemhideSelectorExceptions[filter.selector] = [];	1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];

888	1213

889 parseDomains(filter.domains, domains, []);	1214 parseDomains(filter.domains, domains, []);

890 }	1215 }

891 };	1216 };

892	1217

893 /**	1218 /**

894 * Generate content blocker list for all filters that were added	1219 * Generate content blocker list for all filters that were added

895 */	1220 */

896 ContentBlockerList.prototype.generateRules = function()	1221 ContentBlockerList.prototype.generateRules = function()

897 {	1222 {

898 let rules = [];	1223 let cssRules = [];

899	1224 let cssExceptionRules = [];

	1225 let blockingRules = [];

	1226 let blockingExceptionRules = [];

	1227

	1228 let ruleGroups = [cssRules, cssExceptionRules,

	1229 blockingRules, blockingExceptionRules];

	1230

	1231 let genericSelectors = [];

900 let groupedElemhideFilters = new Map();	1232 let groupedElemhideFilters = new Map();

	1233

901 for (let filter of this.elemhideFilters)	1234 for (let filter of this.elemhideFilters)

902 {	1235 {

903 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);	1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);

904 if (!result)	1237 if (!result)

905 continue;	1238 continue;

906	1239

907 if (result.matchDomains.length == 0)	1240 if (result.matchDomains.length == 0)

908 result.matchDomains = ["^https?://"];	1241 {

909	1242 genericSelectors.push(result.selector);

910 for (let matchDomain of result.matchDomains)	1243 }

911 {	1244 else

912 let group = groupedElemhideFilters.get(matchDomain) \|\| [];	1245 {

913 group.push(result.selector);	1246 for (let matchDomain of result.matchDomains)

914 groupedElemhideFilters.set(matchDomain, group);	1247 {

915 }	1248 let group = groupedElemhideFilters.get(matchDomain) \|\| [];

916 }	1249 group.push(result.selector);

	1250 groupedElemhideFilters.set(matchDomain, group);

	1251 }

	1252 }

	1253 }

	1254

	1255 // Separate out the element hiding exceptions that have only a hostname part

	1256 // from the rest. This allows us to implement a workaround for issue #5345

	1257 // (WebKit bug #167423), but as a bonus it also reduces the number of

	1258 // generated rules. The downside is that the exception will only apply to the

	1259 // top-level document, not to iframes. We have to live with this until the

	1260 // WebKit bug is fixed in all supported versions of Safari.

	1261 // https://bugs.webkit.org/show_bug.cgi?id=167423

	1262 //

	1263 // Note that as a result of this workaround we end up with a huge rule set in

	1264 // terms of the amount of memory used. This can cause Node.js to throw

	1265 // "JavaScript heap out of memory". To avoid this, call Node.js with

	1266 // --max_old_space_size=4096

	1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);

	1268

	1269 let genericSelectorExceptionDomains =

	1270 extractFilterDomains(this.generichideExceptions);

	1271 elemhideExceptionDomains.forEach(name =>

	1272 {

	1273 genericSelectorExceptionDomains.add(name);

	1274 });

	1275

	1276 addCSSRules(cssRules, genericSelectors, null,

	1277 genericSelectorExceptionDomains);

	1278

	1279 // Filter out whitelisted domains.

	1280 elemhideExceptionDomains.forEach(domain =>

	1281 groupedElemhideFilters.delete(domain));

917	1282

918 groupedElemhideFilters.forEach((selectors, matchDomain) =>	1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>

919 {	1284 {

920 while (selectors.length)	1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);

921 {	1286 });

922 let selector = selectors.splice(0, selectorLimit).join(", ");	1287

923	1288 let requestFilterExceptionDomains = [];

924 // As of Safari 9.0 element IDs are matched as lowercase. We work around	1289 for (let filter of this.genericblockExceptions)

925 // this by converting to the attribute format [id="elementID"]	1290 {

926 selector = convertIDSelectorsToAttributeSelectors(selector);	1291 let parsed = parseFilterRegexpSource(filter.regexpSource);

927	1292 if (parsed.hostname)

928 rules.push({	1293 requestFilterExceptionDomains.push(parsed.hostname);

929 trigger: {"url-filter": matchDomain,	1294 }

930 "url-filter-is-case-sensitive": true},	1295

931 action: {type: "css-display-none",	1296 for (let filter of this.requestFilters)

932 selector: selector}	1297 {

	1298 convertFilterAddRules(blockingRules, filter, "block", true,

	1299 requestFilterExceptionDomains);

	1300 }

	1301

	1302 for (let filter of this.requestExceptions)

	1303 {

	1304 convertFilterAddRules(blockingExceptionRules, filter,

	1305 "ignore-previous-rules", true);

	1306 }

	1307

	1308 return async(ruleGroups, (group, index) => () =>

	1309 {

	1310 let next = () =>

	1311 {

	1312 if (index == ruleGroups.length - 1)

	1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);

	1314 };

	1315

	1316 if (this.options.merge == "all" \|\|

	1317 (this.options.merge == "auto" &&

	1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))

	1319 {

	1320 return mergeRules(ruleGroups[index], this.options.merge == "all")

	1321 .then(rules =>

	1322 {

	1323 ruleGroups[index] = rules;

	1324 return next();

933 });	1325 });

934 }	1326 }

	1327

	1328 return next();

935 });	1329 });

936

937 for (let filter of this.elemhideExceptions)

938 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);

939 for (let filter of this.requestFilters)

940 convertFilterAddRules(rules, filter, "block", true);

941 for (let filter of this.requestExceptions)

942 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

943

944 rules = rules.filter(rule => !hasNonASCI(rule));

945

946 if (this.options.merge)

947 rules = mergeRules(rules, {exhaustive: this.options.exhaustiveMerge});
kzar 2017/05/09 10:05:47:
Why wrap the exhaustiveMerge option in an Object here?

Manish Jethani 2017/05/09 15:52:47:
On 2017/05/09 10:05:47, kzar wrote:
> Why wrap the exhaustiveMerge option in an Object here?
This is because mergeRules takes an option called "exhaustive", whereas the ContentBlockerList constructor takes an option called "exhaustiveMerge". The name of the latter is more specific because "exhaustive" could mean just anything to the constructor (exhaustive merging, exhaustive rule generation, exhaustive filter addition, possibly other things the ContentBlockerList class starts to do over time as we add features). On the other hand, the mergeRules function, which could very well be its own module, does one and only one thing: it merges rules. There is no possible ambiguity about the meaning of the word "exhaustive" there. Now all of this would really matter if both ContentBlockerList and mergeRules were public APIs. The latter is not even exposed outside of this file, it really doesn't matter one way or another.

kzar 2017/05/09 16:50:58:
On 2017/05/09 15:52:47, Manish Jethani wrote:
> This is because mergeRules takes an option called
> "exhaustive", whereas the
> ContentBlockerList constructor takes an option called
> "exhaustiveMerge".
Well if you passed through the value of exhaustiveMerge to mergeRules without wrapping it in an Object you could call the parameter exhaustive there. You could also get rid of the defaultOptions logic from mergeRules then as well.
> ...if both ContentBlockerList and mergeRules were public APIs. The
> latter is not even exposed outside of this file...
Right, so what benefit is there with this indirection? I could see the purpose of taking an options Object if mergeRules was a public API, that way you could add new options without breaking backwards compatibility, but it isn't.

Manish Jethani 2017/05/09 17:32:11:
On 2017/05/09 16:50:58, kzar wrote:
> On 2017/05/09 15:52:47, Manish Jethani wrote:
> > This is because mergeRules takes an option called
> > "exhaustive", whereas the
> > ContentBlockerList constructor takes an option called
> > "exhaustiveMerge".
>
> Well if you passed through the value of exhaustiveMerge to mergeRules without
> wrapping it in an Object you could call the parameter exhaustive there. You
> could also get rid of the defaultOptions logic from mergeRules then as well.
>
> > ...if both ContentBlockerList and mergeRules were public APIs. The
> > latter is not even exposed outside of this file...
>
> Right, so what benefit is there with this indirection? I could see the purpose
> of taking an options Object if mergeRules was a public API, that way you could
> add new options without breaking backwards compatibility, but it isn't.
Done.
948

949 return rules;

950 };	1330 };

LEFT	RIGHT