lib/abp2blocklist.js - Issue 29426594: Issue 3673 - Merge closely matching rules

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Left Patch Set: Filter out redundant rules before merging Created May 7, 2017, 10:38 p.m.

Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 /** @module abp2blocklist */	18 /** @module abp2blocklist */

19	19

20 "use strict";	20 "use strict";

21	21

22 let filterClasses = require("filterClasses");	22 let filterClasses = require("filterClasses");

23 let tldjs = require("tldjs");

24 let punycode = require("punycode");	23 let punycode = require("punycode");

25	24

26 const selectorLimit = 5000;	25 const selectorLimit = 5000;

27 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

28 const whitelistableRequestTypes = (typeMap.IMAGE	27

29 \| typeMap.STYLESHEET	28 const httpRequestTypes = typeMap.IMAGE \|

30 \| typeMap.SCRIPT	29 typeMap.STYLESHEET \|

31 \| typeMap.FONT	30 typeMap.SCRIPT \|

32 \| typeMap.MEDIA	31 typeMap.FONT \|

33 \| typeMap.POPUP	32 typeMap.MEDIA \|

34 \| typeMap.OBJECT	33 typeMap.POPUP \|

35 \| typeMap.OBJECT_SUBREQUEST	34 typeMap.OBJECT \|

36 \| typeMap.XMLHTTPREQUEST	35 typeMap.OBJECT_SUBREQUEST \|

37 \| typeMap.PING	36 typeMap.XMLHTTPREQUEST \|

38 \| typeMap.SUBDOCUMENT	37 typeMap.PING \|

39 \| typeMap.OTHER);	38 typeMap.SUBDOCUMENT \|

	39 typeMap.OTHER;

	40 const rawRequestTypes = typeMap.XMLHTTPREQUEST \|

	41 typeMap.WEBSOCKET \|

	42 typeMap.WEBRTC \|

	43 typeMap.OBJECT_SUBREQUEST \|

	44 typeMap.PING \|

	45 typeMap.OTHER;

	46 const whitelistableRequestTypes = httpRequestTypes \|

	47 typeMap.WEBSOCKET \|

	48 typeMap.WEBRTC;

	49

	50 function callLater(func)

	51 {

	52 return new Promise(resolve =>

	53 {

	54 let call = () => resolve(func());

	55

	56 // If this looks like Node.js, call process.nextTick, otherwise call

	57 // setTimeout.

	58 if (typeof process != "undefined")

	59 process.nextTick(call);

	60 else

	61 setTimeout(call, 0);

	62 });

	63 }

	64

	65 function async(callees, mapFunction)

	66 {

	67 if (!(Symbol.iterator in callees))

	68 callees = [callees];

	69

	70 let lastPause = Date.now();

	71 let index = 0;

	72

	73 let promise = Promise.resolve();

	74

	75 for (let next of callees)

	76 {

	77 let currentIndex = index;

	78

	79 promise = promise.then(() =>

	80 {

	81 if (mapFunction)

	82 next = mapFunction(next, currentIndex);

	83

	84 // If it has been 100ms or longer since the last call, take a pause. This

	85 // keeps the browser from freezing up.

	86 let now = Date.now();

	87 if (now - lastPause >= 100)

	88 {

	89 lastPause = now;

	90 return callLater(next);

	91 }

	92

	93 return next();

	94 });

	95

	96 index++;

	97 }

	98

	99 return promise;

	100 }

40	101

41 function parseDomains(domains, included, excluded)	102 function parseDomains(domains, included, excluded)

42 {	103 {

43 for (let domain in domains)	104 for (let domain in domains)

44 {	105 {

45 if (domain != "")	106 if (domain != "")

46 {	107 {

47 let enabled = domains[domain];	108 let enabled = domains[domain];

48 domain = punycode.toASCII(domain.toLowerCase());	109 domain = punycode.toASCII(domain.toLowerCase());

49	110

50 if (!enabled)	111 if (!enabled)

51 excluded.push(domain);	112 excluded.push(domain);

52 else if (!domains[""])	113 else if (!domains[""])

53 included.push(domain);	114 included.push(domain);

54 }	115 }

55 }	116 }

56 }	117 }

57	118

58 function escapeRegExp(s)	119 function escapeRegExp(s)

59 {	120 {

60 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	121 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

61 }	122 }

62	123

63 function matchDomain(domain)	124 function matchDomain(domain)

64 {	125 {

	126 if (!domain)

	127 return "^https?://";

	128

65 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

	130 }

	131

	132 function getURLSchemes(contentType)

	133 {

	134 // If the given content type includes all supported URL schemes, simply

	135 // return a single generic URL scheme pattern. This minimizes the size of the

	136 // generated rule set. The downside to this is that it will also match

	137 // schemes that we do not want to match (e.g. "ftp://"), but this can be

	138 // mitigated by adding exceptions for those schemes.

	139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

	140 contentType & httpRequestTypes)

	141 return ["[^:]+:(//)?"];

	142

	143 let urlSchemes = [];

	144

	145 if (contentType & typeMap.WEBSOCKET)

	146 urlSchemes.push("wss?://");

	147

	148 if (contentType & typeMap.WEBRTC)

	149 urlSchemes.push("stuns?:", "turns?:");

	150

	151 if (contentType & httpRequestTypes)

	152 urlSchemes.push("https?://");

	153

	154 return urlSchemes;

	155 }

	156

	157 function findSubdomainsInList(domain, list)

	158 {

	159 let subdomains = [];

	160 let suffixLength = domain.length + 1;

	161

	162 for (let name of list)

	163 {

	164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

	165 subdomains.push(name.slice(0, -suffixLength));

	166 }

	167

	168 return subdomains;

	169 }

	170

	171 function extractFilterDomains(filters)

	172 {

	173 let domains = new Set();

	174 for (let filter of filters)

	175 {

	176 let parsed = parseFilterRegexpSource(filter.regexpSource);

	177 if (parsed.justHostname)

	178 domains.add(parsed.hostname);

	179 }

	180 return domains;

66 }	181 }

67	182

68 function convertElemHideFilter(filter, elemhideSelectorExceptions)	183 function convertElemHideFilter(filter, elemhideSelectorExceptions)

69 {	184 {

70 let included = [];	185 let included = [];

71 let excluded = [];	186 let excluded = [];

72 let rules = [];

73	187

74 parseDomains(filter.domains, included, excluded);	188 parseDomains(filter.domains, included, excluded);

75	189

76 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

77 return {matchDomains: included.map(matchDomain), selector: filter.selector};	191 return {matchDomains: included, selector: filter.selector};

78 }	192 }

79	193

80 /**	194 /**

81 * Parse the given filter "regexpSource" string. Producing a regular expression,	195 * Parse the given filter "regexpSource" string. Producing a regular expression,

82 * extracting the hostname (if any), deciding if the regular expression is safe	196 * extracting the hostname (if any), deciding if the regular expression is safe

83 * to be converted + matched as lower case and noting if the source contains	197 * to be converted + matched as lower case and noting if the source contains

84 * anything after the hostname.)	198 * anything after the hostname.)

85 *	199 *

86 * @param {string} text regexpSource property of a filter	200 * @param {string} text regexpSource property of a filter

	201 * @param {string} urlScheme The URL scheme to use in the regular expression

87 * @returns {object} An object containing a regular expression string, a bool	202 * @returns {object} An object containing a regular expression string, a bool

88 * indicating if the filter can be safely matched as lower	203 * indicating if the filter can be safely matched as lower

89 * case, a hostname string (or undefined) and a bool	204 * case, a hostname string (or undefined) and a bool

90 * indicating if the source only contains a hostname or not:	205 * indicating if the source only contains a hostname or not:

91 * {regexp: "...",	206 * {regexp: "...",

92 * canSafelyMatchAsLowercase: true/false,	207 * canSafelyMatchAsLowercase: true/false,

93 * hostname: "...",	208 * hostname: "...",

94 * justHostname: true/false}	209 * justHostname: true/false}

95 */	210 */

96 function parseFilterRegexpSource(text)	211 function parseFilterRegexpSource(text, urlScheme)

97 {	212 {

98 let regexp = [];	213 let regexp = [];

99 let lastIndex = text.length - 1;	214

	215 // Convert the text into an array of Unicode characters.

	216 //

	217 // In the case of surrogate pairs (the smiley emoji, for example), one

	218 // Unicode code point is represented by two JavaScript characters together.

	219 // We want to iterate over Unicode code points rather than JavaScript

	220 // characters.

	221 let characters = Array.from(text);

	222

	223 let lastIndex = characters.length - 1;

100 let hostname;	224 let hostname;

101 let hostnameStart = null;	225 let hostnameStart = null;

102 let hostnameFinished = false;	226 let hostnameFinished = false;

103 let justHostname = false;	227 let justHostname = false;

104 let canSafelyMatchAsLowercase = false;	228 let canSafelyMatchAsLowercase = false;

105	229

106 for (let i = 0; i < text.length; i++)	230 if (!urlScheme)

107 {	231 urlScheme = getURLSchemes()[0];

108 let c = text[i];	232

	233 for (let i = 0; i < characters.length; i++)

	234 {

	235 let c = characters[i];

109	236

110 if (hostnameFinished)	237 if (hostnameFinished)

111 justHostname = false;	238 justHostname = false;

112	239

113 // If we're currently inside the hostname we have to be careful not to	240 // If we're currently inside the hostname we have to be careful not to

114 // escape any characters until after we have converted it to punycode.	241 // escape any characters until after we have converted it to punycode.

115 if (hostnameStart != null && !hostnameFinished)	242 if (hostnameStart != null && !hostnameFinished)

116 {	243 {

117 let endingChar = (c == "*" \|\| c == "^" \|\|	244 let endingChar = (c == "*" \|\| c == "^" \|\|

118 c == "?" \|\| c == "/" \|\| c == "\|");	245 c == "?" \|\| c == "/" \|\| c == "\|");

119 if (!endingChar && i != lastIndex)	246 if (!endingChar && i != lastIndex)

120 continue;	247 continue;

121	248

122 hostname = punycode.toASCII(	249 hostname = punycode.toASCII(

123 text.substring(hostnameStart, endingChar ? i : i + 1)	250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

	251 .toLowerCase()

124 );	252 );

125 hostnameFinished = justHostname = true;	253 hostnameFinished = justHostname = true;

126 regexp.push(escapeRegExp(hostname));	254 regexp.push(escapeRegExp(hostname));

127 if (!endingChar)	255 if (!endingChar)

128 break;	256 break;

129 }	257 }

130	258

131 switch (c)	259 switch (c)

132 {	260 {

133 case "*":	261 case "*":

134 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

135 regexp.push(".*");	263 regexp.push(".*");

136 break;	264 break;

137 case "^":	265 case "^":

138 if (i < lastIndex)	266 let alphabet = "a-z";

139 regexp.push(".");	267 // If justHostname is true and we've encountered a "^", it means we're

	268 // still in the hostname part of the URL. Since hostnames are always

	269 // lower case (Punycode), there's no need to include "A-Z" in the

	270 // pattern. Further, subsequent code may lower-case the entire regular

	271 // expression (if the URL contains only the hostname part), leaving us

	272 // with "a-za-z", which would be redundant.

	273 if (!justHostname)

	274 alphabet = "A-Z" + alphabet;

	275 let digits = "0-9";

	276 // Note that the "-" must appear first here in order to retain its

	277 // literal meaning within the brackets.

	278 let specialCharacters = "-_.%";

	279 let separator = "[^" + specialCharacters + alphabet + digits + "]";

	280 if (i == 0)

	281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");

	282 else if (i == lastIndex)

	283 regexp.push("(" + separator + ".*)?$");

	284 else

	285 regexp.push(separator);

140 break;	286 break;

141 case "\|":	287 case "\|":

142 if (i == 0)	288 if (i == 0)

143 {	289 {

144 regexp.push("^");	290 regexp.push("^");

145 break;	291 break;

146 }	292 }

147 if (i == lastIndex)	293 if (i == lastIndex)

148 {	294 {

149 regexp.push("$");	295 regexp.push("$");

150 break;	296 break;

151 }	297 }

152 if (i == 1 && text[0] == "\|")	298 if (i == 1 && characters[0] == "\|")

153 {	299 {

154 hostnameStart = i + 1;	300 hostnameStart = i + 1;

155 canSafelyMatchAsLowercase = true;	301 canSafelyMatchAsLowercase = true;

156 regexp.push("https?://([^/]+\\.)?");	302 regexp.push(urlScheme + "([^/]+\\.)?");

157 break;	303 break;

158 }	304 }

159 regexp.push("\\\|");	305 regexp.push("\\\|");

160 break;	306 break;

161 case "/":	307 case "/":

162 if (!hostnameFinished &&	308 if (!hostnameFinished &&

163 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	309 characters[i - 2] == ":" && characters[i - 1] == "/")

164 {	310 {

165 hostnameStart = i + 1;	311 hostnameStart = i + 1;

166 canSafelyMatchAsLowercase = true;	312 canSafelyMatchAsLowercase = true;

167 }	313 }

168 regexp.push("/");	314 regexp.push("/");

169 break;	315 break;

170 case ".": case "+": case "$": case "?":	316 case ".": case "+": case "$": case "?":

171 case "{": case "}": case "(": case ")":	317 case "{": case "}": case "(": case ")":

172 case "[": case "]": case "\\":	318 case "[": case "]": case "\\":

173 regexp.push("\\", c);	319 regexp.push("\\", c);

174 break;	320 break;

175 default:	321 default:

176 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	322 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

177 c >= "A" && c <= "Z"))	323 c >= "A" && c <= "Z"))

178 canSafelyMatchAsLowercase = false;	324 canSafelyMatchAsLowercase = false;

179 regexp.push(c);	325 regexp.push(c == "%" ? c : encodeURI(c));

180 }	326 }

181 }	327 }

182	328

183 return {	329 return {

184 regexp: regexp.join(""),	330 regexp: regexp.join(""),

185 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

186 hostname: hostname,	332 hostname: hostname,

187 justHostname: justHostname	333 justHostname: justHostname

188 };	334 };

189 }	335 }

190	336

191 function getResourceTypes(filter)	337 function getResourceTypes(contentType)

192 {	338 {

193 let types = [];	339 let types = [];

194	340

195 if (filter.contentType & typeMap.IMAGE)	341 if (contentType & typeMap.IMAGE)

196 types.push("image");	342 types.push("image");

197 if (filter.contentType & typeMap.STYLESHEET)	343 if (contentType & typeMap.STYLESHEET)

198 types.push("style-sheet");	344 types.push("style-sheet");

199 if (filter.contentType & typeMap.SCRIPT)	345 if (contentType & typeMap.SCRIPT)

200 types.push("script");	346 types.push("script");

201 if (filter.contentType & typeMap.FONT)	347 if (contentType & typeMap.FONT)

202 types.push("font");	348 types.push("font");

203 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	349 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

204 types.push("media");	350 types.push("media");

205 if (filter.contentType & typeMap.POPUP)	351 if (contentType & typeMap.POPUP)

206 types.push("popup");	352 types.push("popup");

207 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	353 if (contentType & rawRequestTypes)

208 typeMap.OBJECT_SUBREQUEST \|

209 typeMap.PING \|

210 typeMap.OTHER))

211 types.push("raw");	354 types.push("raw");

212 if (filter.contentType & typeMap.SUBDOCUMENT)	355 if (contentType & typeMap.SUBDOCUMENT)

213 types.push("document");	356 types.push("document");

214	357

215 return types;	358 return types;

216 }	359 }

217	360

218 function addDomainPrefix(domains)	361 function makeRuleCopies(trigger, action, urlSchemes)

219 {	362 {

220 let result = [];	363 let copies = [];

221	364

222 for (let domain of domains)	365 // Always make a deep copy of the rule, since rules may have to be

223 {	366 // manipulated individually at a later stage.

224 result.push(domain);	367 let stringifiedTrigger = JSON.stringify(trigger);

225	368

226 if (tldjs.getDomain(domain) == domain)	369 let filterPattern = trigger["url-filter"].substring(1);

227 result.push("www." + domain);	370 let startIndex = 0;

228 }	371

229	372 // If the URL filter already begins with the first URL scheme pattern, skip

230 return result;	373 // it.

231 }	374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

232	375 {

233 function convertFilterAddRules(rules, filter, action, withResourceTypes)	376 filterPattern = filterPattern.substring(urlSchemes[0].length);

234 {	377 startIndex = 1;

235 let parsed = parseFilterRegexpSource(filter.regexpSource);	378 }

	379 else

	380 {

	381 filterPattern = ".*" + filterPattern;

	382 }

	383

	384 for (let i = startIndex; i < urlSchemes.length; i++)

	385 {

	386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	387 "url-filter": "^" + urlSchemes[i] + filterPattern

	388 });

	389 copies.push({trigger: copyTrigger, action});

	390 }

	391

	392 return copies;

	393 }

	394

	395 function excludeTopURLFromTrigger(trigger)

	396 {

	397 trigger["unless-top-url"] = [trigger["url-filter"]];

	398 if (trigger["url-filter-is-case-sensitive"])

	399 trigger["top-url-filter-is-case-sensitive"] = true;

	400 }

	401

	402 function convertFilterAddRules(rules, filter, action, withResourceTypes,

	403 exceptionDomains, contentType)

	404 {

	405 if (!contentType)

	406 contentType = filter.contentType;

	407

	408 // If WebSocket or WebRTC are given along with other options but not

	409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,

	410 // we must generate multiple rules. For example, for the filter

	411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

	412 // and another rule with "^https?://" and "image". If we merge the two, we

	413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)

	414 // inadvertently.

	415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

	416 !(contentType & typeMap.WEBRTC &&

	417 contentType & rawRequestTypes & httpRequestTypes)) \|\|

	418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

	419 !(contentType & typeMap.WEBSOCKET &&

	420 contentType & rawRequestTypes & httpRequestTypes)))

	421 {

	422 if (contentType & typeMap.WEBSOCKET)

	423 {

	424 convertFilterAddRules(rules, filter, action, withResourceTypes,

	425 exceptionDomains, typeMap.WEBSOCKET);

	426 }

	427

	428 if (contentType & typeMap.WEBRTC)

	429 {

	430 convertFilterAddRules(rules, filter, action, withResourceTypes,

	431 exceptionDomains, typeMap.WEBRTC);

	432 }

	433

	434 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	435

	436 if (!contentType)

	437 return;

	438 }

	439

	440 let urlSchemes = getURLSchemes(contentType);

	441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

236	442

237 // For the special case of $document whitelisting filters with just a domain	443 // For the special case of $document whitelisting filters with just a domain

238 // we can generate an equivalent blocking rule exception using if-domain.	444 // we can generate an equivalent blocking rule exception using if-domain.

239 if (filter instanceof filterClasses.WhitelistFilter &&	445 if (filter instanceof filterClasses.WhitelistFilter &&

240 filter.contentType & typeMap.DOCUMENT &&	446 contentType & typeMap.DOCUMENT &&

241 parsed.justHostname)	447 parsed.justHostname)

242 {	448 {

243 rules.push({	449 rules.push({

244 trigger: {	450 trigger: {

245 "url-filter": ".*",	451 "url-filter": ".*",

246 "if-domain": addDomainPrefix([parsed.hostname])	452 "if-domain": ["*" + parsed.hostname]

247 },	453 },

248 action: {type: "ignore-previous-rules"}	454 action: {type: "ignore-previous-rules"}

249 });	455 });

250 // If the filter contains other supported options we'll need to generate	456 // If the filter contains other supported options we'll need to generate

251 // further rules for it, but if not we can simply return now.	457 // further rules for it, but if not we can simply return now.

252 if (!(filter.contentType & whitelistableRequestTypes))	458 if (!(contentType & whitelistableRequestTypes))

253 return;	459 return;

254 }	460 }

255	461

256 let trigger = {"url-filter": parsed.regexp};	462 let trigger = {"url-filter": parsed.regexp};

257	463

258 // Limit rules to HTTP(S) URLs	464 // If the URL filter begins with one of the URL schemes for this content

259 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	465 // type, we generate additional rules for all the URL scheme patterns;

260 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	466 // otherwise, if the start of the URL filter literally matches the first URL

	467 // scheme pattern, we just generate additional rules for the remaining URL

	468 // scheme patterns.

	469 //

	470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	471 // in front of this and generate two additional rules for

	472 // "^stuns?:.stun:foo" and "^turns?:.stun:foo". On the other hand,

	473 // "\|\|foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	474 // "^turns?:([^/]+\\.)?foo" in addition.

	475 //

	476 // Note that the filter can be already anchored to the beginning

	477 // (e.g. "\|stun:foo$webrtc"), in which case we do not generate any additional

	478 // rules.

	479 let needAltRules = trigger["url-filter"][0] != "^" \|\|

	480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	481

	482 if (trigger["url-filter"][0] != "^")

	483 {

	484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	485 .test(trigger["url-filter"])))

	486 {

	487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	488 }

	489

	490 trigger["url-filter"] = "^" + trigger["url-filter"];

	491 }

261	492

262 // For rules containing only a hostname we know that we're matching against	493 // For rules containing only a hostname we know that we're matching against

263 // a lowercase string unless the matchCase option was passed.	494 // a lowercase string unless the matchCase option was passed.

264 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

265 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

266	497

267 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	498 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

268 trigger["url-filter-is-case-sensitive"] = true;	499 trigger["url-filter-is-case-sensitive"] = true;

269	500

270 let included = [];	501 let included = [];

271 let excluded = [];	502 let excluded = [];

272	503

273 parseDomains(filter.domains, included, excluded);	504 parseDomains(filter.domains, included, excluded);

274	505

	506 if (exceptionDomains)

	507 excluded = excluded.concat(exceptionDomains);

	508

275 if (withResourceTypes)	509 if (withResourceTypes)

276 {	510 {

277 trigger["resource-type"] = getResourceTypes(filter);	511 let resourceTypes = getResourceTypes(contentType);

278	512

279 if (trigger["resource-type"].length == 0)	513 // Content blocker rules can't differentiate between sub-document requests

	514 // (iframes) and top-level document requests. To avoid too many false

	515 // positives, we prevent rules with no hostname part from blocking document

	516 // requests.

	517 //

	518 // Once Safari 11 becomes our minimum supported version, we could change

	519 // our approach here to use the new "unless-top-url" property instead.

	520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)

	521 resourceTypes = resourceTypes.filter(type => type != "document");

	522

	523 if (resourceTypes.length == 0)

280 return;	524 return;

	525

	526 trigger["resource-type"] = resourceTypes;

281 }	527 }

282	528

283 if (filter.thirdParty != null)	529 if (filter.thirdParty != null)

284 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

285	531

	532 let addTopLevelException = false;

	533

286 if (included.length > 0)	534 if (included.length > 0)

287 trigger["if-domain"] = addDomainPrefix(included);	535 {

	536 trigger["if-domain"] = [];

	537

	538 for (let name of included)

	539 {

	540 // If this is a blocking filter or an element hiding filter, add the

	541 // subdomain wildcard only if no subdomains have been excluded.

	542 let notSubdomains = null;

	543 if ((filter instanceof filterClasses.BlockingFilter \|\|

	544 filter instanceof filterClasses.ElemHideFilter) &&

	545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)

	546 {

	547 trigger["if-domain"].push(name);

	548

	549 // Add the "www" prefix but only if it hasn't been excluded.

	550 if (!notSubdomains.includes("www"))

	551 trigger["if-domain"].push("www." + name);

	552 }

	553 else

	554 {

	555 trigger["if-domain"].push("*" + name);

	556 }

	557 }

	558 }

288 else if (excluded.length > 0)	559 else if (excluded.length > 0)

289 trigger["unless-domain"] = addDomainPrefix(excluded);	560 {

	561 trigger["unless-domain"] = excluded.map(name => "*" + name);

	562 }

	563 else if (filter instanceof filterClasses.BlockingFilter &&

	564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)

	565 {

	566 // Rules with a hostname part are still allowed to block document requests,

	567 // but we add an exception for top-level documents.

	568 //

	569 // Note that we can only do this if there's no "unless-domain" property for

	570 // now. This also only works in Safari 11 onwards, while older versions

	571 // simply ignore this property. Once Safari 11 becomes our minimum

	572 // supported version, we can merge "unless-domain" into "unless-top-url".

	573 addTopLevelException = true;

	574 excludeTopURLFromTrigger(trigger);

	575 }

290	576

291 rules.push({trigger: trigger, action: {type: action}});	577 rules.push({trigger: trigger, action: {type: action}});

292 }	578

293	579 if (needAltRules)

294 function hasNonASCI(obj)	580 {

295 {	581 // Generate additional rules for any alternative URL schemes.

296 if (typeof obj == "string")	582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

297 {	583 {

298 if (/[^\x00-\x7F]/.test(obj))	584 if (addTopLevelException)

299 return true;	585 excludeTopURLFromTrigger(altRule.trigger);

300 }	586

301	587 rules.push(altRule);

302 if (typeof obj == "object")	588 }

303 {	589 }

304 if (obj instanceof Array)

305 for (let item of obj)

306 if (hasNonASCI(item))

307 return true;

308

309 let names = Object.getOwnPropertyNames(obj);

310 for (let name of names)

311 if (hasNonASCI(obj[name]))

312 return true;

313 }

314

315 return false;

316 }	590 }

317	591

318 function convertIDSelectorsToAttributeSelectors(selector)	592 function convertIDSelectorsToAttributeSelectors(selector)

319 {	593 {

320 // First we figure out where all the IDs are	594 // First we figure out where all the IDs are

321 let sep = "";	595 let sep = "";

322 let start = null;	596 let start = null;

323 let positions = [];	597 let positions = [];

324 for (let i = 0; i < selector.length; i++)	598 for (let i = 0; i < selector.length; i++)

325 {	599 {

(...skipping 33 matching lines...)
359 {	633 {

360 newSelector.push(selector.substring(i, pos.start));	634 newSelector.push(selector.substring(i, pos.start));

361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');	635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');

362 i = pos.end;	636 i = pos.end;

363 }	637 }

364 newSelector.push(selector.substring(i));	638 newSelector.push(selector.substring(i));

365	639

366 return newSelector.join("");	640 return newSelector.join("");

367 }	641 }

368	642

	643 function addCSSRules(rules, selectors, domain, exceptionDomains)

	644 {

	645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;

	646

	647 exceptionDomains.forEach(name =>

	648 {

	649 // For domain-specific filters, include the exception domains only if

	650 // they're subdomains of the given domain.

	651 if (!domain \|\| name.substr(-domain.length - 1) == "." + domain)

	652 unlessDomain.push("*" + name);

	653 });

	654

	655 while (selectors.length)

	656 {

	657 let selector = selectors.splice(0, selectorLimit).join(", ");

	658

	659 // As of Safari 9.0 element IDs are matched as lowercase. We work around

	660 // this by converting to the attribute format [id="elementID"]

	661 selector = convertIDSelectorsToAttributeSelectors(selector);

	662

	663 let rule = {

	664 trigger: {"url-filter": matchDomain(domain),

	665 "url-filter-is-case-sensitive": true},

	666 action: {type: "css-display-none",

	667 selector: selector}

	668 };

	669

	670 if (unlessDomain)

	671 rule.trigger["unless-domain"] = unlessDomain;

	672

	673 rules.push(rule);

	674 }

	675 }

	676

	677 /**

	678 * Check if two strings are a close match

	679 *

	680 * This function returns an edit operation, one of "substitute", "delete", and

	681 * "insert", along with an index in the source string where the edit must occur

	682 * in order to arrive at the target string. If the strings are not a close

	683 * match, it returns null.

	684 *

	685 * Two strings are considered to be a close match if they are one edit

	686 * operation apart.

	687 *

	688 * Deletions or insertions of a contiguous range of characters from one string

	689 * into the other, at the same index, are treated as a single edit. For

	690 * example, "internal" and "international" are considered to be one edit apart

	691 * and therefore a close match.

	692 *

	693 * A few things to note:

	694 *

	695 * 1) This function does not care about the format of the input strings. For

	696 * example, the caller may pass in regular expressions, where "[ab]" and

	697 * "[bc]" could be considered to be a close match, since the order within the

	698 * brackets doesn't matter. This function will still return null for this set

	699 * of inputs since they are two edits apart.

	700 *

	701 * 2) To be friendly to calling code that might be passing in regular

	702 * expressions, this function will simply return null if it encounters a

	703 * special character (e.g. "\", "?", "+", etc.) in the delta. For example,

	704 * given "Hello" and "Hello, how are you?", it will return null.

	705 *

	706 * 3) If the caller does indeed pass in regular expressions, it must make the

	707 * important assumption that the parts where two such regular expressions may

	708 * differ can always be treated as normal strings. For example,

	709 * "^https?://example.com/ads" and "^https?://example.com/adv" differ only in

	710 * the last character, therefore the regular expressions can safely be merged

	711 * into "^https?://example.com/ad[sv]".

	712 *

	713 * @param {string} s The source string

	714 * @param {string} t The target string

	715 *

	716 * @returns {object} An object describing the single edit operation that must

	717 * occur in the source string in order to arrive at the

	718 * target string

	719 */

369 function closeMatch(s, t)	720 function closeMatch(s, t)

370 {	721 {

371 // This function returns an edit operation, one of "substitute", "delete",
kzar 2017/05/08 08:13:03: Mind using the JSDoc syntax for the comment explaining this function?
Manish Jethani 2017/05/08 23:12:48: Done.
372 // and "insert", along with an index in the source string where the edit must

373 // occur in order to arrive at the target string. If the strings are not a

374 // close match, it returns null.

375 //

376 // Two strings are considered to be a close match if they are one edit

377 // operation apart.

378 //

379 // Deletions or insertions of a contiguous range of characters from one

380 // string into the other, at the same index, are treated as a single edit.

381 // For example, "internal" and "international" are considered to be one edit

382 // apart and therefore a close match.

383

384 // A few things to note:

385 //

386 // 1) This function does not care about the format of the input strings.

387 // For example, the caller may pass in regular expressions, where "[ab]"

388 // and "[bc]" could be considered to be a close match, since the order

389 // within the brackets doesn't matter. This function will still return null

390 // for this set of inputs since they are two edits apart.

391 //

392 // 2) To be friendly to calling code that might be passing in regular

393 // expressions, this function will simply return null if it encounters a

394 // special character (e.g. "\", "?", "+", etc.) in the delta. For example,

395 // given "Hello" and "Hello, how are you?", it will return null.

396 //

397 // 3) If the caller does indeed pass in regular expressions, it must make

398 // the important assumption that the parts where two such regular

399 // expressions may differ can always be treated as normal strings. For

400 // example, "^https?://.*/ads" and "^https?://.*/adv" differ only in the

401 // last character, therefore the regular expressions can safely be merged

402 // into "^https?://.*/ad[sv]".

403

404 let diff = s.length - t.length;	722 let diff = s.length - t.length;

405	723

406 // If target is longer than source, swap them for the purpose of our	724 // If target is longer than source, swap them for the purpose of our

407 // calculation.	725 // calculation.

408 if (diff < 0)	726 if (diff < 0)

409 {	727 {

410 let tmp = s;	728 let tmp = s;

411 s = t;	729 s = t;

412 t = tmp;	730 t = tmp;

413 }	731 }

414	732

415 let edit = null;	733 let edit = null;

416	734

417 let i = 0, j = 0;	735 let i = 0;

418	736 let j = 0;

	737

	738 // Start from the beginning and keep going until we hit a character that

	739 // doesn't match.

419 for (; i < s.length; i++)	740 for (; i < s.length; i++)

420 {	741 {

421 if (s[i] != t[i])	742 if (s[i] != t[i])

422 break;	743 break;

423 }	744 }

424	745

	746 // Now do exactly the same from the end, but also stop if we reach the

	747 // position where we terminated the previous loop.

425 for (; j < t.length; j++)	748 for (; j < t.length; j++)

426 {	749 {

427 if (t.length - j == i \|\| s[s.length - j - 1] != t[t.length - j - 1])	750 if (t.length - j == i \|\| s[s.length - j - 1] != t[t.length - j - 1])
kzar 2017/05/08 08:13:02: I find this part hard to grok, could you add a comment?
Manish Jethani 2017/05/08 23:12:48: Done.
428 break;	751 break;

429 }	752 }

430	753

431 if (diff == 0)	754 if (diff == 0)

432 {	755 {

	756 // If the strings are equal in length and the delta isn't exactly one

	757 // character, it's not a close match.

433 if (t.length - j - i != 1)	758 if (t.length - j - i != 1)

434 return null;	759 return null;

435 }	760 }

436 else if (i != t.length - j)	761 else if (i != t.length - j)

437 {	762 {

	763 // For strings of unequal length, if we haven't found a match for every

	764 // single character in the shorter string counting from both the beginning

	765 // and the end, it's not a close match.

438 return null;	766 return null;

439 }	767 }

440	768

441 for (let k = i; k < s.length - j; k++)	769 for (let k = i; k < s.length - j; k++)

442 {	770 {

443 // If there are any special characters in the delta, bail.	771 // If the delta contains any special characters, it's not a close match.
kzar 2017/05/08 08:13:02: Nit: IMO this comment doesn't add much.
Manish Jethani 2017/05/08 23:12:48: I'm slightly rephrasing here, but since I've added comments all over the function now, I'll leave this here.
kzar 2017/05/09 10:05:46: Fair enough. Thanks for those extra comments, they helped me grok the algorithm.
444 if (s[k] == "." \|\| s[k] == "+" \|\| s[k] == "$" \|\| s[k] == "?" \|\|	772 if (s[k] == "." \|\| s[k] == "+" \|\| s[k] == "$" \|\| s[k] == "?" \|\|

445 s[k] == "{" \|\| s[k] == "}" \|\| s[k] == "(" \|\| s[k] == ")" \|\|	773 s[k] == "{" \|\| s[k] == "}" \|\| s[k] == "(" \|\| s[k] == ")" \|\|

446 s[k] == "[" \|\| s[k] == "]" \|\| s[k] == "\\")	774 s[k] == "[" \|\| s[k] == "]" \|\| s[k] == "\\")

447 return null;	775 return null;

448 }	776 }

449	777

450 if (diff == 0)	778 if (diff == 0)

451 {	779 {

452 edit = {type: "substitute", index: i};	780 edit = {type: "substitute", index: i};

453 }	781 }

454 else if (diff > 0)	782 else if (diff > 0)

455 {	783 {

456 edit = {type: "delete", index: i};	784 edit = {type: "delete", index: i};

457	785

458 if (diff > 1)	786 if (diff > 1)

459 edit.endIndex = s.length - j;	787 edit.endIndex = s.length - j;

460 }	788 }

461 else	789 else

462 {	790 {

463 edit = {type: "insert", index: i};	791 edit = {type: "insert", index: i};

464	792

465 if (diff < -1)	793 if (diff < -1)

466 edit.endIndex = s.length - j;	794 edit.endIndex = s.length - j;

467 }	795 }

468	796

469 return edit;	797 return edit;

470 }	798 }

471	799

472 function eliminateRedundantRulesByURLFilter(rulesInfo)	800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)

473 {	801 {

474 for (let i = 0; i < rulesInfo.length; i++)	802 const heuristicRange = 1000;

	803

	804 let ol = rulesInfo.length;

	805

	806 // Throw out obviously redundant rules.

	807 return async(rulesInfo, (ruleInfo, index) => () =>

475 {	808 {

476 // If this rule is already marked as redundant, don't bother comparing it	809 // If this rule is already marked as redundant, don't bother comparing it

477 // with other rules.	810 // with other rules.

478 if (rulesInfo[i].redundant)	811 if (rulesInfo[index].redundant)

479 continue;	812 return;

480	813

481 for (let j = i + 1; j < rulesInfo.length; j++)	814 let limit = exhaustive ? rulesInfo.length :

	815 Math.min(index + heuristicRange, rulesInfo.length);

	816

	817 for (let i = index, j = i + 1; j < limit; j++)

482 {	818 {

483 if (rulesInfo[j].redundant)	819 if (rulesInfo[j].redundant)

484 continue;	820 continue;

485	821

486 let source = rulesInfo[i].rule.trigger["url-filter"];	822 let source = rulesInfo[i].rule.trigger["url-filter"];

487 let target = rulesInfo[j].rule.trigger["url-filter"];	823 let target = rulesInfo[j].rule.trigger["url-filter"];

488	824

489 if (source.length >= target.length)	825 if (source.length >= target.length)

490 {	826 {

491 // If one URL filter is a substring of the other starting at the	827 // If one URL filter is a substring of the other starting at the

492 // beginning, the other one is clearly redundant.	828 // beginning, the other one is clearly redundant.

493 if (source.substring(0, target.length) == target)	829 if (source.substring(0, target.length) == target)

494 {	830 {

495 rulesInfo[i].redundant = true;	831 rulesInfo[i].redundant = true;

496 break;	832 break;

497 }	833 }

498 }	834 }

499 else if (target.substring(0, source.length) == source)	835 else if (target.substring(0, source.length) == source)

500 {	836 {

501 rulesInfo[j].redundant = true;	837 rulesInfo[j].redundant = true;

502 }	838 }

503 }	839 }

504 }	840 })

505	841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));

506 return rulesInfo.filter(ruleInfo => !ruleInfo.redundant);	842 }

507 }	843

508	844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

509 function mergeRulesByURLFilter(rulesInfo, exhaustive)

510 {	845 {

511 // Closely matching rules are likely to be within a certain range. We only	846 // Closely matching rules are likely to be within a certain range. We only

512 // look for matches within this range. If we increase this value, it can give	847 // look for matches within this range by default. If we increase this value,

513 // us more matches and a smaller resulting rule set, but possibly at a	848 // it can give us more matches and a smaller resulting rule set, but possibly

514 // significant performance cost.	849 // at a significant performance cost.

515 const heuristicRange = 10;	850 //
kzar 2017/05/08 08:13:03: Could you mention the behaviour (or lack thereof) of heuristicRange when exhaustive is true in this comment?
Manish Jethani 2017/05/08 23:12:48: Done.
516	851 // If the exhaustive option is true, we simply ignore this value and look for

517 if (exhaustive)	852 // matches throughout the rule set.

518 // Throw out obviously redundant rules.	853 const heuristicRange = 1000;
kzar 2017/05/08 08:13:02: Nit: Please surround with braces since it spans multiple lines.
Manish Jethani 2017/05/08 23:12:48: Done.
519 rulesInfo = eliminateRedundantRulesByURLFilter(rulesInfo);	854

520	855 let limit = exhaustive ? rulesInfo.length :

521 if (rulesInfo.length <= 1)	856 Math.min(index + heuristicRange, rulesInfo.length);

522 return;	857

523	858 for (let i = index, j = i + 1; j < limit; j++)

524 for (let i = 0; i < rulesInfo.length; i++)	859 {

525 {	860 let source = rulesInfo[i].rule.trigger["url-filter"];

526 let limit = exhaustive ? rulesInfo.length :	861 let target = rulesInfo[j].rule.trigger["url-filter"];

527 Math.min(i + heuristicRange, rulesInfo.length);	862

528	863 let edit = closeMatch(source, target);

529 for (let j = i + 1; j < limit; j++)	864

530 {	865 if (edit)

531 let source = rulesInfo[i].rule.trigger["url-filter"];	866 {

532 let target = rulesInfo[j].rule.trigger["url-filter"];	867 let urlFilter, ruleInfo, match = {edit};

533	868

534 let edit = closeMatch(source, target);	869 if (edit.type == "insert")

535	870 {

536 if (edit)	871 // Convert the insertion into a deletion and stick it on the target

537 {	872 // rule instead. We can only group deletions and substitutions;

538 let urlFilter, ruleInfo, match = {edit};	873 // therefore insertions must be treated as deletions on the target

539	874 // rule.

540 if (edit.type == "insert")	875 urlFilter = target;

	876 ruleInfo = rulesInfo[j];

	877 match.index = i;

	878 edit.type = "delete";

	879 }

	880 else

	881 {

	882 urlFilter = source;

	883 ruleInfo = rulesInfo[i];

	884 match.index = j;

	885 }

	886

	887 // If the edit has an end index, it represents a multiple character

	888 // edit.

	889 let multiEdit = !!edit.endIndex;

	890

	891 if (multiEdit)

	892 {

	893 // We only care about a single multiple character edit because the

	894 // number of characters for such a match doesn't matter, we can

	895 // only merge with one other rule.

	896 if (!ruleInfo.multiEditMatch)

	897 ruleInfo.multiEditMatch = match;

	898 }

	899 else

	900 {

	901 // For single character edits, multiple rules can be merged into

	902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

	903 if (!ruleInfo.matches)

	904 ruleInfo.matches = new Array(urlFilter.length);

	905

	906 // Matches at a particular index. For example, for a source string

	907 // "ads", both target strings "ad" (deletion) and "adv"

	908 // (substitution) match at index 2, hence they are grouped together

	909 // to possibly be merged later into "ad[sv]?".

	910 let matchesForIndex = ruleInfo.matches[edit.index];

	911

	912 if (matchesForIndex)

541 {	913 {

542 // Convert the insertion into a deletion and stick it on the target	914 matchesForIndex.push(match);

543 // rule instead. We can only group deletions and substitutions;

544 // therefore insertions must be treated as deletions on the target

545 // rule.

546 urlFilter = target;

547 ruleInfo = rulesInfo[j];

548 match.index = i;

549 edit.type = "delete";

550 }	915 }

551 else	916 else

552 {	917 {

553 urlFilter = source;	918 matchesForIndex = [match];

554 ruleInfo = rulesInfo[i];	919 ruleInfo.matches[edit.index] = matchesForIndex;

555 match.index = j;

556 }	920 }

557	921

558 // If the edit has an end index, it represents a multiple character	922 // Keep track of the best set of matches. We later sort by this to

559 // edit.	923 // get best results.

560 let multiEdit = !!edit.endIndex;	924 if (!ruleInfo.bestMatches \|\|

561	925 matchesForIndex.length > ruleInfo.bestMatches.length)

562 if (multiEdit)	926 ruleInfo.bestMatches = matchesForIndex;

563 {

564 // We only care about a single multiple character edit because the

565 // number of characters for such a match doesn't matter, we can

566 // only merge with one other rule.

567 if (!ruleInfo.multiEditMatch)

568 ruleInfo.multiEditMatch = match;

569 }

570 else

571 {

572 // For single character edits, multiple rules can be merged into

573 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

574 if (!ruleInfo.matches)

575 ruleInfo.matches = new Array(urlFilter.length);

576

577 // Matches at a particular index. For example, for a source string

578 // "ads", both target strings "ad" (deletion) and "adv"

579 // (substitution) match at index 2, hence they are grouped together

580 // to possibly be merged later into "ad[sv]?".

581 let matchesForIndex = ruleInfo.matches[edit.index];

582

583 if (matchesForIndex)

584 {

585 matchesForIndex.push(match);

586 }

587 else

588 {

589 matchesForIndex = [match];

590 ruleInfo.matches[edit.index] = matchesForIndex;

591 }

592

593 // Keep track of the best set of matches. We later sort by this to

594 // get best results.

595 if (!ruleInfo.bestMatches \|\|

596 matchesForIndex.length > ruleInfo.bestMatches.length)

597 ruleInfo.bestMatches = matchesForIndex;

598 }

599 }	927 }

600 }	928 }

601 }	929 }

602	930 }

	931

	932 function mergeCandidateRulesByURLFilter(rulesInfo)

	933 {

603 // Filter out rules that have no matches at all.	934 // Filter out rules that have no matches at all.

604 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>	935 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>

605 {	936 {

606 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch	937 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch

607 });	938 });

608	939

609 // For best results, we have to sort the candidates by the largest set of	940 // For best results, we have to sort the candidates by the largest set of

610 // matches.	941 // matches.

611 //	942 //

612 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to	943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to

(...skipping 48 matching lines...)
661 if (best.length > 0)	992 if (best.length > 0)

662 {	993 {

663 let urlFilter = rule.trigger["url-filter"];	994 let urlFilter = rule.trigger["url-filter"];

664	995

665 let editIndex = best[0].edit.index;	996 let editIndex = best[0].edit.index;

666	997

667 if (!multiEdit)	998 if (!multiEdit)

668 {	999 {

669 // Merge all the matching rules into this one.	1000 // Merge all the matching rules into this one.

670	1001

671 let characters = [];	1002 let characters = [urlFilter[editIndex]];

672 let quantifier = "";	1003 let quantifier = "";

673	1004

674 for (let match of best)	1005 for (let match of best)

675 {	1006 {

676 if (match.edit.type == "delete")	1007 if (match.edit.type == "delete")

677 {	1008 {

678 quantifier = "?";	1009 quantifier = "?";

679 }	1010 }

680 else	1011 else

681 {	1012 {

682 let character = rulesInfo[match.index].rule	1013 let character = rulesInfo[match.index].rule

683 .trigger["url-filter"][editIndex];	1014 .trigger["url-filter"][editIndex];

684 characters.push(character);	1015

	1016 // Insert any hyphen at the beginning so it gets interpreted as a

	1017 // literal hyphen.

	1018 if (character == "-")

	1019 characters.unshift(character);

	1020 else

	1021 characters.push(character);

685 }	1022 }

686	1023

687 // Mark the target rule as merged so other rules don't try to merge	1024 // Mark the target rule as merged so other rules don't try to merge

688 // it again.	1025 // it again.

689 rulesInfo[match.index].merged = true;	1026 rulesInfo[match.index].merged = true;

690 }	1027 }

691	1028

692 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +	1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +

693 urlFilter.substring(editIndex + 1);	1030 urlFilter.substring(editIndex + 1);

694 if (characters.length > 0)	1031 if (characters.length > 1)

695 {	1032 {

696 urlFilter = urlFilter.substring(0, editIndex) + "[" +	1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +

697 urlFilter[editIndex] + characters.join("") + "]" +	1034 characters.join("") + "]" +

698 urlFilter.substring(editIndex + 1);	1035 urlFilter.substring(editIndex + 1);

699 }	1036 }

700 }	1037 }

701 else	1038 else

702 {	1039 {

703 let editEndIndex = best[0].edit.endIndex;	1040 let editEndIndex = best[0].edit.endIndex;

704	1041

705 // Mark the target rule as merged so other rules don't try to merge it	1042 // Mark the target rule as merged so other rules don't try to merge it

706 // again.	1043 // again.

707 rulesInfo[best[0].index].merged = true;	1044 rulesInfo[best[0].index].merged = true;

708	1045

709 urlFilter = urlFilter.substring(0, editIndex) + "(" +	1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +

710 urlFilter.substring(editIndex, editEndIndex) + ")?" +	1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +

711 urlFilter.substring(editEndIndex);	1048 urlFilter.substring(editEndIndex);

712 }	1049 }

713	1050

714 rule.trigger["url-filter"] = urlFilter;	1051 rule.trigger["url-filter"] = urlFilter;

715	1052

716 // Mark this rule as one that has had other rules merged into it.	1053 // Mark this rule as one that has had other rules merged into it.

717 ruleInfo.mergedInto = true;	1054 ruleInfo.mergedInto = true;

718 }	1055 }

719 }	1056 }

720 }	1057 }

721	1058

	1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)

	1060 {

	1061 return async(rulesInfo, (ruleInfo, index) => () =>

	1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

	1063 )

	1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));

	1065 }

	1066

722 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)	1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)

723 {	1068 {

724 if (rulesInfo.length <= 1)	1069 if (rulesInfo.length <= 1)

725 return;	1070 return;

726	1071

727 let set = new Set();	1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);

728	1073

729 rulesInfo.forEach((ruleInfo, index) =>	1074 for (let i = 1; i < rulesInfo.length; i++)

730 {	1075 {

731 if (ruleInfo.rule[propertyType][property])	1076 for (let value of rulesInfo[i].rule[propertyType][property] \|\| [])

732 {	1077 valueSet.add(value);

733 for (let value of ruleInfo.rule[propertyType][property])	1078

734 set.add(value);	1079 rulesInfo[i].merged = true;

735 }	1080 }

736	1081

737 if (index > 0)	1082 if (valueSet.size > 0)

738 ruleInfo.merged = true;	1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);

739 });

740

741 if (set.size > 0)

742 rulesInfo[0].rule[propertyType][property] = Array.from(set);

743	1084

744 rulesInfo[0].mergedInto = true;	1085 rulesInfo[0].mergedInto = true;

745 }	1086 }

746	1087

747 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)	1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)

748 {	1089 {

749 let mergeableRulesInfoByGroup = new Map();	1090 let mergeableRulesInfoByGroup = new Map();

750	1091

751 rulesInfo.forEach(ruleInfo =>	1092 for (let ruleInfo of rulesInfo)

752 {	1093 {

753 let copy = {	1094 let copy = {

754 trigger: Object.assign({}, ruleInfo.rule.trigger),	1095 trigger: Object.assign({}, ruleInfo.rule.trigger),

755 action: Object.assign({}, ruleInfo.rule.action)	1096 action: Object.assign({}, ruleInfo.rule.action)

756 };	1097 };

757	1098

758 delete copy[propertyType][property];	1099 delete copy[propertyType][property];

759	1100

760 let groupKey = JSON.stringify(copy);	1101 let groupKey = JSON.stringify(copy);

761	1102

762 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey);	1103 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey);

763	1104

764 if (mergeableRulesInfo)	1105 if (mergeableRulesInfo)

765 mergeableRulesInfo.push(ruleInfo);	1106 mergeableRulesInfo.push(ruleInfo);

766 else	1107 else

767 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);	1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);

768 });	1109 }

769	1110

770 return mergeableRulesInfoByGroup;	1111 return mergeableRulesInfoByGroup;

771 }	1112 }

772	1113

773 function mergeRules(rules, options)	1114 function mergeRules(rules, exhaustive)

774 {	1115 {

775 const defaultOptions = {exhaustive: false};

776

777 options = Object.assign({}, defaultOptions, options);

778

779 let rulesInfo = rules.map(rule => ({rule}));	1116 let rulesInfo = rules.map(rule => ({rule}));
kzar 2017/05/08 08:13:02: What's the purpose of this line?
Manish Jethani 2017/05/08 14:03:59: The purpose of this line is to create one "ruleInfo" object for each rule, where the ruleInfo object has one property, "rule", pointing to the rule it belongs to. Essentially this:
    let rulesInfo = new Array(rules.length);
    for (let i = 0; i < rules.length; i++)
      rulesInfo[i] = {rule: rules[i]};
kzar 2017/05/09 10:05:46: Argh, sorry that was an especially dumb question. In my defence a cat jumped on my head at 4am that morning, I couldn't get back to sleep and was kind of dazed all day!
Manish Jethani 2017/05/09 15:52:46: Haha, no worries :)
780	1117

781 groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter")	1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];

782 .forEach(mergeableRulesInfo =>	1119
kzar 2017/05/08 08:13:03: Any reason you used forEach instead of for ... of ... here (and below)? We prefer the latter unless there's a good reason. Oh, there is one problem with that though, the JS Hydra transpiler produces code that evaluates the right hand part of for ... of ... loops for each iteration. So bear that in mind if you do use those, we often assign the value to a variable first to avoid expensive operations from being performed repeatedly!
Manish Jethani 2017/05/08 14:04:00: It was for (let value of map.values()) { ... } at first, but as you said JS Hydra can't handle non-array iterables. It looks for the length property on the iterable, trying to convert this into a regular for loop with an index, i.e.:
    for (let i = 0; i < map.values().length; i++) { let value = map.values()[i]; ... }
The only other way to do this is:
    let mapValues = Array.from(map.values());
    for (let value of mapValues) { ... }
If we simply call the forEach method on the map object, that does the job as well, without creating another intermediate array. I thought this was better because these intermediate arrays would again take up more memory.
kzar 2017/05/09 10:05:46: Well groupRulesByMergeableProperty creates the array either way, so assuming you didn't keep a reference to that after the second array is created I don't see that it would use more memory. That said I don't care too much and IMO this is a pretty reasonable use of forEach.
Manish Jethani 2017/05/09 15:52:46: The intermediate array here would be an array of the arrays created inside groupRulesByMergeableProperty. Since you brought this up, I noticed that we're using a forEach in that function itself where we could use for..of. I've changed it now both in groupRulesByMergeableProperty and mergeRulesByArrayProperty.
783 {	1120 return async(() =>

784 if (mergeableRulesInfo.length > 1)	1121 {

785 mergeRulesByURLFilter(mergeableRulesInfo, options.exhaustive);	1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");

786 });	1123 return async(map.values(), mergeableRulesInfo => () =>

787	1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)

788 // Filter out rules that are redundant or have been merged into other rules.	1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))

789 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&	1126 )

790 !ruleInfo.merged);	1127 .then(() =>

791	1128 {

792 for (let arrayProperty of ["resource-type", "if-domain"])	1129 // Filter out rules that are redundant or have been merged into other

793 {	1130 // rules.

794 groupRulesByMergeableProperty(rulesInfo, "trigger", arrayProperty)	1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&

795 .forEach(mergeableRulesInfo =>	1132 !ruleInfo.merged);
kzar 2017/05/08 08:13:02: This logic seems pretty much the same as above, I wonder if we could avoid duplicating it somehow?
Manish Jethani 2017/05/08 14:03:59: I suppose we could merge them into one, but then we'd end up with an if...else or a function lookup inside that loop. They're also sufficiently different that I thought it made sense to keep them separate. The first one is really expensive, the second one is relatively very cheap. I could still merge them if you feel like that would be better. We'll also have to check performance again. I've found making small changes can hurt performance in a big way. For example, this:
    obj[flag ? "prop1" : "prop2"] = value;
is way, way slower than this:
    if (flag)
      obj.prop1 = value;
    else
      obj.prop2 = value;
For whatever reason.
kzar 2017/05/09 10:05:46: Acknowledged.
796 {

797 if (mergeableRulesInfo.length > 1)

798 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty);

799 });	1133 });

800	1134 })

801 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);	1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>
kzar 2017/05/08 08:13:02
Maybe a dumb question, but instead of setting the merged or redundant property on ruleInfo Objects and then filtering those away couldn't we just delete them immediately instead?

Manish Jethani 2017/05/08 14:03:58
On 2017/05/08 08:13:02, kzar wrote:
> Maybe a dumb question, but instead of setting the merged or redundant property
> on ruleInfo Objects and then filtering those away couldn't we just delete them
> immediately instead?

I could try that, but then we'd have to adjust the indexes i and j every time we delete something from the array. Also deleting individual elements from an array should be way more expensive than marking them first and then filtering them out later using the filter method.

kzar 2017/05/09 10:05:46
On 2017/05/08 14:03:58, Manish Jethani wrote:
> On 2017/05/08 08:13:02, kzar wrote:
> > Maybe a dumb question, but instead of setting the merged or redundant property
> > on ruleInfo Objects and then filtering those away couldn't we just delete them
> > immediately instead?
>
> I could try that, but then we'd have to adjust the indexes i and j every time we
> delete something from the array. Also deleting individual elements from an array
> should be way more expensive than marking them first and then filtering them out
> later using the filter method.

Acknowledged.
802 }	1136 {

803	1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",

804 return rulesInfo.map(ruleInfo => ruleInfo.rule);	1138 arrayProperty);

	1139 return async(map.values(), mergeableRulesInfo => () =>

	1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)

	1141 )

	1142 .then(() =>

	1143 {

	1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);

	1145 });

	1146 }))

	1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));

805 }	1148 }

806	1149

807 let ContentBlockerList =	1150 let ContentBlockerList =

808 /**	1151 /**

809 * Create a new Adblock Plus filter to content blocker list converter	1152 * Create a new Adblock Plus filter to content blocker list converter

810 *	1153 *

	1154 * @param {object} options Options for content blocker list generation

	1155 *

811 * @constructor	1156 * @constructor

812 */	1157 */

813 exports.ContentBlockerList = function ()	1158 exports.ContentBlockerList = function (options)

814 {	1159 {

	1160 const defaultOptions = {

	1161 merge: "auto"

	1162 };

	1163

	1164 this.options = Object.assign({}, defaultOptions, options);

	1165

815 this.requestFilters = [];	1166 this.requestFilters = [];

816 this.requestExceptions = [];	1167 this.requestExceptions = [];

817 this.elemhideFilters = [];	1168 this.elemhideFilters = [];

818 this.elemhideExceptions = [];	1169 this.elemhideExceptions = [];

	1170 this.genericblockExceptions = [];

	1171 this.generichideExceptions = [];

819 this.elemhideSelectorExceptions = new Map();	1172 this.elemhideSelectorExceptions = new Map();

820 };	1173 };

821	1174

822 /**	1175 /**

823 * Add Adblock Plus filter to be converted	1176 * Add Adblock Plus filter to be converted

824 *	1177 *

825 * @param {Filter} filter Filter to convert	1178 * @param {Filter} filter Filter to convert

826 */	1179 */

827 ContentBlockerList.prototype.addFilter = function(filter)	1180 ContentBlockerList.prototype.addFilter = function(filter)

828 {	1181 {

829 if (filter.sitekeys)	1182 if (filter.sitekeys)

830 return;	1183 return;

831 if (filter instanceof filterClasses.RegExpFilter &&	1184 if (filter instanceof filterClasses.RegExpFilter &&

832 filter.regexpSource == null)	1185 filter.regexpSource == null)

833 return;	1186 return;

834	1187

835 if (filter instanceof filterClasses.BlockingFilter)	1188 if (filter instanceof filterClasses.BlockingFilter)

836 this.requestFilters.push(filter);	1189 this.requestFilters.push(filter);

837	1190

838 if (filter instanceof filterClasses.WhitelistFilter)	1191 if (filter instanceof filterClasses.WhitelistFilter)

839 {	1192 {

840 if (filter.contentType & (typeMap.DOCUMENT \| whitelistableRequestTypes))	1193 if (filter.contentType & (typeMap.DOCUMENT \| whitelistableRequestTypes))

841 this.requestExceptions.push(filter);	1194 this.requestExceptions.push(filter);

842	1195

843 if (filter.contentType & typeMap.ELEMHIDE)	1196 if (filter.contentType & typeMap.GENERICBLOCK)

844 this.elemhideExceptions.push(filter);	1197 this.genericblockExceptions.push(filter);

	1198

	1199 if (filter.contentType & typeMap.ELEMHIDE)

	1200 this.elemhideExceptions.push(filter);

	1201 else if (filter.contentType & typeMap.GENERICHIDE)

	1202 this.generichideExceptions.push(filter);

845 }	1203 }

846	1204

847 if (filter instanceof filterClasses.ElemHideFilter)	1205 if (filter instanceof filterClasses.ElemHideFilter)

848 this.elemhideFilters.push(filter);	1206 this.elemhideFilters.push(filter);

849	1207

850 if (filter instanceof filterClasses.ElemHideException)	1208 if (filter instanceof filterClasses.ElemHideException)

851 {	1209 {

852 let domains = this.elemhideSelectorExceptions[filter.selector];	1210 let domains = this.elemhideSelectorExceptions[filter.selector];

853 if (!domains)	1211 if (!domains)

854 domains = this.elemhideSelectorExceptions[filter.selector] = [];	1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];

855	1213

856 parseDomains(filter.domains, domains, []);	1214 parseDomains(filter.domains, domains, []);

857 }	1215 }

858 };	1216 };

859	1217

860 /**	1218 /**

861 * Generate content blocker list for all filters that were added	1219 * Generate content blocker list for all filters that were added

862 *

863 * @returns {Filter} filter Filter to convert

864 */	1220 */

865 ContentBlockerList.prototype.generateRules = function(options)	1221 ContentBlockerList.prototype.generateRules = function()

866 {	1222 {

867 const defaultOptions = {	1223 let cssRules = [];

868 merge: false,	1224 let cssExceptionRules = [];

869 exhaustiveMerge: false	1225 let blockingRules = [];

870 };	1226 let blockingExceptionRules = [];

871	1227

872 options = Object.assign({}, defaultOptions, options);	1228 let ruleGroups = [cssRules, cssExceptionRules,

873	1229 blockingRules, blockingExceptionRules];

874 let rules = [];	1230

875	1231 let genericSelectors = [];

876 let groupedElemhideFilters = new Map();	1232 let groupedElemhideFilters = new Map();

	1233

877 for (let filter of this.elemhideFilters)	1234 for (let filter of this.elemhideFilters)

878 {	1235 {

879 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);	1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);

880 if (!result)	1237 if (!result)

881 continue;	1238 continue;

882	1239

883 if (result.matchDomains.length == 0)	1240 if (result.matchDomains.length == 0)

884 result.matchDomains = ["^https?://"];	1241 {

885	1242 genericSelectors.push(result.selector);

886 for (let matchDomain of result.matchDomains)	1243 }

887 {	1244 else

888 let group = groupedElemhideFilters.get(matchDomain) \|\| [];	1245 {

889 group.push(result.selector);	1246 for (let matchDomain of result.matchDomains)

890 groupedElemhideFilters.set(matchDomain, group);	1247 {

891 }	1248 let group = groupedElemhideFilters.get(matchDomain) \|\| [];

892 }	1249 group.push(result.selector);

	1250 groupedElemhideFilters.set(matchDomain, group);

	1251 }

	1252 }

	1253 }

	1254

	1255 // Separate out the element hiding exceptions that have only a hostname part

	1256 // from the rest. This allows us to implement a workaround for issue #5345

	1257 // (WebKit bug #167423), but as a bonus it also reduces the number of

	1258 // generated rules. The downside is that the exception will only apply to the

	1259 // top-level document, not to iframes. We have to live with this until the

	1260 // WebKit bug is fixed in all supported versions of Safari.

	1261 // https://bugs.webkit.org/show_bug.cgi?id=167423

	1262 //

	1263 // Note that as a result of this workaround we end up with a huge rule set in

	1264 // terms of the amount of memory used. This can cause Node.js to throw

	1265 // "JavaScript heap out of memory". To avoid this, call Node.js with

	1266 // --max_old_space_size=4096

	1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);

	1268

	1269 let genericSelectorExceptionDomains =

	1270 extractFilterDomains(this.generichideExceptions);

	1271 elemhideExceptionDomains.forEach(name =>

	1272 {

	1273 genericSelectorExceptionDomains.add(name);

	1274 });

	1275

	1276 addCSSRules(cssRules, genericSelectors, null,

	1277 genericSelectorExceptionDomains);

	1278

	1279 // Filter out whitelisted domains.

	1280 elemhideExceptionDomains.forEach(domain =>

	1281 groupedElemhideFilters.delete(domain));

893	1282

894 groupedElemhideFilters.forEach((selectors, matchDomain) =>	1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>

895 {	1284 {

896 while (selectors.length)	1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);

897 {	1286 });

898 let selector = selectors.splice(0, selectorLimit).join(", ");	1287

899	1288 let requestFilterExceptionDomains = [];

900 // As of Safari 9.0 element IDs are matched as lowercase. We work around	1289 for (let filter of this.genericblockExceptions)

901 // this by converting to the attribute format [id="elementID"]	1290 {

902 selector = convertIDSelectorsToAttributeSelectors(selector);	1291 let parsed = parseFilterRegexpSource(filter.regexpSource);

903	1292 if (parsed.hostname)

904 rules.push({	1293 requestFilterExceptionDomains.push(parsed.hostname);

905 trigger: {"url-filter": matchDomain,	1294 }

906 "url-filter-is-case-sensitive": true},	1295

907 action: {type: "css-display-none",	1296 for (let filter of this.requestFilters)

908 selector: selector}	1297 {

	1298 convertFilterAddRules(blockingRules, filter, "block", true,

	1299 requestFilterExceptionDomains);

	1300 }

	1301

	1302 for (let filter of this.requestExceptions)

	1303 {

	1304 convertFilterAddRules(blockingExceptionRules, filter,

	1305 "ignore-previous-rules", true);

	1306 }

	1307

	1308 return async(ruleGroups, (group, index) => () =>

	1309 {

	1310 let next = () =>

	1311 {

	1312 if (index == ruleGroups.length - 1)

	1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);

	1314 };

	1315

	1316 if (this.options.merge == "all" \|\|

	1317 (this.options.merge == "auto" &&

	1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))

	1319 {

	1320 return mergeRules(ruleGroups[index], this.options.merge == "all")

	1321 .then(rules =>

	1322 {

	1323 ruleGroups[index] = rules;

	1324 return next();

909 });	1325 });

910 }	1326 }

	1327

	1328 return next();

911 });	1329 });

912

913 for (let filter of this.elemhideExceptions)

914 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);

915 for (let filter of this.requestFilters)

916 convertFilterAddRules(rules, filter, "block", true);

917 for (let filter of this.requestExceptions)

918 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

919

920 rules = rules.filter(rule => !hasNonASCI(rule));

921

922 if (options.merge)

923 {

924 let mergeOptions = {
kzar 2017/05/08 08:13:02
Couldn't you just pass options.exhaustiveMerge straight through instead of putting it inside this Object?

Also wouldn't it make more sense to take the merging options in the constructor instead of this method? I think the defaultOptions logic above looks especially out of place here.

Manish Jethani 2017/05/08 23:12:48
On 2017/05/08 08:13:02, kzar wrote:
> Couldn't you just pass options.exhaustiveMerge straight through instead of
> putting it inside this Object?
>
> Also wouldn't it make more sense to take the merging options in the constructor
> instead of this method? I think the defaultOptions logic above looks especially
> out of place here.

Done.
925 exhaustive: options.exhaustiveMerge

926 };

927

928 rules = mergeRules(rules, mergeOptions);

929 }

930

931 return rules;

932 };	1330 };

LEFT	RIGHT