lib/abp2blocklist.js - Issue 29426594: Issue 3673 - Merge closely matching rules

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Left Patch Set: Add advanced merge support Created May 3, 2017, 4:44 a.m.

Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 /** @module abp2blocklist */	18 /** @module abp2blocklist */

19	19

20 "use strict";	20 "use strict";

21	21

22 let filterClasses = require("filterClasses");	22 let filterClasses = require("filterClasses");

23 let tldjs = require("tldjs");

24 let punycode = require("punycode");	23 let punycode = require("punycode");

25	24

26 const selectorLimit = 5000;	25 const selectorLimit = 5000;

27 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

28 const whitelistableRequestTypes = (typeMap.IMAGE	27

29 \| typeMap.STYLESHEET	28 const httpRequestTypes = typeMap.IMAGE \|

30 \| typeMap.SCRIPT	29 typeMap.STYLESHEET \|

31 \| typeMap.FONT	30 typeMap.SCRIPT \|

32 \| typeMap.MEDIA	31 typeMap.FONT \|

33 \| typeMap.POPUP	32 typeMap.MEDIA \|

34 \| typeMap.OBJECT	33 typeMap.POPUP \|

35 \| typeMap.OBJECT_SUBREQUEST	34 typeMap.OBJECT \|

36 \| typeMap.XMLHTTPREQUEST	35 typeMap.OBJECT_SUBREQUEST \|

37 \| typeMap.PING	36 typeMap.XMLHTTPREQUEST \|

38 \| typeMap.SUBDOCUMENT	37 typeMap.PING \|

39 \| typeMap.OTHER);	38 typeMap.SUBDOCUMENT \|

	39 typeMap.OTHER;

	40 const rawRequestTypes = typeMap.XMLHTTPREQUEST \|

	41 typeMap.WEBSOCKET \|

	42 typeMap.WEBRTC \|

	43 typeMap.OBJECT_SUBREQUEST \|

	44 typeMap.PING \|

	45 typeMap.OTHER;

	46 const whitelistableRequestTypes = httpRequestTypes \|

	47 typeMap.WEBSOCKET \|

	48 typeMap.WEBRTC;

	49

	50 function callLater(func)

	51 {

	52 return new Promise(resolve =>

	53 {

	54 let call = () => resolve(func());

	55

	56 // If this looks like Node.js, call process.nextTick, otherwise call

	57 // setTimeout.

	58 if (typeof process != "undefined")

	59 process.nextTick(call);

	60 else

	61 setTimeout(call, 0);

	62 });

	63 }

	64

	65 function async(callees, mapFunction)

	66 {

	67 if (!(Symbol.iterator in callees))

	68 callees = [callees];

	69

	70 let lastPause = Date.now();

	71 let index = 0;

	72

	73 let promise = Promise.resolve();

	74

	75 for (let next of callees)

	76 {

	77 let currentIndex = index;

	78

	79 promise = promise.then(() =>

	80 {

	81 if (mapFunction)

	82 next = mapFunction(next, currentIndex);

	83

	84 // If it has been 100ms or longer since the last call, take a pause. This

	85 // keeps the browser from freezing up.

	86 let now = Date.now();

	87 if (now - lastPause >= 100)

	88 {

	89 lastPause = now;

	90 return callLater(next);

	91 }

	92

	93 return next();

	94 });

	95

	96 index++;

	97 }

	98

	99 return promise;

	100 }

40	101

41 function parseDomains(domains, included, excluded)	102 function parseDomains(domains, included, excluded)

42 {	103 {

43 for (let domain in domains)	104 for (let domain in domains)

44 {	105 {

45 if (domain != "")	106 if (domain != "")

46 {	107 {

47 let enabled = domains[domain];	108 let enabled = domains[domain];

48 domain = punycode.toASCII(domain.toLowerCase());	109 domain = punycode.toASCII(domain.toLowerCase());

49	110

50 if (!enabled)	111 if (!enabled)

51 excluded.push(domain);	112 excluded.push(domain);

52 else if (!domains[""])	113 else if (!domains[""])

53 included.push(domain);	114 included.push(domain);

54 }	115 }

55 }	116 }

56 }	117 }

57	118

58 function escapeRegExp(s)	119 function escapeRegExp(s)

59 {	120 {

60 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	121 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

61 }	122 }

62	123

63 function matchDomain(domain)	124 function matchDomain(domain)

64 {	125 {

	126 if (!domain)

	127 return "^https?://";

	128

65 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

	130 }

	131

	132 function getURLSchemes(contentType)

	133 {

	134 // If the given content type includes all supported URL schemes, simply

	135 // return a single generic URL scheme pattern. This minimizes the size of the

	136 // generated rule set. The downside to this is that it will also match

	137 // schemes that we do not want to match (e.g. "ftp://"), but this can be

	138 // mitigated by adding exceptions for those schemes.

	139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

	140 contentType & httpRequestTypes)

	141 return ["[^:]+:(//)?"];

	142

	143 let urlSchemes = [];

	144

	145 if (contentType & typeMap.WEBSOCKET)

	146 urlSchemes.push("wss?://");

	147

	148 if (contentType & typeMap.WEBRTC)

	149 urlSchemes.push("stuns?:", "turns?:");

	150

	151 if (contentType & httpRequestTypes)

	152 urlSchemes.push("https?://");

	153

	154 return urlSchemes;

	155 }

	156

	157 function findSubdomainsInList(domain, list)

	158 {

	159 let subdomains = [];

	160 let suffixLength = domain.length + 1;

	161

	162 for (let name of list)

	163 {

	164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

	165 subdomains.push(name.slice(0, -suffixLength));

	166 }

	167

	168 return subdomains;

	169 }

	170

	171 function extractFilterDomains(filters)

	172 {

	173 let domains = new Set();

	174 for (let filter of filters)

	175 {

	176 let parsed = parseFilterRegexpSource(filter.regexpSource);

	177 if (parsed.justHostname)

	178 domains.add(parsed.hostname);

	179 }

	180 return domains;

66 }	181 }

67	182

68 function convertElemHideFilter(filter, elemhideSelectorExceptions)	183 function convertElemHideFilter(filter, elemhideSelectorExceptions)

69 {	184 {

70 let included = [];	185 let included = [];

71 let excluded = [];	186 let excluded = [];

72 let rules = [];

73	187

74 parseDomains(filter.domains, included, excluded);	188 parseDomains(filter.domains, included, excluded);

75	189

76 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

77 return {matchDomains: included.map(matchDomain), selector: filter.selector};	191 return {matchDomains: included, selector: filter.selector};

78 }	192 }

79	193

80 /**	194 /**

81 * Parse the given filter "regexpSource" string. Producing a regular expression,	195 * Parse the given filter "regexpSource" string. Producing a regular expression,

82 * extracting the hostname (if any), deciding if the regular expression is safe	196 * extracting the hostname (if any), deciding if the regular expression is safe

83 * to be converted + matched as lower case and noting if the source contains	197 * to be converted + matched as lower case and noting if the source contains

84 * anything after the hostname.)	198 * anything after the hostname.)

85 *	199 *

86 * @param {string} text regexpSource property of a filter	200 * @param {string} text regexpSource property of a filter

	201 * @param {string} urlScheme The URL scheme to use in the regular expression

87 * @returns {object} An object containing a regular expression string, a bool	202 * @returns {object} An object containing a regular expression string, a bool

88 * indicating if the filter can be safely matched as lower	203 * indicating if the filter can be safely matched as lower

89 * case, a hostname string (or undefined) and a bool	204 * case, a hostname string (or undefined) and a bool

90 * indicating if the source only contains a hostname or not:	205 * indicating if the source only contains a hostname or not:

91 * {regexp: "...",	206 * {regexp: "...",

92 * canSafelyMatchAsLowercase: true/false,	207 * canSafelyMatchAsLowercase: true/false,

93 * hostname: "...",	208 * hostname: "...",

94 * justHostname: true/false}	209 * justHostname: true/false}

95 */	210 */

96 function parseFilterRegexpSource(text)	211 function parseFilterRegexpSource(text, urlScheme)

97 {	212 {

98 let regexp = [];	213 let regexp = [];

99 let lastIndex = text.length - 1;	214

	215 // Convert the text into an array of Unicode characters.

	216 //

	217 // In the case of surrogate pairs (the smiley emoji, for example), one

	218 // Unicode code point is represented by two JavaScript characters together.

	219 // We want to iterate over Unicode code points rather than JavaScript

	220 // characters.

	221 let characters = Array.from(text);

	222

	223 let lastIndex = characters.length - 1;

100 let hostname;	224 let hostname;

101 let hostnameStart = null;	225 let hostnameStart = null;

102 let hostnameFinished = false;	226 let hostnameFinished = false;

103 let justHostname = false;	227 let justHostname = false;

104 let canSafelyMatchAsLowercase = false;	228 let canSafelyMatchAsLowercase = false;

105	229

106 for (let i = 0; i < text.length; i++)	230 if (!urlScheme)

107 {	231 urlScheme = getURLSchemes()[0];

108 let c = text[i];	232

	233 for (let i = 0; i < characters.length; i++)

	234 {

	235 let c = characters[i];

109	236

110 if (hostnameFinished)	237 if (hostnameFinished)

111 justHostname = false;	238 justHostname = false;

112	239

113 // If we're currently inside the hostname we have to be careful not to	240 // If we're currently inside the hostname we have to be careful not to

114 // escape any characters until after we have converted it to punycode.	241 // escape any characters until after we have converted it to punycode.

115 if (hostnameStart != null && !hostnameFinished)	242 if (hostnameStart != null && !hostnameFinished)

116 {	243 {

117 let endingChar = (c == "*" \|\| c == "^" \|\|	244 let endingChar = (c == "*" \|\| c == "^" \|\|

118 c == "?" \|\| c == "/" \|\| c == "\|");	245 c == "?" \|\| c == "/" \|\| c == "\|");

119 if (!endingChar && i != lastIndex)	246 if (!endingChar && i != lastIndex)

120 continue;	247 continue;

121	248

122 hostname = punycode.toASCII(	249 hostname = punycode.toASCII(

123 text.substring(hostnameStart, endingChar ? i : i + 1)	250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

	251 .toLowerCase()

124 );	252 );

125 hostnameFinished = justHostname = true;	253 hostnameFinished = justHostname = true;

126 regexp.push(escapeRegExp(hostname));	254 regexp.push(escapeRegExp(hostname));

127 if (!endingChar)	255 if (!endingChar)

128 break;	256 break;

129 }	257 }

130	258

131 switch (c)	259 switch (c)

132 {	260 {

133 case "*":	261 case "*":

134 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

135 regexp.push(".*");	263 regexp.push(".*");

136 break;	264 break;

137 case "^":	265 case "^":

138 if (i < lastIndex)	266 let alphabet = "a-z";

139 regexp.push(".");	267 // If justHostname is true and we've encountered a "^", it means we're

	268 // still in the hostname part of the URL. Since hostnames are always

	269 // lower case (Punycode), there's no need to include "A-Z" in the

	270 // pattern. Further, subsequent code may lower-case the entire regular

	271 // expression (if the URL contains only the hostname part), leaving us

	272 // with "a-za-z", which would be redundant.

	273 if (!justHostname)

	274 alphabet = "A-Z" + alphabet;

	275 let digits = "0-9";

	276 // Note that the "-" must appear first here in order to retain its

	277 // literal meaning within the brackets.

	278 let specialCharacters = "-_.%";

	279 let separator = "[^" + specialCharacters + alphabet + digits + "]";

	280 if (i == 0)

	281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");

	282 else if (i == lastIndex)

	283 regexp.push("(" + separator + ".*)?$");

	284 else

	285 regexp.push(separator);

140 break;	286 break;

141 case "\|":	287 case "\|":

142 if (i == 0)	288 if (i == 0)

143 {	289 {

144 regexp.push("^");	290 regexp.push("^");

145 break;	291 break;

146 }	292 }

147 if (i == lastIndex)	293 if (i == lastIndex)

148 {	294 {

149 regexp.push("$");	295 regexp.push("$");

150 break;	296 break;

151 }	297 }

152 if (i == 1 && text[0] == "\|")	298 if (i == 1 && characters[0] == "\|")

153 {	299 {

154 hostnameStart = i + 1;	300 hostnameStart = i + 1;

155 canSafelyMatchAsLowercase = true;	301 canSafelyMatchAsLowercase = true;

156 regexp.push("https?://([^/]+\\.)?");	302 regexp.push(urlScheme + "([^/]+\\.)?");

157 break;	303 break;

158 }	304 }

159 regexp.push("\\\|");	305 regexp.push("\\\|");

160 break;	306 break;

161 case "/":	307 case "/":

162 if (!hostnameFinished &&	308 if (!hostnameFinished &&

163 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	309 characters[i - 2] == ":" && characters[i - 1] == "/")

164 {	310 {

165 hostnameStart = i + 1;	311 hostnameStart = i + 1;

166 canSafelyMatchAsLowercase = true;	312 canSafelyMatchAsLowercase = true;

167 }	313 }

168 regexp.push("/");	314 regexp.push("/");

169 break;	315 break;

170 case ".": case "+": case "$": case "?":	316 case ".": case "+": case "$": case "?":

171 case "{": case "}": case "(": case ")":	317 case "{": case "}": case "(": case ")":

172 case "[": case "]": case "\\":	318 case "[": case "]": case "\\":

173 regexp.push("\\", c);	319 regexp.push("\\", c);

174 break;	320 break;

175 default:	321 default:

176 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	322 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

177 c >= "A" && c <= "Z"))	323 c >= "A" && c <= "Z"))

178 canSafelyMatchAsLowercase = false;	324 canSafelyMatchAsLowercase = false;

179 regexp.push(c);	325 regexp.push(c == "%" ? c : encodeURI(c));

180 }	326 }

181 }	327 }

182	328

183 return {	329 return {

184 regexp: regexp.join(""),	330 regexp: regexp.join(""),

185 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

186 hostname: hostname,	332 hostname: hostname,

187 justHostname: justHostname	333 justHostname: justHostname

188 };	334 };

189 }	335 }

190	336

191 function getResourceTypes(filter)	337 function getResourceTypes(contentType)

192 {	338 {

193 let types = [];	339 let types = [];

194	340

195 if (filter.contentType & typeMap.IMAGE)	341 if (contentType & typeMap.IMAGE)

196 types.push("image");	342 types.push("image");

197 if (filter.contentType & typeMap.STYLESHEET)	343 if (contentType & typeMap.STYLESHEET)

198 types.push("style-sheet");	344 types.push("style-sheet");

199 if (filter.contentType & typeMap.SCRIPT)	345 if (contentType & typeMap.SCRIPT)

200 types.push("script");	346 types.push("script");

201 if (filter.contentType & typeMap.FONT)	347 if (contentType & typeMap.FONT)

202 types.push("font");	348 types.push("font");

203 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	349 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

204 types.push("media");	350 types.push("media");

205 if (filter.contentType & typeMap.POPUP)	351 if (contentType & typeMap.POPUP)

206 types.push("popup");	352 types.push("popup");

207 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	353 if (contentType & rawRequestTypes)

208 typeMap.OBJECT_SUBREQUEST \|

209 typeMap.PING \|

210 typeMap.OTHER))

211 types.push("raw");	354 types.push("raw");

212 if (filter.contentType & typeMap.SUBDOCUMENT)	355 if (contentType & typeMap.SUBDOCUMENT)

213 types.push("document");	356 types.push("document");

214	357

215 return types;	358 return types;

216 }	359 }

217	360

218 function addDomainPrefix(domains)	361 function makeRuleCopies(trigger, action, urlSchemes)

219 {	362 {

220 let result = [];	363 let copies = [];

221	364

222 for (let domain of domains)	365 // Always make a deep copy of the rule, since rules may have to be

223 {	366 // manipulated individually at a later stage.

224 result.push(domain);	367 let stringifiedTrigger = JSON.stringify(trigger);

225	368

226 if (tldjs.getDomain(domain) == domain)	369 let filterPattern = trigger["url-filter"].substring(1);

227 result.push("www." + domain);	370 let startIndex = 0;

228 }	371

229	372 // If the URL filter already begins with the first URL scheme pattern, skip

230 return result;	373 // it.

231 }	374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

232	375 {

233 function convertFilterAddRules(rules, filter, action, withResourceTypes)	376 filterPattern = filterPattern.substring(urlSchemes[0].length);

234 {	377 startIndex = 1;

235 let parsed = parseFilterRegexpSource(filter.regexpSource);	378 }

	379 else

	380 {

	381 filterPattern = ".*" + filterPattern;

	382 }

	383

	384 for (let i = startIndex; i < urlSchemes.length; i++)

	385 {

	386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	387 "url-filter": "^" + urlSchemes[i] + filterPattern

	388 });

	389 copies.push({trigger: copyTrigger, action});

	390 }

	391

	392 return copies;

	393 }

	394

	395 function excludeTopURLFromTrigger(trigger)

	396 {

	397 trigger["unless-top-url"] = [trigger["url-filter"]];

	398 if (trigger["url-filter-is-case-sensitive"])

	399 trigger["top-url-filter-is-case-sensitive"] = true;

	400 }

	401

	402 function convertFilterAddRules(rules, filter, action, withResourceTypes,

	403 exceptionDomains, contentType)

	404 {

	405 if (!contentType)

	406 contentType = filter.contentType;

	407

	408 // If WebSocket or WebRTC are given along with other options but not

	409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,

	410 // we must generate multiple rules. For example, for the filter

	411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

	412 // and another rule with "^https?://" and "image". If we merge the two, we

	413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)

	414 // inadvertently.

	415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

	416 !(contentType & typeMap.WEBRTC &&

	417 contentType & rawRequestTypes & httpRequestTypes)) \|\|

	418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

	419 !(contentType & typeMap.WEBSOCKET &&

	420 contentType & rawRequestTypes & httpRequestTypes)))

	421 {

	422 if (contentType & typeMap.WEBSOCKET)

	423 {

	424 convertFilterAddRules(rules, filter, action, withResourceTypes,

	425 exceptionDomains, typeMap.WEBSOCKET);

	426 }

	427

	428 if (contentType & typeMap.WEBRTC)

	429 {

	430 convertFilterAddRules(rules, filter, action, withResourceTypes,

	431 exceptionDomains, typeMap.WEBRTC);

	432 }

	433

	434 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	435

	436 if (!contentType)

	437 return;

	438 }

	439

	440 let urlSchemes = getURLSchemes(contentType);

	441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

236	442

237 // For the special case of $document whitelisting filters with just a domain	443 // For the special case of $document whitelisting filters with just a domain

238 // we can generate an equivalent blocking rule exception using if-domain.	444 // we can generate an equivalent blocking rule exception using if-domain.

239 if (filter instanceof filterClasses.WhitelistFilter &&	445 if (filter instanceof filterClasses.WhitelistFilter &&

240 filter.contentType & typeMap.DOCUMENT &&	446 contentType & typeMap.DOCUMENT &&

241 parsed.justHostname)	447 parsed.justHostname)

242 {	448 {

243 rules.push({	449 rules.push({

244 trigger: {	450 trigger: {

245 "url-filter": ".*",	451 "url-filter": ".*",

246 "if-domain": addDomainPrefix([parsed.hostname])	452 "if-domain": ["*" + parsed.hostname]

247 },	453 },

248 action: {type: "ignore-previous-rules"}	454 action: {type: "ignore-previous-rules"}

249 });	455 });

250 // If the filter contains other supported options we'll need to generate	456 // If the filter contains other supported options we'll need to generate

251 // further rules for it, but if not we can simply return now.	457 // further rules for it, but if not we can simply return now.

252 if (!(filter.contentType & whitelistableRequestTypes))	458 if (!(contentType & whitelistableRequestTypes))

253 return;	459 return;

254 }	460 }

255	461

256 let trigger = {"url-filter": parsed.regexp};	462 let trigger = {"url-filter": parsed.regexp};

257	463

258 // Limit rules to HTTP(S) URLs	464 // If the URL filter begins with one of the URL schemes for this content

259 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	465 // type, we generate additional rules for all the URL scheme patterns;

260 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	466 // otherwise, if the start of the URL filter literally matches the first URL

	467 // scheme pattern, we just generate additional rules for the remaining URL

	468 // scheme patterns.

	469 //

	470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	471 // in front of this and generate two additional rules for

	472 // "^stuns?:.stun:foo" and "^turns?:.stun:foo". On the other hand,

	473 // "\|\|foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	474 // "^turns?:([^/]+\\.)?foo" in addition.

	475 //

	476 // Note that the filter can be already anchored to the beginning

	477 // (e.g. "\|stun:foo$webrtc"), in which case we do not generate any additional

	478 // rules.

	479 let needAltRules = trigger["url-filter"][0] != "^" \|\|

	480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	481

	482 if (trigger["url-filter"][0] != "^")

	483 {

	484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	485 .test(trigger["url-filter"])))

	486 {

	487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	488 }

	489

	490 trigger["url-filter"] = "^" + trigger["url-filter"];

	491 }

261	492

262 // For rules containing only a hostname we know that we're matching against	493 // For rules containing only a hostname we know that we're matching against

263 // a lowercase string unless the matchCase option was passed.	494 // a lowercase string unless the matchCase option was passed.

264 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

265 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

266	497

267 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	498 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

268 trigger["url-filter-is-case-sensitive"] = true;	499 trigger["url-filter-is-case-sensitive"] = true;

269	500

270 let included = [];	501 let included = [];

271 let excluded = [];	502 let excluded = [];

272	503

273 parseDomains(filter.domains, included, excluded);	504 parseDomains(filter.domains, included, excluded);

274	505

	506 if (exceptionDomains)

	507 excluded = excluded.concat(exceptionDomains);

	508

275 if (withResourceTypes)	509 if (withResourceTypes)

276 {	510 {

277 trigger["resource-type"] = getResourceTypes(filter);	511 let resourceTypes = getResourceTypes(contentType);

278	512

279 if (trigger["resource-type"].length == 0)	513 // Content blocker rules can't differentiate between sub-document requests

	514 // (iframes) and top-level document requests. To avoid too many false

	515 // positives, we prevent rules with no hostname part from blocking document

	516 // requests.

	517 //

	518 // Once Safari 11 becomes our minimum supported version, we could change

	519 // our approach here to use the new "unless-top-url" property instead.

	520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)

	521 resourceTypes = resourceTypes.filter(type => type != "document");

	522

	523 if (resourceTypes.length == 0)

280 return;	524 return;

	525

	526 trigger["resource-type"] = resourceTypes;

281 }	527 }

282	528

283 if (filter.thirdParty != null)	529 if (filter.thirdParty != null)

284 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

285	531

	532 let addTopLevelException = false;

	533

286 if (included.length > 0)	534 if (included.length > 0)

287 trigger["if-domain"] = addDomainPrefix(included);	535 {

	536 trigger["if-domain"] = [];

	537

	538 for (let name of included)

	539 {

	540 // If this is a blocking filter or an element hiding filter, add the

	541 // subdomain wildcard only if no subdomains have been excluded.

	542 let notSubdomains = null;

	543 if ((filter instanceof filterClasses.BlockingFilter \|\|

	544 filter instanceof filterClasses.ElemHideFilter) &&

	545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)

	546 {

	547 trigger["if-domain"].push(name);

	548

	549 // Add the "www" prefix but only if it hasn't been excluded.

	550 if (!notSubdomains.includes("www"))

	551 trigger["if-domain"].push("www." + name);

	552 }

	553 else

	554 {

	555 trigger["if-domain"].push("*" + name);

	556 }

	557 }

	558 }

288 else if (excluded.length > 0)	559 else if (excluded.length > 0)

289 trigger["unless-domain"] = addDomainPrefix(excluded);	560 {

	561 trigger["unless-domain"] = excluded.map(name => "*" + name);

	562 }

	563 else if (filter instanceof filterClasses.BlockingFilter &&

	564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)

	565 {

	566 // Rules with a hostname part are still allowed to block document requests,

	567 // but we add an exception for top-level documents.

	568 //

	569 // Note that we can only do this if there's no "unless-domain" property for

	570 // now. This also only works in Safari 11 onwards, while older versions

	571 // simply ignore this property. Once Safari 11 becomes our minimum

	572 // supported version, we can merge "unless-domain" into "unless-top-url".

	573 addTopLevelException = true;

	574 excludeTopURLFromTrigger(trigger);

	575 }

290	576

291 rules.push({trigger: trigger, action: {type: action}});	577 rules.push({trigger: trigger, action: {type: action}});

292 }	578

293	579 if (needAltRules)

294 function hasNonASCI(obj)	580 {

295 {	581 // Generate additional rules for any alternative URL schemes.

296 if (typeof obj == "string")	582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

297 {	583 {

298 if (/[^\x00-\x7F]/.test(obj))	584 if (addTopLevelException)

299 return true;	585 excludeTopURLFromTrigger(altRule.trigger);

300 }	586

301	587 rules.push(altRule);

302 if (typeof obj == "object")	588 }

303 {	589 }

304 if (obj instanceof Array)

305 for (let item of obj)

306 if (hasNonASCI(item))

307 return true;

308

309 let names = Object.getOwnPropertyNames(obj);

310 for (let name of names)

311 if (hasNonASCI(obj[name]))

312 return true;

313 }

314

315 return false;

316 }	590 }

317	591

318 function convertIDSelectorsToAttributeSelectors(selector)	592 function convertIDSelectorsToAttributeSelectors(selector)

319 {	593 {

320 // First we figure out where all the IDs are	594 // First we figure out where all the IDs are

321 let sep = "";	595 let sep = "";

322 let start = null;	596 let start = null;

323 let positions = [];	597 let positions = [];

324 for (let i = 0; i < selector.length; i++)	598 for (let i = 0; i < selector.length; i++)

325 {	599 {

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
359 {	633 {

360 newSelector.push(selector.substring(i, pos.start));	634 newSelector.push(selector.substring(i, pos.start));

361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');	635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');

362 i = pos.end;	636 i = pos.end;

363 }	637 }

364 newSelector.push(selector.substring(i));	638 newSelector.push(selector.substring(i));

365	639

366 return newSelector.join("");	640 return newSelector.join("");

367 }	641 }

368	642

369 function closeMatch(s, t, {multi = false} = {})	643 function addCSSRules(rules, selectors, domain, exceptionDomains)
kzar 2017/05/03 11:17:24
I've not seen this syntax before `{multi = false} = {}` could you explain?

Manish Jethani 2017/05/03 14:41:54
On 2017/05/03 11:17:24, kzar wrote:
> I've not seen this syntax before `{multi = false} = {}` could you explain?

This:

  function func(param1, param2, {option1 = false} = {})
  {
    ...
  }

Is the equivalent of this:

  function func(param1, param2, options)
  {
    let option1 = false;

    if (options && options.option1 !== undefined) // Note: strict equality
      option1 = options.option1;

    ...
  }

It's part of the ES6 standard.

http://2ality.com/2015/01/es6-destructuring.html#simulating-named-parameters-...

The idea is that a function will take a list of parameters that are strictly required for it to do its job, and then a bunch of options. The list of options can keep growing over time. Here they can be added in a backwards compatible way without much fuss. This is a common pattern in JavaScript.

kzar 2017/05/03 15:19:04
On 2017/05/03 14:41:54, Manish Jethani wrote:
> (full quote of the reply above)

Acknowledged.
370 {	644 {

371 // This function returns an edit operation (one of "substitute", "delete",	645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;

372 // and "insert") along with an index in the source string where the edit	646

373 // should occur in order to arrive at the target string.	647 exceptionDomains.forEach(name =>

374	648 {

	649 // For domain-specific filters, include the exception domains only if

	650 // they're subdomains of the given domain.

	651 if (!domain \|\| name.substr(-domain.length - 1) == "." + domain)

	652 unlessDomain.push("*" + name);

	653 });

	654

	655 while (selectors.length)

	656 {

	657 let selector = selectors.splice(0, selectorLimit).join(", ");

	658

	659 // As of Safari 9.0 element IDs are matched as lowercase. We work around

	660 // this by converting to the attribute format [id="elementID"]

	661 selector = convertIDSelectorsToAttributeSelectors(selector);

	662

	663 let rule = {

	664 trigger: {"url-filter": matchDomain(domain),

	665 "url-filter-is-case-sensitive": true},

	666 action: {type: "css-display-none",

	667 selector: selector}

	668 };

	669

	670 if (unlessDomain)

	671 rule.trigger["unless-domain"] = unlessDomain;

	672

	673 rules.push(rule);

	674 }

	675 }

	676

	677 /**

	678 * Check if two strings are a close match

	679 *

	680 * This function returns an edit operation, one of "substitute", "delete", and

	681 * "insert", along with an index in the source string where the edit must occur

	682 * in order to arrive at the target string. If the strings are not a close

	683 * match, it returns null.

	684 *

	685 * Two strings are considered to be a close match if they are one edit

	686 * operation apart.

	687 *

	688 * Deletions or insertions of a contiguous range of characters from one string

	689 * into the other, at the same index, are treated as a single edit. For

	690 * example, "internal" and "international" are considered to be one edit apart

	691 * and therefore a close match.

	692 *

	693 * A few things to note:

	694 *

	695 * 1) This function does not care about the format of the input strings. For

	696 * example, the caller may pass in regular expressions, where "[ab]" and

	697 * "[bc]" could be considered to be a close match, since the order within the

	698 * brackets doesn't matter. This function will still return null for this set

	699 * of inputs since they are two edits apart.

	700 *

	701 * 2) To be friendly to calling code that might be passing in regular

	702 * expressions, this function will simply return null if it encounters a

	703 * special character (e.g. "\", "?", "+", etc.) in the delta. For example,

	704 * given "Hello" and "Hello, how are you?", it will return null.

	705 *

	706 * 3) If the caller does indeed pass in regular expressions, it must make the

	707 * important assumption that the parts where two such regular expressions may

	708 * differ can always be treated as normal strings. For example,

	709 * "^https?://example.com/ads" and "^https?://example.com/adv" differ only in

	710 * the last character, therefore the regular expressions can safely be merged

	711 * into "^https?://example.com/ad[sv]".

	712 *

	713 * @param {string} s The source string

	714 * @param {string} t The target string

	715 *

	716 * @returns {object} An object describing the single edit operation that must

	717 * occur in the source string in order to arrive at the

	718 * target string

	719 */

	720 function closeMatch(s, t)

	721 {

375 let diff = s.length - t.length;	722 let diff = s.length - t.length;

376

377 // If the string lenghts differ by more than one character, we cannot arrive
kzar 2017/05/03 11:17:24 Nit: Typo "lenghts". Manish Jethani 2017/05/04 02:49:32 On 2017/05/03 11:17:24, kzar wrote: > Nit: Typo "lenghts". Done.
378 // at target from source in a single edit operation.

379 if (!multi && (diff < -1 \|\| diff > 1))

380 return null;

381	723

382 // If target is longer than source, swap them for the purpose of our	724 // If target is longer than source, swap them for the purpose of our

383 // calculation.	725 // calculation.

384 if (diff < 0)	726 if (diff < 0)

385 {	727 {

386 let tmp = s;	728 let tmp = s;

387 s = t;	729 s = t;

388 t = tmp;	730 t = tmp;

389 }	731 }

390	732

391 let edit = null;	733 let edit = null;

392 let multiEdit = false;	734

393	735 let i = 0;

394 let j = 0;	736 let j = 0;

395	737

396 for (let i = 0; i < s.length; i++)	738 // Start from the beginning and keep going until we hit a character that

397 {	739 // doesn't match.

398 if (s[i] == t[j])	740 for (; i < s.length; i++)

399 {	741 {

400 j++;	742 if (s[i] != t[i])

401	743 break;

402 if (edit && multiEdit && !edit.closeIndex)	744 }

403 edit.closeIndex = i;	745

404 }	746 // Now do exactly the same from the end, but also stop if we reach the

405 else if (edit && (!multi \|\| diff == 0 \|\| edit.closeIndex))	747 // position where we terminated the previous loop.

406 {	748 for (; j < t.length; j++)

407 // Since we want one and only one edit operation, we must bail here.	749 {

	750 if (t.length - j == i \|\| s[s.length - j - 1] != t[t.length - j - 1])

	751 break;

	752 }

	753

	754 if (diff == 0)

	755 {

	756 // If the strings are equal in length and the delta isn't exactly one

	757 // character, it's not a close match.

	758 if (t.length - j - i != 1)

408 return null;	759 return null;

409 }	760 }

410 else if ((s[i] == "." \|\| s[i] == "+" \|\| s[i] == "$" \|\| s[i] == "?" \|\|	761 else if (i != t.length - j)

411 s[i] == "{" \|\| s[i] == "}" \|\| s[i] == "(" \|\| s[i] == ")" \|\|	762 {

412 s[i] == "[" \|\| s[i] == "]" \|\| s[i] == "\\") \|\|	763 // For strings of unequal length, if we haven't found a match for every

413 (t[j] == "." \|\| t[j] == "+" \|\| t[j] == "$" \|\| t[j] == "?" \|\|	764 // single character in the shorter string counting from both the beginning

414 t[j] == "{" \|\| t[j] == "}" \|\| t[j] == "(" \|\| t[j] == ")" \|\|	765 // and the end, it's not a close match.

415 t[j] == "[" \|\| t[j] == "]" \|\| t[j] == "\\"))	766 return null;

416 {	767 }

417 // We don't deal with special characters for now.	768
kzar 2017/05/03 11:17:24 So we skip special characters in the url-filter regexp, but I wonder if that's enough to avoid messing up regexps. For example s = "[ab]" b = "[bc]" Wouldn't we end up with "[abc]"? I'm not sure if things like that are a problem in practice though, what do you think? Manish Jethani 2017/05/03 14:41:54 On 2017/05/03 11:17:24, kzar wrote: > So we skip special characters in the url-filter regexp, but I wonder if that's > enough to avoid messing up regexps. For example > > s = "[ab]" > b = "[bc]" > > Wouldn't we end up with "[abc]"? The above is not a good example because this is not even supported. The "edit distance" (Levenshtein distance) between the two is greater than 1. It's going to treat the above two inputs as "not close matches". However let's take this one: "[ab]" and "[ac]". It's going to mess up and give you "[a[bc]]". I thought of supporting this, but then I decided against it after looking at the actual values for the URL filters. There is not a single instance of this case. The code right now makes the assumption that this never occurs in the input. The ignoring of special characters is more for when the character appears as a literal in the original filter. For example, "foo.com/?ad" becomes "^https?://foo\.com/\?ad", and we just don't want to deal with the "?" because it is escaped (which means we really have to consider the preceding backslash with it, and that would complicate things). kzar 2017/05/03 15:19:04 On 2017/05/03 14:41:54, Manish Jethani wrote: > However let's take this one: "[ab]" and "[ac]". It's going to mess up and give > you "[a[bc]]". > > I thought of supporting this, but then I decided against it after looking at the > actual values for the URL filters. There is not a single instance of this case. > The code right now makes the assumption that this never occurs in the input. Maybe add a comment explaining that assumption? Manish Jethani 2017/05/04 02:49:32 On 2017/05/03 15:19:04, kzar wrote: > On 2017/05/03 14:41:54, Manish Jethani wrote: > > However let's take this one: "[ab]" and "[ac]". It's going to mess up and give > > you "[a[bc]]". > > > > I thought of supporting this, but then I decided against it after looking at > the > > actual values for the URL filters. There is not a single instance of this > case. > > The code right now makes the assumption that this never occurs in the input. > > Maybe add a comment explaining that assumption? > Added a comment to explain this.
	769 for (let k = i; k < s.length - j; k++)

	770 {

	771 // If the delta contains any special characters, it's not a close match.

	772 if (s[k] == "." \|\| s[k] == "+" \|\| s[k] == "$" \|\| s[k] == "?" \|\|

	773 s[k] == "{" \|\| s[k] == "}" \|\| s[k] == "(" \|\| s[k] == ")" \|\|

	774 s[k] == "[" \|\| s[k] == "]" \|\| s[k] == "\\")

418 return null;	775 return null;

419 }	776 }

420 else	777

421 {	778 if (diff == 0)

422 if (diff == 0)	779 {
kzar 2017/05/03 11:17:24 Nit: Couldn't this be an `else if` too? Manish Jethani 2017/05/04 02:49:32 On 2017/05/03 11:17:24, kzar wrote: > Nit: Couldn't this be an `else if` too? Done.
423 {	780 edit = {type: "substitute", index: i};

424 // If both strings are equal in length, this is a substitution.	781 }

425 edit = {type: "substitute", index: i};	782 else if (diff > 0)

426 j++;	783 {

	784 edit = {type: "delete", index: i};

	785

	786 if (diff > 1)

	787 edit.endIndex = s.length - j;

	788 }

	789 else

	790 {

	791 edit = {type: "insert", index: i};

	792

	793 if (diff < -1)

	794 edit.endIndex = s.length - j;

	795 }

	796

	797 return edit;

	798 }

	799

	800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)

	801 {

	802 const heuristicRange = 1000;

	803

	804 let ol = rulesInfo.length;

	805

	806 // Throw out obviously redundant rules.

	807 return async(rulesInfo, (ruleInfo, index) => () =>

	808 {

	809 // If this rule is already marked as redundant, don't bother comparing it

	810 // with other rules.

	811 if (rulesInfo[index].redundant)

	812 return;

	813

	814 let limit = exhaustive ? rulesInfo.length :

	815 Math.min(index + heuristicRange, rulesInfo.length);

	816

	817 for (let i = index, j = i + 1; j < limit; j++)

	818 {

	819 if (rulesInfo[j].redundant)

	820 continue;

	821

	822 let source = rulesInfo[i].rule.trigger["url-filter"];

	823 let target = rulesInfo[j].rule.trigger["url-filter"];

	824

	825 if (source.length >= target.length)

	826 {

	827 // If one URL filter is a substring of the other starting at the

	828 // beginning, the other one is clearly redundant.

	829 if (source.substring(0, target.length) == target)

	830 {

	831 rulesInfo[i].redundant = true;

	832 break;

	833 }

	834 }

	835 else if (target.substring(0, source.length) == source)

	836 {

	837 rulesInfo[j].redundant = true;

	838 }

	839 }

	840 })

	841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));

	842 }

	843

	844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

	845 {

	846 // Closely matching rules are likely to be within a certain range. We only

	847 // look for matches within this range by default. If we increase this value,

	848 // it can give us more matches and a smaller resulting rule set, but possibly

	849 // at a significant performance cost.

	850 //

	851 // If the exhaustive option is true, we simply ignore this value and look for

	852 // matches throughout the rule set.

	853 const heuristicRange = 1000;

	854

	855 let limit = exhaustive ? rulesInfo.length :

	856 Math.min(index + heuristicRange, rulesInfo.length);

	857

	858 for (let i = index, j = i + 1; j < limit; j++)

	859 {

	860 let source = rulesInfo[i].rule.trigger["url-filter"];

	861 let target = rulesInfo[j].rule.trigger["url-filter"];

	862

	863 let edit = closeMatch(source, target);

	864

	865 if (edit)

	866 {

	867 let urlFilter, ruleInfo, match = {edit};

	868

	869 if (edit.type == "insert")

	870 {

	871 // Convert the insertion into a deletion and stick it on the target

	872 // rule instead. We can only group deletions and substitutions;

	873 // therefore insertions must be treated as deletions on the target

	874 // rule.

	875 urlFilter = target;

	876 ruleInfo = rulesInfo[j];

	877 match.index = i;

	878 edit.type = "delete";

427 }	879 }

428 else	880 else

429 {	881 {

430 if (edit)	882 urlFilter = source;

431 multiEdit = true;	883 ruleInfo = rulesInfo[i];

432 else if (diff > 0)	884 match.index = j;
kzar 2017/05/03 11:17:24 Nit: Please use braces since the clause spans multiple lines. Manish Jethani 2017/05/04 02:49:32 On 2017/05/03 11:17:24, kzar wrote: > Nit: Please use braces since the clause spans multiple lines. Done.
433 // If the source string is longer, this is a deletion.	885 }

434 edit = {type: "delete", index: i};	886

	887 // If the edit has an end index, it represents a multiple character

	888 // edit.

	889 let multiEdit = !!edit.endIndex;

	890

	891 if (multiEdit)

	892 {

	893 // We only care about a single multiple character edit because the

	894 // number of characters for such a match doesn't matter, we can

	895 // only merge with one other rule.

	896 if (!ruleInfo.multiEditMatch)

	897 ruleInfo.multiEditMatch = match;

	898 }

	899 else

	900 {

	901 // For single character edits, multiple rules can be merged into

	902 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".

	903 if (!ruleInfo.matches)

	904 ruleInfo.matches = new Array(urlFilter.length);

	905

	906 // Matches at a particular index. For example, for a source string

	907 // "ads", both target strings "ad" (deletion) and "adv"

	908 // (substitution) match at index 2, hence they are grouped together

	909 // to possibly be merged later into "ad[sv]?".

	910 let matchesForIndex = ruleInfo.matches[edit.index];

	911

	912 if (matchesForIndex)

	913 {

	914 matchesForIndex.push(match);

	915 }

435 else	916 else

436 edit = {type: "insert", index: i};	917 {

	918 matchesForIndex = [match];

	919 ruleInfo.matches[edit.index] = matchesForIndex;

	920 }

	921

	922 // Keep track of the best set of matches. We later sort by this to

	923 // get best results.

	924 if (!ruleInfo.bestMatches \|\|

	925 matchesForIndex.length > ruleInfo.bestMatches.length)

	926 ruleInfo.bestMatches = matchesForIndex;

437 }	927 }

438 }	928 }

439 }	929 }

440	930 }

441 if (edit && multiEdit && !edit.closeIndex)	931

442 {	932 function mergeCandidateRulesByURLFilter(rulesInfo)

443 if (j < t.length)	933 {

444 return null;	934 // Filter out rules that have no matches at all.

445	935 let candidateRulesInfo = rulesInfo.filter(ruleInfo =>

446 edit.closeIndex = s.length;	936 {

447 }	937 return ruleInfo.bestMatches \|\| ruleInfo.multiEditMatch

448

449 return edit;

450 }

451

452 function ruleWithoutURLFilter(rule)

453 {

454 let copy = {
kzar 2017/05/03 15:19:04 How about `return Object.create(rule, {"url-filter": {value: undefined}});` Manish Jethani 2017/05/04 02:49:31 On 2017/05/03 15:19:04, kzar wrote: > How about `return Object.create(rule, {"url-filter": {value: undefined}});` That would not work for multiple reasons, but most notably because "url-filter" is a property of rule.trigger, not rule itself. The other reason is that an object created that way has no own, enumerable properties, therefore the stringified version is "{}".
455 trigger: Object.assign({}, rule.trigger),

456 action: Object.assign({}, rule.action)

457 };

458

459 delete copy.trigger["url-filter"];

460

461 return copy;

462 }

463

464 function mergeCloselyMatchingRules(rules, {multi = false} = {})

465 {

466 // Closely matching rules are likely to be within a certain range. We only

467 // look for matches within this range. If we increase this value, it can give

468 // us more matches and a smaller resulting rule set, but possibly at a

469 // significant performance cost.

470 const heuristicRange = 100;
kzar 2017/05/03 15:19:04 Since the code either runs in a place where speed really matters, or where it doesn't matter at all, perhaps we should change this and the multi option? I was thinking "multi" could be renamed "slow" or something like that. If slow's true then match multiple character changes and have no heuristicRange limit. We could perhaps use the option to make other decisions in the future too, if we think of other ways to process the rules. Manish Jethani 2017/05/04 02:49:32 On 2017/05/03 15:19:04, kzar wrote: > Since the code either runs in a place where speed really matters, or where it > doesn't matter at all, perhaps we should change this and the multi option? > > I was thinking "multi" could be renamed "slow" or something like that. If slow's > true then match multiple character changes and have no heuristicRange limit. We > could perhaps use the option to make other decisions in the future too, if we > think of other ways to process the rules. In the latest update the generateRules function takes both general options like fastMerge and more specific options like advancedMerge (experimental or slow stuff) and exhaustiveMerge (test every rule against every other rule). If fast is true, it'll skip both these; if set to false, it'll be as experimental and as slow as it can (but you can still turn off individual settings).
471

472 let rulesInfo = new Array(rules.length);

473

474 rules.forEach((rule, index) =>

475 {

476 rulesInfo[index] = {rule};
kzar 2017/05/03 11:17:24 I'm not sure syntax like this will work for Safari 9 :( Manish Jethani 2017/05/03 14:41:54 On 2017/05/03 11:17:24, kzar wrote: > I'm not sure syntax like this will work for Safari 9 :( I'll check, but if it doesn't work then I'll have to change this in a few places. kzar 2017/05/08 08:13:02 On 2017/05/03 14:41:54, Manish Jethani wrote: > On 2017/05/03 11:17:24, kzar wrote: > > I'm not sure syntax like this will work for Safari 9 :( > > I'll check, but if it doesn't work then I'll have to change this > in a few places. You mentioned testing the code on Safari now, but unless I missed it not the version. Which version did you test with? Manish Jethani 2017/05/08 14:03:58 On 2017/05/08 08:13:02, kzar wrote: > On 2017/05/03 14:41:54, Manish Jethani wrote: > > On 2017/05/03 11:17:24, kzar wrote: > > > I'm not sure syntax like this will work for Safari 9 :( > > > > I'll check, but if it doesn't work then I'll have to change this > > in a few places. > > You mentioned testing the code on Safari now, but unless I missed it not the > version. Which version did you test with? I've been testing with Safari 10. Anyway, this is not a problem because JS Hydra transpiles {rule} into {rule: rule}. kzar 2017/05/09 10:05:46 On 2017/05/08 14:03:58, Manish Jethani wrote: > On 2017/05/08 08:13:02, kzar wrote: > > On 2017/05/03 14:41:54, Manish Jethani wrote: > > > On 2017/05/03 11:17:24, kzar wrote: > > > > I'm not sure syntax like this will work for Safari 9 :( > > > > > > I'll check, but if it doesn't work then I'll have to change this > > > in a few places. > > > > You mentioned testing the code on Safari now, but unless I missed it not the > > version. Which version did you test with? > > I've been testing with Safari 10. > > Anyway, this is not a problem because JS Hydra transpiles {rule} into {rule: > rule}. I think you should test with Safari 9 at least once to make sure it still works. If you don't have access to a copy you can use our test machines from https://app.webmate.io/ Manish Jethani 2017/05/09 15:52:46 On 2017/05/09 10:05:46, kzar wrote: > On 2017/05/08 14:03:58, Manish Jethani wrote: > > On 2017/05/08 08:13:02, kzar wrote: > > > On 2017/05/03 14:41:54, Manish Jethani wrote: > > > > On 2017/05/03 11:17:24, kzar wrote: > > > > > I'm not sure syntax like this will work for Safari 9 :( > > > > > > > > I'll check, but if it doesn't work then I'll have to change this > > > > in a few places. > > > > > > You mentioned testing the code on Safari now, but unless I missed it not the > > > version. Which version did you test with? > > > > I've been testing with Safari 10. > > > > Anyway, this is not a problem because JS Hydra transpiles {rule} into {rule: > > rule}. > > I think you should test with Safari 9 at least once to make sure it still works. > If you don't have access to a copy you can use our test machines from > https://app.webmate.io/ "{rule: rule}" ought to work in every single JS engine in existence. It makes sense to test everything out in Safari 9 though, I'll do that and get back with the results.
477

478 if (rule.action.type == "ignore-previous-rules")

479 {

480 rulesInfo[index].skip = true;

481 }

482 else

483 {

484 // Save a stringified version of the rule, but without the URL filter. We

485 // use this for comparison later.

486 rulesInfo[index].stringifiedWithoutURLFilter =

487 JSON.stringify(ruleWithoutURLFilter(rule));

488 }

489 });	938 });

490	939

491 for (let i = 0; i < rules.length; i++)	940 // For best results, we have to sort the candidates by the largest set of

492 {	941 // matches.

493 if (rulesInfo[i].skip)	942 //

	943 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to

	944 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and

	945 // "[ab]dx" (3 rules).

	946 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>

	947 {

	948 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :

	949 ruleInfo1.multiEditMatch ? 1 : 0;

	950 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :

	951 ruleInfo2.multiEditMatch ? 1 : 0;

	952

	953 return weight2 - weight1;

	954 });

	955

	956 for (let ruleInfo of candidateRulesInfo)

	957 {

	958 let rule = ruleInfo.rule;

	959

	960 // If this rule has already been merged into another rule, we skip it.

	961 if (ruleInfo.merged)

494 continue;	962 continue;

495	963

496 for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++)	964 // Find the best set of rules to group, which is simply the largest set.

497 {	965 let best = (ruleInfo.matches \|\| []).reduce((best, matchesForIndex) =>

498 if (rulesInfo[j].skip)	966 {

499 continue;	967 matchesForIndex = (matchesForIndex \|\| []).filter(match =>

500	968 {

501 // Check if the rules are identical except for the URL filter.	969 // Filter out rules that have either already been merged into other

502 if (rulesInfo[i].stringifiedWithoutURLFilter ==	970 // rules or have had other rules merged into them.
kzar 2017/05/03 15:19:04 I wonder if we could create a lookup table stringifiedWithoutURLFilter => [ruleInfo] instead? Manish Jethani 2017/05/04 02:49:32 On 2017/05/03 15:19:04, kzar wrote: > I wonder if we could create a lookup table stringifiedWithoutURLFilter => > [ruleInfo] instead? I'm not sure what the benefit of that would be. We have two ruleInfo objects, and we want to find out if they're identical except for the URL filter. Now it's as simple as a string comparison. I actually tried the lookup table approach (we don't even need to store an array there, just an empty object), it is significantly slower. But you give me a good idea: compare hashes instead. I just take the first 32 bits of the SHA-1 hash of the string, and it makes it more than 10% faster.
503 rulesInfo[j].stringifiedWithoutURLFilter)	971 return !rulesInfo[match.index].merged &&

504 {	972 !rulesInfo[match.index].mergedInto;

505 let source = rules[i].trigger["url-filter"];	973 });

506 let target = rules[j].trigger["url-filter"];	974

507	975 return matchesForIndex.length > best.length ? matchesForIndex : best;

508 let edit = closeMatch(source, target, {multi});	976 },

509	977 []);

510 if (edit)	978

	979 let multiEdit = false;

	980

	981 // If we couldn't find a single rule to merge with, let's see if we have a

	982 // multiple character edit. e.g. we could merge "ad" and "adserver" into

	983 // "ad(server)?".

	984 if (best.length == 0 && ruleInfo.multiEditMatch &&

	985 !rulesInfo[ruleInfo.multiEditMatch.index].merged &&

	986 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)

	987 {

	988 best = [ruleInfo.multiEditMatch];

	989 multiEdit = true;

	990 }

	991

	992 if (best.length > 0)

	993 {

	994 let urlFilter = rule.trigger["url-filter"];

	995

	996 let editIndex = best[0].edit.index;

	997

	998 if (!multiEdit)

	999 {

	1000 // Merge all the matching rules into this one.

	1001

	1002 let characters = [urlFilter[editIndex]];

	1003 let quantifier = "";

	1004

	1005 for (let match of best)

511 {	1006 {

512 let urlFilter, ruleInfo, match = {edit};	1007 if (match.edit.type == "delete")

513

514 if (edit.type == "insert")

515 {	1008 {

516 // Convert the insertion into a deletion and stick it on the target	1009 quantifier = "?";

517 // rule instead. We can only group deletions and substitutions;

518 // therefore insertions must be treated as deletions on the target

519 // rule, to be dealt with later.

520 urlFilter = target;

521 ruleInfo = rulesInfo[j];

522 match.index = i;

523 edit.type = "delete";

524 }	1010 }

525 else	1011 else

526 {	1012 {

527 urlFilter = source;	1013 let character = rulesInfo[match.index].rule

528 ruleInfo = rulesInfo[i];	1014 .trigger["url-filter"][editIndex];

529 match.index = j;	1015

	1016 // Insert any hyphen at the beginning so it gets interpreted as a

	1017 // literal hyphen.

	1018 if (character == "-")

	1019 characters.unshift(character);

	1020 else

	1021 characters.push(character);

530 }	1022 }

531	1023

532 if (edit.closeIndex)	1024 // Mark the target rule as merged so other rules don't try to merge

533 {	1025 // it again.

534 if (!ruleInfo.multiEditMatch)	1026 rulesInfo[match.index].merged = true;

535 ruleInfo.multiEditMatch = match;

536 }

537 else

538 {

539 if (!ruleInfo.matches)

540 ruleInfo.matches = new Array(urlFilter.length + 1);

541

542 let matchesForIndex = ruleInfo.matches[edit.index];

543

544 if (matchesForIndex)

545 {

546 matchesForIndex.push(match);

547 }

548 else

549 {

550 matchesForIndex = [match];

551 ruleInfo.matches[edit.index] = matchesForIndex;

552 }

553

554 if (!ruleInfo.bestMatches \|\|

555 matchesForIndex.length > ruleInfo.bestMatches.length)

556 ruleInfo.bestMatches = matchesForIndex;

557 }

558 }

559 }

560 }

561 }

562

563 let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches \|\|

564 ruleInfo.multiEditMatch) ;
kzar 2017/05/03 11:17:24 Nit: Long line. Manish Jethani 2017/05/04 02:49:31 On 2017/05/03 11:17:24, kzar wrote: > Nit: Long line. Done.
565

566 // For best results, we have to sort the candidates by the number of matches.

567 // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate

568 // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3

569 // rules).

570 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>

571 {

572 let weight1 = 1;

573 let weight2 = 1;

574

575 if (ruleInfo1.bestMatches)

576 weight1 = ruleInfo1.bestMatches.length;

577

578 if (ruleInfo2.bestMatches)

579 weight2 = ruleInfo2.bestMatches.length;

580

581 return weight2 - weight1;

582 });

583

584 for (let ruleInfo of candidateRulesInfo)

585 {

586 let rule = ruleInfo.rule;

587

588 if (rule._merged)

589 continue;

590

591 // Find the best set of rules to group, which is simply the largest set.

592 let best = (ruleInfo.matches \|\| []).reduce((best, matchesForIndex) =>

593 {

594 matchesForIndex = (matchesForIndex \|\| []).filter(match =>

595 {

596 // Filter out rules that have either already been merged into other

597 // rules or have had other rules merged into them.

598 return !rules[match.index]._merged &&

599 !rulesInfo[match.index].mergedInto;

600 });

601

602 return matchesForIndex.length > best.length ? matchesForIndex : best;

603 },

604 []);

605

606 if (best.length == 0 && ruleInfo.multiEditMatch &&

607 !rules[ruleInfo.multiEditMatch.index]._merged &&

608 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)

609 best = [ruleInfo.multiEditMatch];

610

611 if (best.length > 0)

612 {

613 let urlFilter = rule.trigger["url-filter"];

614

615 let editIndex = best[0].edit.index;

616

617 if (best[0] != ruleInfo.multiEditMatch)

618 {

619 // Merge all the matching rules into this one.

620

621 let characters = [];

622 let quantifier = "";

623

624 for (let match of best)

625 {

626 if (match.edit.type == "delete")

627 quantifier = "?";

628 else

629 characters.push(rules[match.index].trigger["url-filter"][editIndex]) ;

630

631 rules[match.index]._merged = true;

632 }	1027 }

633	1028

634 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +	1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +

635 urlFilter.substring(editIndex + 1);	1030 urlFilter.substring(editIndex + 1);

636 if (characters.length > 0)	1031 if (characters.length > 1)

637 {	1032 {

638 urlFilter = urlFilter.substring(0, editIndex) + "[" +	1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +

639 urlFilter[editIndex] + characters.join("") + "]" +	1034 characters.join("") + "]" +

640 urlFilter.substring(editIndex + 1);	1035 urlFilter.substring(editIndex + 1);

641 }	1036 }

642 }	1037 }

643 else	1038 else

644 {	1039 {

645 let editCloseIndex = best[0].edit.closeIndex;	1040 let editEndIndex = best[0].edit.endIndex;

646	1041

647 rules[best[0].index]._merged = true;	1042 // Mark the target rule as merged so other rules don't try to merge it

	1043 // again.

	1044 rulesInfo[best[0].index].merged = true;

648	1045

649 urlFilter = urlFilter.substring(0, editIndex) + "(" +	1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +

650 urlFilter.substring(editIndex, editCloseIndex) + ")?" +	1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +

651 urlFilter.substring(editCloseIndex);	1048 urlFilter.substring(editEndIndex);

652 }	1049 }

653	1050

654 rule.trigger["url-filter"] = urlFilter;	1051 rule.trigger["url-filter"] = urlFilter;

655	1052

	1053 // Mark this rule as one that has had other rules merged into it.

656 ruleInfo.mergedInto = true;	1054 ruleInfo.mergedInto = true;

657 }	1055 }

658 }	1056 }

659	1057 }

660 return rules.filter(rule => !rule._merged);	1058

	1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)

	1060 {

	1061 return async(rulesInfo, (ruleInfo, index) => () =>

	1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

	1063 )

	1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));

	1065 }

	1066

	1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)

	1068 {

	1069 if (rulesInfo.length <= 1)

	1070 return;

	1071

	1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);

	1073

	1074 for (let i = 1; i < rulesInfo.length; i++)

	1075 {

	1076 for (let value of rulesInfo[i].rule[propertyType][property] \|\| [])

	1077 valueSet.add(value);

	1078

	1079 rulesInfo[i].merged = true;

	1080 }

	1081

	1082 if (valueSet.size > 0)

	1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);

	1084

	1085 rulesInfo[0].mergedInto = true;

	1086 }

	1087

	1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)

	1089 {

	1090 let mergeableRulesInfoByGroup = new Map();

	1091

	1092 for (let ruleInfo of rulesInfo)

	1093 {

	1094 let copy = {

	1095 trigger: Object.assign({}, ruleInfo.rule.trigger),

	1096 action: Object.assign({}, ruleInfo.rule.action)

	1097 };

	1098

	1099 delete copy[propertyType][property];

	1100

	1101 let groupKey = JSON.stringify(copy);

	1102

	1103 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey);

	1104

	1105 if (mergeableRulesInfo)

	1106 mergeableRulesInfo.push(ruleInfo);

	1107 else

	1108 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]);

	1109 }

	1110

	1111 return mergeableRulesInfoByGroup;

	1112 }

	1113

	1114 function mergeRules(rules, exhaustive)

	1115 {

	1116 let rulesInfo = rules.map(rule => ({rule}));

	1117

	1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];

	1119

	1120 return async(() =>

	1121 {

	1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");

	1123 return async(map.values(), mergeableRulesInfo => () =>

	1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)

	1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))

	1126 )

	1127 .then(() =>

	1128 {

	1129 // Filter out rules that are redundant or have been merged into other

	1130 // rules.

	1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&

	1132 !ruleInfo.merged);

	1133 });

	1134 })

	1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>

	1136 {

	1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",

	1138 arrayProperty);

	1139 return async(map.values(), mergeableRulesInfo => () =>

	1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)

	1141 )

	1142 .then(() =>

	1143 {

	1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);

	1145 });

	1146 }))

	1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));

661 }	1148 }

662	1149

663 let ContentBlockerList =	1150 let ContentBlockerList =

664 /**	1151 /**

665 * Create a new Adblock Plus filter to content blocker list converter	1152 * Create a new Adblock Plus filter to content blocker list converter

666 *	1153 *

	1154 * @param {object} options Options for content blocker list generation

	1155 *

667 * @constructor	1156 * @constructor

668 */	1157 */

669 exports.ContentBlockerList = function ()	1158 exports.ContentBlockerList = function (options)

670 {	1159 {

	1160 const defaultOptions = {

	1161 merge: "auto"

	1162 };

	1163

	1164 this.options = Object.assign({}, defaultOptions, options);

	1165

671 this.requestFilters = [];	1166 this.requestFilters = [];

672 this.requestExceptions = [];	1167 this.requestExceptions = [];

673 this.elemhideFilters = [];	1168 this.elemhideFilters = [];

674 this.elemhideExceptions = [];	1169 this.elemhideExceptions = [];

	1170 this.genericblockExceptions = [];

	1171 this.generichideExceptions = [];

675 this.elemhideSelectorExceptions = new Map();	1172 this.elemhideSelectorExceptions = new Map();

676 };	1173 };

677	1174

678 /**	1175 /**

679 * Add Adblock Plus filter to be converted	1176 * Add Adblock Plus filter to be converted

680 *	1177 *

681 * @param {Filter} filter Filter to convert	1178 * @param {Filter} filter Filter to convert

682 */	1179 */

683 ContentBlockerList.prototype.addFilter = function(filter)	1180 ContentBlockerList.prototype.addFilter = function(filter)

684 {	1181 {

685 if (filter.sitekeys)	1182 if (filter.sitekeys)

686 return;	1183 return;

687 if (filter instanceof filterClasses.RegExpFilter &&	1184 if (filter instanceof filterClasses.RegExpFilter &&

688 filter.regexpSource == null)	1185 filter.regexpSource == null)

689 return;	1186 return;

690	1187

691 if (filter instanceof filterClasses.BlockingFilter)	1188 if (filter instanceof filterClasses.BlockingFilter)

692 this.requestFilters.push(filter);	1189 this.requestFilters.push(filter);

693	1190

694 if (filter instanceof filterClasses.WhitelistFilter)	1191 if (filter instanceof filterClasses.WhitelistFilter)

695 {	1192 {

696 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))	1193 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))

697 this.requestExceptions.push(filter);	1194 this.requestExceptions.push(filter);

698	1195

699 if (filter.contentType & typeMap.ELEMHIDE)	1196 if (filter.contentType & typeMap.GENERICBLOCK)

700 this.elemhideExceptions.push(filter);	1197 this.genericblockExceptions.push(filter);

	1198

	1199 if (filter.contentType & typeMap.ELEMHIDE)

	1200 this.elemhideExceptions.push(filter);

	1201 else if (filter.contentType & typeMap.GENERICHIDE)

	1202 this.generichideExceptions.push(filter);

701 }	1203 }

702	1204

703 if (filter instanceof filterClasses.ElemHideFilter)	1205 if (filter instanceof filterClasses.ElemHideFilter)

704 this.elemhideFilters.push(filter);	1206 this.elemhideFilters.push(filter);

705	1207

706 if (filter instanceof filterClasses.ElemHideException)	1208 if (filter instanceof filterClasses.ElemHideException)

707 {	1209 {

708 let domains = this.elemhideSelectorExceptions[filter.selector];	1210 let domains = this.elemhideSelectorExceptions[filter.selector];

709 if (!domains)	1211 if (!domains)

710 domains = this.elemhideSelectorExceptions[filter.selector] = [];	1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];

711	1213

712 parseDomains(filter.domains, domains, []);	1214 parseDomains(filter.domains, domains, []);

713 }	1215 }

714 };	1216 };

715	1217

716 /**	1218 /**

717 * Generate content blocker list for all filters that were added	1219 * Generate content blocker list for all filters that were added

718 *

719 * @returns {Filter} filter Filter to convert

720 */	1220 */

721 ContentBlockerList.prototype.generateRules = function(	1221 ContentBlockerList.prototype.generateRules = function()

722 {merge = false, multiMerge = false} = {})	1222 {

723 {	1223 let cssRules = [];

724 let rules = [];	1224 let cssExceptionRules = [];

725	1225 let blockingRules = [];

	1226 let blockingExceptionRules = [];

	1227

	1228 let ruleGroups = [cssRules, cssExceptionRules,

	1229 blockingRules, blockingExceptionRules];

	1230

	1231 let genericSelectors = [];

726 let groupedElemhideFilters = new Map();	1232 let groupedElemhideFilters = new Map();

	1233

727 for (let filter of this.elemhideFilters)	1234 for (let filter of this.elemhideFilters)

728 {	1235 {

729 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);	1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);

730 if (!result)	1237 if (!result)

731 continue;	1238 continue;

732	1239

733 if (result.matchDomains.length == 0)	1240 if (result.matchDomains.length == 0)

734 result.matchDomains = ["^https?://"];	1241 {

735	1242 genericSelectors.push(result.selector);

736 for (let matchDomain of result.matchDomains)	1243 }

737 {	1244 else

738 let group = groupedElemhideFilters.get(matchDomain) || [];	1245 {

739 group.push(result.selector);	1246 for (let matchDomain of result.matchDomains)

740 groupedElemhideFilters.set(matchDomain, group);	1247 {

741 }	1248 let group = groupedElemhideFilters.get(matchDomain) || [];

742 }	1249 group.push(result.selector);

	1250 groupedElemhideFilters.set(matchDomain, group);

	1251 }

	1252 }

	1253 }

	1254

	1255 // Separate out the element hiding exceptions that have only a hostname part

	1256 // from the rest. This allows us to implement a workaround for issue #5345

	1257 // (WebKit bug #167423), but as a bonus it also reduces the number of

	1258 // generated rules. The downside is that the exception will only apply to the

	1259 // top-level document, not to iframes. We have to live with this until the

	1260 // WebKit bug is fixed in all supported versions of Safari.

	1261 // https://bugs.webkit.org/show_bug.cgi?id=167423

	1262 //

	1263 // Note that as a result of this workaround we end up with a huge rule set in

	1264 // terms of the amount of memory used. This can cause Node.js to throw

	1265 // "JavaScript heap out of memory". To avoid this, call Node.js with

	1266 // --max_old_space_size=4096

	1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);

	1268

	1269 let genericSelectorExceptionDomains =

	1270 extractFilterDomains(this.generichideExceptions);

	1271 elemhideExceptionDomains.forEach(name =>

	1272 {

	1273 genericSelectorExceptionDomains.add(name);

	1274 });

	1275

	1276 addCSSRules(cssRules, genericSelectors, null,

	1277 genericSelectorExceptionDomains);

	1278

	1279 // Filter out whitelisted domains.

	1280 elemhideExceptionDomains.forEach(domain =>

	1281 groupedElemhideFilters.delete(domain));

743	1282

744 groupedElemhideFilters.forEach((selectors, matchDomain) =>	1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>

745 {	1284 {

746 while (selectors.length)	1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);

747 {	1286 });

748 let selector = selectors.splice(0, selectorLimit).join(", ");	1287

749	1288 let requestFilterExceptionDomains = [];

750 // As of Safari 9.0 element IDs are matched as lowercase. We work around	1289 for (let filter of this.genericblockExceptions)

751 // this by converting to the attribute format [id="elementID"]	1290 {

752 selector = convertIDSelectorsToAttributeSelectors(selector);	1291 let parsed = parseFilterRegexpSource(filter.regexpSource);

753	1292 if (parsed.hostname)

754 rules.push({	1293 requestFilterExceptionDomains.push(parsed.hostname);

755 trigger: {"url-filter": matchDomain,	1294 }

756 "url-filter-is-case-sensitive": true},	1295

757 action: {type: "css-display-none",	1296 for (let filter of this.requestFilters)

758 selector: selector}	1297 {

	1298 convertFilterAddRules(blockingRules, filter, "block", true,

	1299 requestFilterExceptionDomains);

	1300 }

	1301

	1302 for (let filter of this.requestExceptions)

	1303 {

	1304 convertFilterAddRules(blockingExceptionRules, filter,

	1305 "ignore-previous-rules", true);

	1306 }

	1307

	1308 return async(ruleGroups, (group, index) => () =>

	1309 {

	1310 let next = () =>

	1311 {

	1312 if (index == ruleGroups.length - 1)

	1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);

	1314 };

	1315

	1316 if (this.options.merge == "all" ||

	1317 (this.options.merge == "auto" &&

	1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))

	1319 {

	1320 return mergeRules(ruleGroups[index], this.options.merge == "all")

	1321 .then(rules =>

	1322 {

	1323 ruleGroups[index] = rules;

	1324 return next();

759 });	1325 });

760 }	1326 }

	1327

	1328 return next();

761 });	1329 });

762

763 for (let filter of this.elemhideExceptions)

764 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);

765 for (let filter of this.requestFilters)

766 convertFilterAddRules(rules, filter, "block", true);

767 for (let filter of this.requestExceptions)

768 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

769

770 rules = rules.filter(rule => !hasNonASCI(rule));

771

772 if (merge)

773 rules = mergeCloselyMatchingRules(rules, {multi: multiMerge});

774

775 return rules;

776 };	1330 };

LEFT	RIGHT