lib/abp2blocklist.js - Issue 29426594: Issue 3673 - Merge closely matching rules

Delta Between Two Patch Sets: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Left Patch Set: Remove redundant async Created May 24, 2017, 2:48 a.m.

Right Patch Set: Rebase Created July 28, 2017, 1:31 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 * GNU General Public License for more details.	12 * GNU General Public License for more details.

13 *	13 *

14 * You should have received a copy of the GNU General Public License	14 * You should have received a copy of the GNU General Public License

15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

16 */	16 */

17	17

18 /** @module abp2blocklist */	18 /** @module abp2blocklist */

19	19

20 "use strict";	20 "use strict";

21	21

22 let filterClasses = require("filterClasses");	22 let filterClasses = require("filterClasses");

23 let tldjs = require("tldjs");

24 let punycode = require("punycode");	23 let punycode = require("punycode");

25	24

26 const selectorLimit = 5000;	25 const selectorLimit = 5000;

27 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

28 const whitelistableRequestTypes = (typeMap.IMAGE	27

29 \| typeMap.STYLESHEET	28 const httpRequestTypes = typeMap.IMAGE \|

30 \| typeMap.SCRIPT	29 typeMap.STYLESHEET \|

31 \| typeMap.FONT	30 typeMap.SCRIPT \|

32 \| typeMap.MEDIA	31 typeMap.FONT \|

33 \| typeMap.POPUP	32 typeMap.MEDIA \|

34 \| typeMap.OBJECT	33 typeMap.POPUP \|

35 \| typeMap.OBJECT_SUBREQUEST	34 typeMap.OBJECT \|

36 \| typeMap.XMLHTTPREQUEST	35 typeMap.OBJECT_SUBREQUEST \|

37 \| typeMap.PING	36 typeMap.XMLHTTPREQUEST \|

38 \| typeMap.SUBDOCUMENT	37 typeMap.PING \|

39 \| typeMap.OTHER);	38 typeMap.SUBDOCUMENT \|

	39 typeMap.OTHER;

	40 const rawRequestTypes = typeMap.XMLHTTPREQUEST \|

	41 typeMap.WEBSOCKET \|

	42 typeMap.WEBRTC \|

	43 typeMap.OBJECT_SUBREQUEST \|

	44 typeMap.PING \|

	45 typeMap.OTHER;

	46 const whitelistableRequestTypes = httpRequestTypes \|

	47 typeMap.WEBSOCKET \|

	48 typeMap.WEBRTC;

40	49

41 function callLater(func)	50 function callLater(func)

42 {	51 {

43 return new Promise(resolve =>	52 return new Promise(resolve =>

44 {	53 {

45 let call = () => resolve(func());	54 let call = () => resolve(func());

46	55

47 // If this looks like Node.js, call process.nextTick, otherwise call	56 // If this looks like Node.js, call process.nextTick, otherwise call

48 // setTimeout.	57 // setTimeout.

49 if (typeof process != "undefined")	58 if (typeof process != "undefined")

50 process.nextTick(call);	59 process.nextTick(call);

51 else	60 else

52 setTimeout(call, 0);	61 setTimeout(call, 0);

53 });	62 });

54 }	63 }

55	64

56 function async(funcs)	65 function async(callees, mapFunction)

57 {	66 {

58 if (!Array.isArray(funcs))	67 if (!(Symbol.iterator in callees))

59 funcs = Array.from(arguments);	68 callees = [callees];

60	69

61 let lastPause = Date.now();	70 let lastPause = Date.now();

62	71 let index = 0;

63 return funcs.reduce((promise, next) => promise.then(() =>	72

64 {	73 let promise = Promise.resolve();

65 // If it has been 100ms or longer since the last call, take a pause. This	74

66 // keeps the browser from freezing up.	75 for (let next of callees)

67 let now = Date.now();	76 {

68 if (now - lastPause >= 100)	77 let currentIndex = index;

69 {	78

70 lastPause = now;	79 promise = promise.then(() =>

71 return callLater(next);	80 {

72 }	81 if (mapFunction)

73	82 next = mapFunction(next, currentIndex);

74 return next();	83

75 }),	84 // If it has been 100ms or longer since the last call, take a pause. This

76 Promise.resolve());	85 // keeps the browser from freezing up.

	86 let now = Date.now();

	87 if (now - lastPause >= 100)

	88 {

	89 lastPause = now;

	90 return callLater(next);

	91 }

	92

	93 return next();

	94 });

	95

	96 index++;

	97 }

	98

	99 return promise;

77 }	100 }

78	101

79 function parseDomains(domains, included, excluded)	102 function parseDomains(domains, included, excluded)

80 {	103 {

81 for (let domain in domains)	104 for (let domain in domains)

82 {	105 {

83 if (domain != "")	106 if (domain != "")

84 {	107 {

85 let enabled = domains[domain];	108 let enabled = domains[domain];

86 domain = punycode.toASCII(domain.toLowerCase());	109 domain = punycode.toASCII(domain.toLowerCase());

87	110

88 if (!enabled)	111 if (!enabled)

89 excluded.push(domain);	112 excluded.push(domain);

90 else if (!domains[""])	113 else if (!domains[""])

91 included.push(domain);	114 included.push(domain);

92 }	115 }

93 }	116 }

94 }	117 }

95	118

96 function escapeRegExp(s)	119 function escapeRegExp(s)

97 {	120 {

98 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	121 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

99 }	122 }

100	123

101 function matchDomain(domain)	124 function matchDomain(domain)

102 {	125 {

	126 if (!domain)

	127 return "^https?://";

	128

103 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	129 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

	130 }

	131

	132 function getURLSchemes(contentType)

	133 {

	134 // If the given content type includes all supported URL schemes, simply

	135 // return a single generic URL scheme pattern. This minimizes the size of the

	136 // generated rule set. The downside to this is that it will also match

	137 // schemes that we do not want to match (e.g. "ftp://"), but this can be

	138 // mitigated by adding exceptions for those schemes.

	139 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

	140 contentType & httpRequestTypes)

	141 return ["[^:]+:(//)?"];

	142

	143 let urlSchemes = [];

	144

	145 if (contentType & typeMap.WEBSOCKET)

	146 urlSchemes.push("wss?://");

	147

	148 if (contentType & typeMap.WEBRTC)

	149 urlSchemes.push("stuns?:", "turns?:");

	150

	151 if (contentType & httpRequestTypes)

	152 urlSchemes.push("https?://");

	153

	154 return urlSchemes;

	155 }

	156

	157 function findSubdomainsInList(domain, list)

	158 {

	159 let subdomains = [];

	160 let suffixLength = domain.length + 1;

	161

	162 for (let name of list)

	163 {

	164 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

	165 subdomains.push(name.slice(0, -suffixLength));

	166 }

	167

	168 return subdomains;

	169 }

	170

	171 function extractFilterDomains(filters)

	172 {

	173 let domains = new Set();

	174 for (let filter of filters)

	175 {

	176 let parsed = parseFilterRegexpSource(filter.regexpSource);

	177 if (parsed.justHostname)

	178 domains.add(parsed.hostname);

	179 }

	180 return domains;

104 }	181 }

105	182

106 function convertElemHideFilter(filter, elemhideSelectorExceptions)	183 function convertElemHideFilter(filter, elemhideSelectorExceptions)

107 {	184 {

108 let included = [];	185 let included = [];

109 let excluded = [];	186 let excluded = [];

110 let rules = [];

111	187

112 parseDomains(filter.domains, included, excluded);	188 parseDomains(filter.domains, included, excluded);

113	189

114 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	190 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

115 return {matchDomains: included.map(matchDomain), selector: filter.selector};	191 return {matchDomains: included, selector: filter.selector};

116 }	192 }

117	193

118 /**	194 /**

119 * Parse the given filter "regexpSource" string. Producing a regular expression,	195 * Parse the given filter "regexpSource" string. Producing a regular expression,

120 * extracting the hostname (if any), deciding if the regular expression is safe	196 * extracting the hostname (if any), deciding if the regular expression is safe

121 * to be converted + matched as lower case and noting if the source contains	197 * to be converted + matched as lower case and noting if the source contains

122 * anything after the hostname.)	198 * anything after the hostname.)

123 *	199 *

124 * @param {string} text regexpSource property of a filter	200 * @param {string} text regexpSource property of a filter

	201 * @param {string} urlScheme The URL scheme to use in the regular expression

125 * @returns {object} An object containing a regular expression string, a bool	202 * @returns {object} An object containing a regular expression string, a bool

126 * indicating if the filter can be safely matched as lower	203 * indicating if the filter can be safely matched as lower

127 * case, a hostname string (or undefined) and a bool	204 * case, a hostname string (or undefined) and a bool

128 * indicating if the source only contains a hostname or not:	205 * indicating if the source only contains a hostname or not:

129 * {regexp: "...",	206 * {regexp: "...",

130 * canSafelyMatchAsLowercase: true/false,	207 * canSafelyMatchAsLowercase: true/false,

131 * hostname: "...",	208 * hostname: "...",

132 * justHostname: true/false}	209 * justHostname: true/false}

133 */	210 */

134 function parseFilterRegexpSource(text)	211 function parseFilterRegexpSource(text, urlScheme)

135 {	212 {

136 let regexp = [];	213 let regexp = [];

137 let lastIndex = text.length - 1;	214

	215 // Convert the text into an array of Unicode characters.

	216 //

	217 // In the case of surrogate pairs (the smiley emoji, for example), one

	218 // Unicode code point is represented by two JavaScript characters together.

	219 // We want to iterate over Unicode code points rather than JavaScript

	220 // characters.

	221 let characters = Array.from(text);

	222

	223 let lastIndex = characters.length - 1;

138 let hostname;	224 let hostname;

139 let hostnameStart = null;	225 let hostnameStart = null;

140 let hostnameFinished = false;	226 let hostnameFinished = false;

141 let justHostname = false;	227 let justHostname = false;

142 let canSafelyMatchAsLowercase = false;	228 let canSafelyMatchAsLowercase = false;

143	229

144 for (let i = 0; i < text.length; i++)	230 if (!urlScheme)

145 {	231 urlScheme = getURLSchemes()[0];

146 let c = text[i];	232

	233 for (let i = 0; i < characters.length; i++)

	234 {

	235 let c = characters[i];

147	236

148 if (hostnameFinished)	237 if (hostnameFinished)

149 justHostname = false;	238 justHostname = false;

150	239

151 // If we're currently inside the hostname we have to be careful not to	240 // If we're currently inside the hostname we have to be careful not to

152 // escape any characters until after we have converted it to punycode.	241 // escape any characters until after we have converted it to punycode.

153 if (hostnameStart != null && !hostnameFinished)	242 if (hostnameStart != null && !hostnameFinished)

154 {	243 {

155 let endingChar = (c == "*" \|\| c == "^" \|\|	244 let endingChar = (c == "*" \|\| c == "^" \|\|

156 c == "?" \|\| c == "/" \|\| c == "\|");	245 c == "?" \|\| c == "/" \|\| c == "\|");

157 if (!endingChar && i != lastIndex)	246 if (!endingChar && i != lastIndex)

158 continue;	247 continue;

159	248

160 hostname = punycode.toASCII(	249 hostname = punycode.toASCII(

161 text.substring(hostnameStart, endingChar ? i : i + 1)	250 characters.slice(hostnameStart, endingChar ? i : i + 1).join("")

	251 .toLowerCase()

162 );	252 );

163 hostnameFinished = justHostname = true;	253 hostnameFinished = justHostname = true;

164 regexp.push(escapeRegExp(hostname));	254 regexp.push(escapeRegExp(hostname));

165 if (!endingChar)	255 if (!endingChar)

166 break;	256 break;

167 }	257 }

168	258

169 switch (c)	259 switch (c)

170 {	260 {

171 case "*":	261 case "*":

172 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")	262 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*")

173 regexp.push(".*");	263 regexp.push(".*");

174 break;	264 break;

175 case "^":	265 case "^":

176 if (i < lastIndex)	266 let alphabet = "a-z";

177 regexp.push(".");	267 // If justHostname is true and we've encountered a "^", it means we're

	268 // still in the hostname part of the URL. Since hostnames are always

	269 // lower case (Punycode), there's no need to include "A-Z" in the

	270 // pattern. Further, subsequent code may lower-case the entire regular

	271 // expression (if the URL contains only the hostname part), leaving us

	272 // with "a-za-z", which would be redundant.

	273 if (!justHostname)

	274 alphabet = "A-Z" + alphabet;

	275 let digits = "0-9";

	276 // Note that the "-" must appear first here in order to retain its

	277 // literal meaning within the brackets.

	278 let specialCharacters = "-_.%";

	279 let separator = "[^" + specialCharacters + alphabet + digits + "]";

	280 if (i == 0)

	281 regexp.push("^" + urlScheme + "(.*" + separator + ")?");

	282 else if (i == lastIndex)

	283 regexp.push("(" + separator + ".*)?$");

	284 else

	285 regexp.push(separator);

178 break;	286 break;

179 case "\|":	287 case "\|":

180 if (i == 0)	288 if (i == 0)

181 {	289 {

182 regexp.push("^");	290 regexp.push("^");

183 break;	291 break;

184 }	292 }

185 if (i == lastIndex)	293 if (i == lastIndex)

186 {	294 {

187 regexp.push("$");	295 regexp.push("$");

188 break;	296 break;

189 }	297 }

190 if (i == 1 && text[0] == "\|")	298 if (i == 1 && characters[0] == "\|")

191 {	299 {

192 hostnameStart = i + 1;	300 hostnameStart = i + 1;

193 canSafelyMatchAsLowercase = true;	301 canSafelyMatchAsLowercase = true;

194 regexp.push("https?://([^/]+\\.)?");	302 regexp.push(urlScheme + "([^/]+\\.)?");

195 break;	303 break;

196 }	304 }

197 regexp.push("\\\|");	305 regexp.push("\\\|");

198 break;	306 break;

199 case "/":	307 case "/":

200 if (!hostnameFinished &&	308 if (!hostnameFinished &&

201 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	309 characters[i - 2] == ":" && characters[i - 1] == "/")

202 {	310 {

203 hostnameStart = i + 1;	311 hostnameStart = i + 1;

204 canSafelyMatchAsLowercase = true;	312 canSafelyMatchAsLowercase = true;

205 }	313 }

206 regexp.push("/");	314 regexp.push("/");

207 break;	315 break;

208 case ".": case "+": case "$": case "?":	316 case ".": case "+": case "$": case "?":

209 case "{": case "}": case "(": case ")":	317 case "{": case "}": case "(": case ")":

210 case "[": case "]": case "\\":	318 case "[": case "]": case "\\":

211 regexp.push("\\", c);	319 regexp.push("\\", c);

212 break;	320 break;

213 default:	321 default:

214 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	322 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

215 c >= "A" && c <= "Z"))	323 c >= "A" && c <= "Z"))

216 canSafelyMatchAsLowercase = false;	324 canSafelyMatchAsLowercase = false;

217 regexp.push(c);	325 regexp.push(c == "%" ? c : encodeURI(c));

218 }	326 }

219 }	327 }

220	328

221 return {	329 return {

222 regexp: regexp.join(""),	330 regexp: regexp.join(""),

223 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	331 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

224 hostname: hostname,	332 hostname: hostname,

225 justHostname: justHostname	333 justHostname: justHostname

226 };	334 };

227 }	335 }

228	336

229 function getResourceTypes(filter)	337 function getResourceTypes(contentType)

230 {	338 {

231 let types = [];	339 let types = [];

232	340

233 if (filter.contentType & typeMap.IMAGE)	341 if (contentType & typeMap.IMAGE)

234 types.push("image");	342 types.push("image");

235 if (filter.contentType & typeMap.STYLESHEET)	343 if (contentType & typeMap.STYLESHEET)

236 types.push("style-sheet");	344 types.push("style-sheet");

237 if (filter.contentType & typeMap.SCRIPT)	345 if (contentType & typeMap.SCRIPT)

238 types.push("script");	346 types.push("script");

239 if (filter.contentType & typeMap.FONT)	347 if (contentType & typeMap.FONT)

240 types.push("font");	348 types.push("font");

241 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	349 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

242 types.push("media");	350 types.push("media");

243 if (filter.contentType & typeMap.POPUP)	351 if (contentType & typeMap.POPUP)

244 types.push("popup");	352 types.push("popup");

245 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	353 if (contentType & rawRequestTypes)

246 typeMap.OBJECT_SUBREQUEST \|

247 typeMap.PING \|

248 typeMap.OTHER))

249 types.push("raw");	354 types.push("raw");

250 if (filter.contentType & typeMap.SUBDOCUMENT)	355 if (contentType & typeMap.SUBDOCUMENT)

251 types.push("document");	356 types.push("document");

252	357

253 return types;	358 return types;

254 }	359 }

255	360

256 function addDomainPrefix(domains)	361 function makeRuleCopies(trigger, action, urlSchemes)

257 {	362 {

258 let result = [];	363 let copies = [];

259	364

260 for (let domain of domains)	365 // Always make a deep copy of the rule, since rules may have to be

261 {	366 // manipulated individually at a later stage.

262 result.push(domain);	367 let stringifiedTrigger = JSON.stringify(trigger);

263	368

264 if (tldjs.getDomain(domain) == domain)	369 let filterPattern = trigger["url-filter"].substring(1);

265 result.push("www." + domain);	370 let startIndex = 0;

266 }	371

267	372 // If the URL filter already begins with the first URL scheme pattern, skip

268 return result;	373 // it.

269 }	374 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

270	375 {

271 function convertFilterAddRules(rules, filter, action, withResourceTypes)	376 filterPattern = filterPattern.substring(urlSchemes[0].length);

272 {	377 startIndex = 1;

273 let parsed = parseFilterRegexpSource(filter.regexpSource);	378 }

	379 else

	380 {

	381 filterPattern = ".*" + filterPattern;

	382 }

	383

	384 for (let i = startIndex; i < urlSchemes.length; i++)

	385 {

	386 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	387 "url-filter": "^" + urlSchemes[i] + filterPattern

	388 });

	389 copies.push({trigger: copyTrigger, action});

	390 }

	391

	392 return copies;

	393 }

	394

	395 function excludeTopURLFromTrigger(trigger)

	396 {

	397 trigger["unless-top-url"] = [trigger["url-filter"]];

	398 if (trigger["url-filter-is-case-sensitive"])

	399 trigger["top-url-filter-is-case-sensitive"] = true;

	400 }

	401

	402 function convertFilterAddRules(rules, filter, action, withResourceTypes,

	403 exceptionDomains, contentType)

	404 {

	405 if (!contentType)

	406 contentType = filter.contentType;

	407

	408 // If WebSocket or WebRTC are given along with other options but not

	409 // including all three of WebSocket, WebRTC, and at least one HTTP raw type,

	410 // we must generate multiple rules. For example, for the filter

	411 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

	412 // and another rule with "^https?://" and "image". If we merge the two, we

	413 // end up blocking requests of all HTTP raw types (e.g. XMLHttpRequest)

	414 // inadvertently.

	415 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

	416 !(contentType & typeMap.WEBRTC &&

	417 contentType & rawRequestTypes & httpRequestTypes)) \|\|

	418 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

	419 !(contentType & typeMap.WEBSOCKET &&

	420 contentType & rawRequestTypes & httpRequestTypes)))

	421 {

	422 if (contentType & typeMap.WEBSOCKET)

	423 {

	424 convertFilterAddRules(rules, filter, action, withResourceTypes,

	425 exceptionDomains, typeMap.WEBSOCKET);

	426 }

	427

	428 if (contentType & typeMap.WEBRTC)

	429 {

	430 convertFilterAddRules(rules, filter, action, withResourceTypes,

	431 exceptionDomains, typeMap.WEBRTC);

	432 }

	433

	434 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	435

	436 if (!contentType)

	437 return;

	438 }

	439

	440 let urlSchemes = getURLSchemes(contentType);

	441 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

274	442

275 // For the special case of $document whitelisting filters with just a domain	443 // For the special case of $document whitelisting filters with just a domain

276 // we can generate an equivalent blocking rule exception using if-domain.	444 // we can generate an equivalent blocking rule exception using if-domain.

277 if (filter instanceof filterClasses.WhitelistFilter &&	445 if (filter instanceof filterClasses.WhitelistFilter &&

278 filter.contentType & typeMap.DOCUMENT &&	446 contentType & typeMap.DOCUMENT &&

279 parsed.justHostname)	447 parsed.justHostname)

280 {	448 {

281 rules.push({	449 rules.push({

282 trigger: {	450 trigger: {

283 "url-filter": ".*",	451 "url-filter": ".*",

284 "if-domain": addDomainPrefix([parsed.hostname])	452 "if-domain": ["*" + parsed.hostname]

285 },	453 },

286 action: {type: "ignore-previous-rules"}	454 action: {type: "ignore-previous-rules"}

287 });	455 });

288 // If the filter contains other supported options we'll need to generate	456 // If the filter contains other supported options we'll need to generate

289 // further rules for it, but if not we can simply return now.	457 // further rules for it, but if not we can simply return now.

290 if (!(filter.contentType & whitelistableRequestTypes))	458 if (!(contentType & whitelistableRequestTypes))

291 return;	459 return;

292 }	460 }

293	461

294 let trigger = {"url-filter": parsed.regexp};	462 let trigger = {"url-filter": parsed.regexp};

295	463

296 // Limit rules to HTTP(S) URLs	464 // If the URL filter begins with one of the URL schemes for this content

297 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	465 // type, we generate additional rules for all the URL scheme patterns;

298 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	466 // otherwise, if the start of the URL filter literally matches the first URL

	467 // scheme pattern, we just generate additional rules for the remaining URL

	468 // scheme patterns.

	469 //

	470 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	471 // in front of this and generate two additional rules for

	472 // "^stuns?:.stun:foo" and "^turns?:.stun:foo". On the other hand,

	473 // "\|\|foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	474 // "^turns?:([^/]+\\.)?foo" in addition.

	475 //

	476 // Note that the filter can be already anchored to the beginning

	477 // (e.g. "\|stun:foo$webrtc"), in which case we do not generate any additional

	478 // rules.

	479 let needAltRules = trigger["url-filter"][0] != "^" \|\|

	480 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	481

	482 if (trigger["url-filter"][0] != "^")

	483 {

	484 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	485 .test(trigger["url-filter"])))

	486 {

	487 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	488 }

	489

	490 trigger["url-filter"] = "^" + trigger["url-filter"];

	491 }

299	492

300 // For rules containing only a hostname we know that we're matching against	493 // For rules containing only a hostname we know that we're matching against

301 // a lowercase string unless the matchCase option was passed.	494 // a lowercase string unless the matchCase option was passed.

302 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	495 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

303 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	496 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

304	497

305 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	498 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

306 trigger["url-filter-is-case-sensitive"] = true;	499 trigger["url-filter-is-case-sensitive"] = true;

307	500

308 let included = [];	501 let included = [];

309 let excluded = [];	502 let excluded = [];

310	503

311 parseDomains(filter.domains, included, excluded);	504 parseDomains(filter.domains, included, excluded);

312	505

	506 if (exceptionDomains)

	507 excluded = excluded.concat(exceptionDomains);

	508

313 if (withResourceTypes)	509 if (withResourceTypes)

314 {	510 {

315 trigger["resource-type"] = getResourceTypes(filter);	511 let resourceTypes = getResourceTypes(contentType);

316	512

317 if (trigger["resource-type"].length == 0)	513 // Content blocker rules can't differentiate between sub-document requests

	514 // (iframes) and top-level document requests. To avoid too many false

	515 // positives, we prevent rules with no hostname part from blocking document

	516 // requests.

	517 //

	518 // Once Safari 11 becomes our minimum supported version, we could change

	519 // our approach here to use the new "unless-top-url" property instead.

	520 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname)

	521 resourceTypes = resourceTypes.filter(type => type != "document");

	522

	523 if (resourceTypes.length == 0)

318 return;	524 return;

	525

	526 trigger["resource-type"] = resourceTypes;

319 }	527 }

320	528

321 if (filter.thirdParty != null)	529 if (filter.thirdParty != null)

322 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	530 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

323	531

	532 let addTopLevelException = false;

	533

324 if (included.length > 0)	534 if (included.length > 0)

325 trigger["if-domain"] = addDomainPrefix(included);	535 {

	536 trigger["if-domain"] = [];

	537

	538 for (let name of included)

	539 {

	540 // If this is a blocking filter or an element hiding filter, add the

	541 // subdomain wildcard only if no subdomains have been excluded.

	542 let notSubdomains = null;

	543 if ((filter instanceof filterClasses.BlockingFilter \|\|

	544 filter instanceof filterClasses.ElemHideFilter) &&

	545 (notSubdomains = findSubdomainsInList(name, excluded)).length > 0)

	546 {

	547 trigger["if-domain"].push(name);

	548

	549 // Add the "www" prefix but only if it hasn't been excluded.

	550 if (!notSubdomains.includes("www"))

	551 trigger["if-domain"].push("www." + name);

	552 }

	553 else

	554 {

	555 trigger["if-domain"].push("*" + name);

	556 }

	557 }

	558 }

326 else if (excluded.length > 0)	559 else if (excluded.length > 0)

327 trigger["unless-domain"] = addDomainPrefix(excluded);	560 {

	561 trigger["unless-domain"] = excluded.map(name => "*" + name);

	562 }

	563 else if (filter instanceof filterClasses.BlockingFilter &&

	564 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname)

	565 {

	566 // Rules with a hostname part are still allowed to block document requests,

	567 // but we add an exception for top-level documents.

	568 //

	569 // Note that we can only do this if there's no "unless-domain" property for

	570 // now. This also only works in Safari 11 onwards, while older versions

	571 // simply ignore this property. Once Safari 11 becomes our minimum

	572 // supported version, we can merge "unless-domain" into "unless-top-url".

	573 addTopLevelException = true;

	574 excludeTopURLFromTrigger(trigger);

	575 }

328	576

329 rules.push({trigger: trigger, action: {type: action}});	577 rules.push({trigger: trigger, action: {type: action}});

330 }	578

331	579 if (needAltRules)

332 function hasNonASCI(obj)	580 {

333 {	581 // Generate additional rules for any alternative URL schemes.

334 if (typeof obj == "string")	582 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

335 {	583 {

336 if (/[^\x00-\x7F]/.test(obj))	584 if (addTopLevelException)

337 return true;	585 excludeTopURLFromTrigger(altRule.trigger);

338 }	586

339	587 rules.push(altRule);

340 if (typeof obj == "object")	588 }

341 {	589 }

342 if (obj instanceof Array)

343 for (let item of obj)

344 if (hasNonASCI(item))

345 return true;

346

347 let names = Object.getOwnPropertyNames(obj);

348 for (let name of names)

349 if (hasNonASCI(obj[name]))

350 return true;

351 }

352

353 return false;

354 }	590 }

355	591

356 function convertIDSelectorsToAttributeSelectors(selector)	592 function convertIDSelectorsToAttributeSelectors(selector)

357 {	593 {

358 // First we figure out where all the IDs are	594 // First we figure out where all the IDs are

359 let sep = "";	595 let sep = "";

360 let start = null;	596 let start = null;

361 let positions = [];	597 let positions = [];

362 for (let i = 0; i < selector.length; i++)	598 for (let i = 0; i < selector.length; i++)

363 {	599 {

(...skipping 33 matching lines...)
397 {	633 {

398 newSelector.push(selector.substring(i, pos.start));	634 newSelector.push(selector.substring(i, pos.start));

399 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');	635 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');

400 i = pos.end;	636 i = pos.end;

401 }	637 }

402 newSelector.push(selector.substring(i));	638 newSelector.push(selector.substring(i));

403	639

404 return newSelector.join("");	640 return newSelector.join("");

405 }	641 }

406	642

	643 function addCSSRules(rules, selectors, domain, exceptionDomains)

	644 {

	645 let unlessDomain = exceptionDomains.size > 0 ? [] : null;

	646

	647 exceptionDomains.forEach(name =>

	648 {

	649 // For domain-specific filters, include the exception domains only if

	650 // they're subdomains of the given domain.

	651 if (!domain \|\| name.substr(-domain.length - 1) == "." + domain)

	652 unlessDomain.push("*" + name);

	653 });

	654

	655 while (selectors.length)

	656 {

	657 let selector = selectors.splice(0, selectorLimit).join(", ");

	658

	659 // As of Safari 9.0 element IDs are matched as lowercase. We work around

	660 // this by converting to the attribute format [id="elementID"]

	661 selector = convertIDSelectorsToAttributeSelectors(selector);

	662

	663 let rule = {

	664 trigger: {"url-filter": matchDomain(domain),

	665 "url-filter-is-case-sensitive": true},

	666 action: {type: "css-display-none",

	667 selector: selector}

	668 };

	669

	670 if (unlessDomain)

	671 rule.trigger["unless-domain"] = unlessDomain;

	672

	673 rules.push(rule);

	674 }

	675 }

	676

407 /**	677 /**

408 * Check if two strings are a close match	678 * Check if two strings are a close match

409 *	679 *

410 * This function returns an edit operation, one of "substitute", "delete", and	680 * This function returns an edit operation, one of "substitute", "delete", and

411 * "insert", along with an index in the source string where the edit must occur	681 * "insert", along with an index in the source string where the edit must occur

412 * in order to arrive at the target string. If the strings are not a close	682 * in order to arrive at the target string. If the strings are not a close

413 * match, it returns null.	683 * match, it returns null.

414 *	684 *

415 * Two strings are considered to be a close match if they are one edit	685 * Two strings are considered to be a close match if they are one edit

416 * operation apart.	686 * operation apart.

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
455 // calculation.	725 // calculation.

456 if (diff < 0)	726 if (diff < 0)

457 {	727 {

458 let tmp = s;	728 let tmp = s;

459 s = t;	729 s = t;

460 t = tmp;	730 t = tmp;

461 }	731 }

462	732

463 let edit = null;	733 let edit = null;

464	734

465 let i = 0, j = 0;	735 let i = 0;
kzar 2017/05/29 09:49:54: Nit: Please split into separate lets. Also I wonder if we should leave them as undefined here and assign them in the initialisation bit of the for loops? (Especially with j it seems kind of misleading to set it to 0 here.)
Manish Jethani 2017/05/31 06:43:06 (replying to kzar, 2017/05/29 09:49:54):
> Nit: Please split into separate lets.
Done.
> Also I wonder if we should leave them as undefined here and assign them in the initialisation bit of the for loops? (Especially with j it seems kind of misleading to set it to 0 here.)
I feel like since the variables are in the outer scope it's actually clearer to initialize them here. Also j does need to start at 0 (see next comment).
	736 let j = 0;

466	737

467 // Start from the beginning and keep going until we hit a character that	738 // Start from the beginning and keep going until we hit a character that

468 // doesn't match.	739 // doesn't match.

469 for (; i < s.length; i++)	740 for (; i < s.length; i++)

470 {	741 {

471 if (s[i] != t[i])	742 if (s[i] != t[i])

472 break;	743 break;

473 }	744 }

474	745

475 // Now do exactly the same from the end, but also stop if we reach the	746 // Now do exactly the same from the end, but also stop if we reach the

476 // position where we terminated the previous loop.	747 // position where we terminated the previous loop.

477 for (; j < t.length; j++)	748 for (; j < t.length; j++)
kzar 2017/05/29 09:49:53: Since j is working backwards through the characters wouldn't it be clearer to start it as t.length - 1 and decrement it down to 0? I think then you wouldn't need to do `length - j` so often later on as well?
Manish Jethani 2017/05/31 06:43:06 (replying to kzar, 2017/05/29 09:49:53):
> Since j is working backwards through the characters wouldn't it be clearer to start it as t.length - 1 and decrement it down to 0? I think then you wouldn't need to do `length - j` so often later on as well?
Then we would need two variables, one for s.length - j and one for t.length - j. j is basically the offset from the end of both the strings.
478 {	749 {

479 if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1])	750 if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1])

480 break;	751 break;

481 }	752 }

482	753

483 if (diff == 0)	754 if (diff == 0)

484 {	755 {

485 // If the strings are equal in length and the delta isn't exactly one	756 // If the strings are equal in length and the delta isn't exactly one

486 // character, it's not a close match.	757 // character, it's not a close match.

487 if (t.length - j - i != 1)	758 if (t.length - j - i != 1)
kzar 2017/05/29 09:49:53: Probably a dumb question but wouldn't this also consider identical strings to not be a close match?
Manish Jethani 2017/05/31 06:43:07 (replying to kzar, 2017/05/29 09:49:53):
> Probably a dumb question but wouldn't this also consider identical strings to not be a close match?
Yes, identical strings are not a close match by definition, since there's no edit operation. It's debatable whether this should be the case, but anyway it doesn't matter since identical URL filters will be filtered out in the previous step when we eliminate redundant rules.
488 return null;	759 return null;

489 }	760 }

490 else if (i != t.length - j)	761 else if (i != t.length - j)

491 {	762 {

492 // For strings of unequal length, if we haven't found a match for every	763 // For strings of unequal length, if we haven't found a match for every

493 // single character in the shorter string counting from both the beginning	764 // single character in the shorter string counting from both the beginning

494 // and the end, it's not a close match.	765 // and the end, it's not a close match.

495 return null;	766 return null;

496 }	767 }

497	768

(...skipping 25 matching lines...) Expand all Loading...
523 edit.endIndex = s.length - j;	794 edit.endIndex = s.length - j;

524 }	795 }

525	796

526 return edit;	797 return edit;

527 }	798 }

528	799

529 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)	800 function eliminateRedundantRulesByURLFilter(rulesInfo, exhaustive)

530 {	801 {

531 const heuristicRange = 1000;	802 const heuristicRange = 1000;

532	803

	804 let ol = rulesInfo.length;

	805

533 // Throw out obviously redundant rules.	806 // Throw out obviously redundant rules.

534 return async(rulesInfo.map((ruleInfo, index) => () =>	807 return async(rulesInfo, (ruleInfo, index) => () =>

535 {	808 {

536 // If this rule is already marked as redundant, don't bother comparing it	809 // If this rule is already marked as redundant, don't bother comparing it

537 // with other rules.	810 // with other rules.

538 if (rulesInfo[index].redundant)	811 if (rulesInfo[index].redundant)

539 return;	812 return;

540	813

541 let limit = exhaustive ? rulesInfo.length :	814 let limit = exhaustive ? rulesInfo.length :

542 Math.min(index + heuristicRange, rulesInfo.length);	815 Math.min(index + heuristicRange, rulesInfo.length);

543	816

544 for (let i = index, j = i + 1; j < limit; j++)	817 for (let i = index, j = i + 1; j < limit; j++)

(...skipping 12 matching lines...) Expand all Loading...
557 {	830 {

558 rulesInfo[i].redundant = true;	831 rulesInfo[i].redundant = true;

559 break;	832 break;

560 }	833 }

561 }	834 }

562 else if (target.substring(0, source.length) == source)	835 else if (target.substring(0, source.length) == source)

563 {	836 {

564 rulesInfo[j].redundant = true;	837 rulesInfo[j].redundant = true;

565 }	838 }

566 }	839 }

567 }))	840 })

568 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));	841 .then(() => rulesInfo.filter(ruleInfo => !ruleInfo.redundant));

569 }	842 }

570	843

571 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)	844 function findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

572 {	845 {

573 // Closely matching rules are likely to be within a certain range. We only	846 // Closely matching rules are likely to be within a certain range. We only

574 // look for matches within this range by default. If we increase this value,	847 // look for matches within this range by default. If we increase this value,

575 // it can give us more matches and a smaller resulting rule set, but possibly	848 // it can give us more matches and a smaller resulting rule set, but possibly

576 // at a significant performance cost.	849 // at a significant performance cost.

577 //	850 //

(...skipping 141 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
719 if (best.length > 0)	992 if (best.length > 0)

720 {	993 {

721 let urlFilter = rule.trigger["url-filter"];	994 let urlFilter = rule.trigger["url-filter"];

722	995

723 let editIndex = best[0].edit.index;	996 let editIndex = best[0].edit.index;

724	997

725 if (!multiEdit)	998 if (!multiEdit)

726 {	999 {

727 // Merge all the matching rules into this one.	1000 // Merge all the matching rules into this one.

728	1001

729 let characters = [];	1002 let characters = [urlFilter[editIndex]];

730 let quantifier = "";	1003 let quantifier = "";

731	1004

732 for (let match of best)	1005 for (let match of best)

733 {	1006 {

734 if (match.edit.type == "delete")	1007 if (match.edit.type == "delete")

735 {	1008 {

736 quantifier = "?";	1009 quantifier = "?";

737 }	1010 }

738 else	1011 else

739 {	1012 {

740 let character = rulesInfo[match.index].rule	1013 let character = rulesInfo[match.index].rule

741 .trigger["url-filter"][editIndex];	1014 .trigger["url-filter"][editIndex];

742 characters.push(character);	1015

	1016 // Insert any hyphen at the beginning so it gets interpreted as a

	1017 // literal hyphen.

	1018 if (character == "-")

	1019 characters.unshift(character);

	1020 else

	1021 characters.push(character);

743 }	1022 }

744	1023

745 // Mark the target rule as merged so other rules don't try to merge	1024 // Mark the target rule as merged so other rules don't try to merge

746 // it again.	1025 // it again.

747 rulesInfo[match.index].merged = true;	1026 rulesInfo[match.index].merged = true;

748 }	1027 }

749	1028

750 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +	1029 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +

751 urlFilter.substring(editIndex + 1);	1030 urlFilter.substring(editIndex + 1);

752 if (characters.length > 0)	1031 if (characters.length > 1)

753 {	1032 {

754 urlFilter = urlFilter.substring(0, editIndex) + "[" +	1033 urlFilter = urlFilter.substring(0, editIndex) + "[" +

755 urlFilter[editIndex] + characters.join("") + "]" +	1034 characters.join("") + "]" +

756 urlFilter.substring(editIndex + 1);	1035 urlFilter.substring(editIndex + 1);

757 }	1036 }

758 }	1037 }

759 else	1038 else

760 {	1039 {

761 let editEndIndex = best[0].edit.endIndex;	1040 let editEndIndex = best[0].edit.endIndex;

762	1041

763 // Mark the target rule as merged so other rules don't try to merge it	1042 // Mark the target rule as merged so other rules don't try to merge it

764 // again.	1043 // again.

765 rulesInfo[best[0].index].merged = true;	1044 rulesInfo[best[0].index].merged = true;

766	1045

767 urlFilter = urlFilter.substring(0, editIndex) + "(" +	1046 urlFilter = urlFilter.substring(0, editIndex) + "(" +

768 urlFilter.substring(editIndex, editEndIndex) + ")?" +	1047 urlFilter.substring(editIndex, editEndIndex) + ")?" +

769 urlFilter.substring(editEndIndex);	1048 urlFilter.substring(editEndIndex);

770 }	1049 }

771	1050

772 rule.trigger["url-filter"] = urlFilter;	1051 rule.trigger["url-filter"] = urlFilter;

773	1052

774 // Mark this rule as one that has had other rules merged into it.	1053 // Mark this rule as one that has had other rules merged into it.

775 ruleInfo.mergedInto = true;	1054 ruleInfo.mergedInto = true;

776 }	1055 }

777 }	1056 }

778 }	1057 }

779	1058

780 function mergeRulesByURLFilter(rulesInfo, exhaustive)	1059 function mergeRulesByURLFilter(rulesInfo, exhaustive)

781 {	1060 {

782 return async(rulesInfo.map((ruleInfo, index) => () =>	1061 return async(rulesInfo, (ruleInfo, index) => () =>

783 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)	1062 findMatchesForRuleByURLFilter(rulesInfo, index, exhaustive)

784 ))	1063 )

785 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));	1064 .then(() => mergeCandidateRulesByURLFilter(rulesInfo));

786 }	1065 }

787	1066

788 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)	1067 function mergeRulesByArrayProperty(rulesInfo, propertyType, property)

789 {	1068 {

790 if (rulesInfo.length <= 1)	1069 if (rulesInfo.length <= 1)

791 return;	1070 return;

792	1071

793 let oneRuleInfo = rulesInfo.shift();	1072 let valueSet = new Set(rulesInfo[0].rule[propertyType][property]);

794 let valueSet = new Set(oneRuleInfo.rule[propertyType][property]);	1073

795	1074 for (let i = 1; i < rulesInfo.length; i++)

796 for (let ruleInfo of rulesInfo)	1075 {

797 {	1076 for (let value of rulesInfo[i].rule[propertyType][property] || [])

798 if (ruleInfo.rule[propertyType][property])	1077 valueSet.add(value);

799 {	1078

800 for (let value of ruleInfo.rule[propertyType][property])	1079 rulesInfo[i].merged = true;

801 valueSet.add(value);

802 }

803

804 ruleInfo.merged = true;

805 }	1080 }

806	1081

807 if (valueSet.size > 0)	1082 if (valueSet.size > 0)

808 oneRuleInfo.rule[propertyType][property] = Array.from(valueSet);	1083 rulesInfo[0].rule[propertyType][property] = Array.from(valueSet);

809	1084

810 oneRuleInfo.mergedInto = true;	1085 rulesInfo[0].mergedInto = true;

811 }	1086 }

812	1087

813 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)	1088 function groupRulesByMergeableProperty(rulesInfo, propertyType, property)

814 {	1089 {

815 let mergeableRulesInfoByGroup = new Map();	1090 let mergeableRulesInfoByGroup = new Map();

816	1091

817 for (let ruleInfo of rulesInfo)	1092 for (let ruleInfo of rulesInfo)

818 {	1093 {

819 let copy = {	1094 let copy = {

820 trigger: Object.assign({}, ruleInfo.rule.trigger),	1095 trigger: Object.assign({}, ruleInfo.rule.trigger),

(...skipping 17 matching lines...) Expand all Loading...
838	1113

839 function mergeRules(rules, exhaustive)	1114 function mergeRules(rules, exhaustive)

840 {	1115 {

841 let rulesInfo = rules.map(rule => ({rule}));	1116 let rulesInfo = rules.map(rule => ({rule}));

842	1117

843 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];	1118 let arrayPropertiesToMergeBy = ["resource-type", "if-domain"];

844	1119

845 return async(() =>	1120 return async(() =>

846 {	1121 {

847 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");	1122 let map = groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter");

848 return async(Array.from(map.values()).map(mergeableRulesInfo => () =>	1123 return async(map.values(), mergeableRulesInfo => () =>

849 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)	1124 eliminateRedundantRulesByURLFilter(mergeableRulesInfo, exhaustive)

850 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))	1125 .then(rulesInfo => mergeRulesByURLFilter(rulesInfo, exhaustive))

851 ))	1126 )

852 .then(() =>	1127 .then(() =>

853 {	1128 {

854 // Filter out rules that are redundant or have been merged into other	1129 // Filter out rules that are redundant or have been merged into other

855 // rules.	1130 // rules.

856 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&	1131 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant &&

857 !ruleInfo.merged);	1132 !ruleInfo.merged);

858 });	1133 });

859 })	1134 })

860 .then(() => async(arrayPropertiesToMergeBy.map(arrayProperty => () =>	1135 .then(() => async(arrayPropertiesToMergeBy, arrayProperty => () =>

861 {	1136 {

862 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",	1137 let map = groupRulesByMergeableProperty(rulesInfo, "trigger",

863 arrayProperty);	1138 arrayProperty);

864 return async(Array.from(map.values()).map(mergeableRulesInfo => () =>	1139 return async(map.values(), mergeableRulesInfo => () =>

865 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)	1140 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty)

866 ))	1141 )

867 .then(() =>	1142 .then(() =>

868 {	1143 {

869 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);	1144 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged);

870 });	1145 });

871 })))	1146 }))

872 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));	1147 .then(() => rulesInfo.map(ruleInfo => ruleInfo.rule));

873 }	1148 }

874	1149

875 let ContentBlockerList =	1150 let ContentBlockerList =

876 /**	1151 /**

877 * Create a new Adblock Plus filter to content blocker list converter	1152 * Create a new Adblock Plus filter to content blocker list converter

878 *	1153 *

879 * @param {object} options Options for content blocker list generation	1154 * @param {object} options Options for content blocker list generation

880 *	1155 *

881 * @constructor	1156 * @constructor

882 */	1157 */

883 exports.ContentBlockerList = function(options)	1158 exports.ContentBlockerList = function (options)

884 {	1159 {

885 const defaultOptions = {	1160 const defaultOptions = {

886 merge: "auto"	1161 merge: "auto"

887 };	1162 };

888	1163

889 this.options = Object.assign({}, defaultOptions, options);	1164 this.options = Object.assign({}, defaultOptions, options);

890	1165

891 this.requestFilters = [];	1166 this.requestFilters = [];

892 this.requestExceptions = [];	1167 this.requestExceptions = [];

893 this.elemhideFilters = [];	1168 this.elemhideFilters = [];

894 this.elemhideExceptions = [];	1169 this.elemhideExceptions = [];

	1170 this.genericblockExceptions = [];

	1171 this.generichideExceptions = [];

895 this.elemhideSelectorExceptions = new Map();	1172 this.elemhideSelectorExceptions = new Map();

896 };	1173 };

897	1174

898 /**	1175 /**

899 * Add Adblock Plus filter to be converted	1176 * Add Adblock Plus filter to be converted

900 *	1177 *

901 * @param {Filter} filter Filter to convert	1178 * @param {Filter} filter Filter to convert

902 */	1179 */

903 ContentBlockerList.prototype.addFilter = function(filter)	1180 ContentBlockerList.prototype.addFilter = function(filter)

904 {	1181 {

905 if (filter.sitekeys)	1182 if (filter.sitekeys)

906 return;	1183 return;

907 if (filter instanceof filterClasses.RegExpFilter &&	1184 if (filter instanceof filterClasses.RegExpFilter &&

908 filter.regexpSource == null)	1185 filter.regexpSource == null)

909 return;	1186 return;

910	1187

911 if (filter instanceof filterClasses.BlockingFilter)	1188 if (filter instanceof filterClasses.BlockingFilter)

912 this.requestFilters.push(filter);	1189 this.requestFilters.push(filter);

913	1190

914 if (filter instanceof filterClasses.WhitelistFilter)	1191 if (filter instanceof filterClasses.WhitelistFilter)

915 {	1192 {

916 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))	1193 if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))

917 this.requestExceptions.push(filter);	1194 this.requestExceptions.push(filter);

918	1195

919 if (filter.contentType & typeMap.ELEMHIDE)	1196 if (filter.contentType & typeMap.GENERICBLOCK)

920 this.elemhideExceptions.push(filter);	1197 this.genericblockExceptions.push(filter);

	1198

	1199 if (filter.contentType & typeMap.ELEMHIDE)

	1200 this.elemhideExceptions.push(filter);

	1201 else if (filter.contentType & typeMap.GENERICHIDE)

	1202 this.generichideExceptions.push(filter);

921 }	1203 }

922	1204

923 if (filter instanceof filterClasses.ElemHideFilter)	1205 if (filter instanceof filterClasses.ElemHideFilter)

924 this.elemhideFilters.push(filter);	1206 this.elemhideFilters.push(filter);

925	1207

926 if (filter instanceof filterClasses.ElemHideException)	1208 if (filter instanceof filterClasses.ElemHideException)

927 {	1209 {

928 let domains = this.elemhideSelectorExceptions[filter.selector];	1210 let domains = this.elemhideSelectorExceptions[filter.selector];

929 if (!domains)	1211 if (!domains)

930 domains = this.elemhideSelectorExceptions[filter.selector] = [];	1212 domains = this.elemhideSelectorExceptions[filter.selector] = [];

931	1213

932 parseDomains(filter.domains, domains, []);	1214 parseDomains(filter.domains, domains, []);

933 }	1215 }

934 };	1216 };

935	1217

936 /**	1218 /**

937 * Generate content blocker list for all filters that were added	1219 * Generate content blocker list for all filters that were added

938 */	1220 */

939 ContentBlockerList.prototype.generateRules = function()	1221 ContentBlockerList.prototype.generateRules = function()

940 {	1222 {

941 let cssRules = [];	1223 let cssRules = [];

942 let cssExceptionRules = [];	1224 let cssExceptionRules = [];

943 let blockingRules = [];	1225 let blockingRules = [];

944 let blockingExceptionRules = [];	1226 let blockingExceptionRules = [];

945	1227

946 let ruleGroups = [cssRules, cssExceptionRules,	1228 let ruleGroups = [cssRules, cssExceptionRules,

947 blockingRules, blockingExceptionRules];	1229 blockingRules, blockingExceptionRules];

948	1230

	1231 let genericSelectors = [];

949 let groupedElemhideFilters = new Map();	1232 let groupedElemhideFilters = new Map();

	1233

950 for (let filter of this.elemhideFilters)	1234 for (let filter of this.elemhideFilters)

951 {	1235 {

952 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);	1236 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);

953 if (!result)	1237 if (!result)

954 continue;	1238 continue;

955	1239

956 if (result.matchDomains.length == 0)	1240 if (result.matchDomains.length == 0)

957 result.matchDomains = ["^https?://"];	1241 {

958	1242 genericSelectors.push(result.selector);

959 for (let matchDomain of result.matchDomains)	1243 }

960 {	1244 else

961 let group = groupedElemhideFilters.get(matchDomain) || [];	1245 {

962 group.push(result.selector);	1246 for (let matchDomain of result.matchDomains)

963 groupedElemhideFilters.set(matchDomain, group);	1247 {

964 }	1248 let group = groupedElemhideFilters.get(matchDomain) || [];

965 }	1249 group.push(result.selector);

	1250 groupedElemhideFilters.set(matchDomain, group);

	1251 }

	1252 }

	1253 }

	1254

	1255 // Separate out the element hiding exceptions that have only a hostname part

	1256 // from the rest. This allows us to implement a workaround for issue #5345

	1257 // (WebKit bug #167423), but as a bonus it also reduces the number of

	1258 // generated rules. The downside is that the exception will only apply to the

	1259 // top-level document, not to iframes. We have to live with this until the

	1260 // WebKit bug is fixed in all supported versions of Safari.

	1261 // https://bugs.webkit.org/show_bug.cgi?id=167423

	1262 //

	1263 // Note that as a result of this workaround we end up with a huge rule set in

	1264 // terms of the amount of memory used. This can cause Node.js to throw

	1265 // "JavaScript heap out of memory". To avoid this, call Node.js with

	1266 // --max_old_space_size=4096

	1267 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions);

	1268

	1269 let genericSelectorExceptionDomains =

	1270 extractFilterDomains(this.generichideExceptions);

	1271 elemhideExceptionDomains.forEach(name =>

	1272 {

	1273 genericSelectorExceptionDomains.add(name);

	1274 });

	1275

	1276 addCSSRules(cssRules, genericSelectors, null,

	1277 genericSelectorExceptionDomains);

	1278

	1279 // Filter out whitelisted domains.

	1280 elemhideExceptionDomains.forEach(domain =>

	1281 groupedElemhideFilters.delete(domain));

966	1282

967 groupedElemhideFilters.forEach((selectors, matchDomain) =>	1283 groupedElemhideFilters.forEach((selectors, matchDomain) =>

968 {	1284 {

969 while (selectors.length)	1285 addCSSRules(cssRules, selectors, matchDomain, elemhideExceptionDomains);

970 {

971 let selector = selectors.splice(0, selectorLimit).join(", ");

972

973 // As of Safari 9.0 element IDs are matched as lowercase. We work around

974 // this by converting to the attribute format [id="elementID"]

975 selector = convertIDSelectorsToAttributeSelectors(selector);

976

977 cssRules.push({

978 trigger: {"url-filter": matchDomain,

979 "url-filter-is-case-sensitive": true},

980 action: {type: "css-display-none",

981 selector: selector}

982 });

983 }

984 });	1286 });

985	1287

986 for (let filter of this.elemhideExceptions)	1288 let requestFilterExceptionDomains = [];

987 {	1289 for (let filter of this.genericblockExceptions)

988 convertFilterAddRules(cssExceptionRules, filter,	1290 {

989 "ignore-previous-rules", false);	1291 let parsed = parseFilterRegexpSource(filter.regexpSource);

	1292 if (parsed.hostname)

	1293 requestFilterExceptionDomains.push(parsed.hostname);

990 }	1294 }

991	1295

992 for (let filter of this.requestFilters)	1296 for (let filter of this.requestFilters)

993 convertFilterAddRules(blockingRules, filter, "block", true);	1297 {

	1298 convertFilterAddRules(blockingRules, filter, "block", true,

	1299 requestFilterExceptionDomains);

	1300 }

994	1301

995 for (let filter of this.requestExceptions)	1302 for (let filter of this.requestExceptions)

996 {	1303 {

997 convertFilterAddRules(blockingExceptionRules, filter,	1304 convertFilterAddRules(blockingExceptionRules, filter,

998 "ignore-previous-rules", true);	1305 "ignore-previous-rules", true);

999 }	1306 }

1000	1307

1001 return async(ruleGroups.map((group, index) => () =>	1308 return async(ruleGroups, (group, index) => () =>

1002 {	1309 {

1003 let next = () =>	1310 let next = () =>

1004 {	1311 {

1005 if (index == ruleGroups.length - 1)	1312 if (index == ruleGroups.length - 1)

1006 return ruleGroups.reduce((all, rules) => all.concat(rules), []);	1313 return ruleGroups.reduce((all, rules) => all.concat(rules), []);

1007 };	1314 };

1008	1315

1009 ruleGroups[index] = ruleGroups[index].filter(rule => !hasNonASCI(rule));

1010

1011 if (this.options.merge == "all" ||	1316 if (this.options.merge == "all" ||

1012 (this.options.merge == "auto" &&	1317 (this.options.merge == "auto" &&

1013 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))	1318 ruleGroups.reduce((n, group) => n + group.length, 0) > 50000))

1014 {	1319 {

1015 return mergeRules(ruleGroups[index], this.options.merge == "all")	1320 return mergeRules(ruleGroups[index], this.options.merge == "all")

1016 .then(rules =>	1321 .then(rules =>

1017 {	1322 {

1018 ruleGroups[index] = rules;	1323 ruleGroups[index] = rules;

1019 return next();	1324 return next();

1020 });	1325 });

1021 }	1326 }

1022	1327

1023 return next();	1328 return next();

1024 }));	1329 });

1025 };	1330 };

LEFT	RIGHT