lib/abp2blocklist.js - Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc

Side by Side Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Generate additional rules if filter contains URL scheme Created June 2, 2017, 7:43 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 15 matching lines...) Expand all Loading...
26 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

27 const whitelistableRequestTypes = (typeMap.IMAGE	27 const whitelistableRequestTypes = (typeMap.IMAGE

28 \| typeMap.STYLESHEET	28 \| typeMap.STYLESHEET

29 \| typeMap.SCRIPT	29 \| typeMap.SCRIPT

30 \| typeMap.FONT	30 \| typeMap.FONT

31 \| typeMap.MEDIA	31 \| typeMap.MEDIA

32 \| typeMap.POPUP	32 \| typeMap.POPUP

33 \| typeMap.OBJECT	33 \| typeMap.OBJECT

34 \| typeMap.OBJECT_SUBREQUEST	34 \| typeMap.OBJECT_SUBREQUEST

35 \| typeMap.XMLHTTPREQUEST	35 \| typeMap.XMLHTTPREQUEST

	36 \| typeMap.WEBSOCKET

	37 \| typeMap.WEBRTC

36 \| typeMap.PING	38 \| typeMap.PING

37 \| typeMap.SUBDOCUMENT	39 \| typeMap.SUBDOCUMENT

38 \| typeMap.OTHER);	40 \| typeMap.OTHER);

39	41

40 function parseDomains(domains, included, excluded)	42 function parseDomains(domains, included, excluded)

41 {	43 {

42 for (let domain in domains)	44 for (let domain in domains)

43 {	45 {

44 if (domain != "")	46 if (domain != "")

45 {	47 {

(...skipping 11 matching lines...) Expand all Loading...
57 function escapeRegExp(s)	59 function escapeRegExp(s)

58 {	60 {

59 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	61 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

60 }	62 }

61	63

62 function matchDomain(domain)	64 function matchDomain(domain)

63 {	65 {

64 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	66 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

65 }	67 }

66	68

	69 function getURLSchemes(contentType)

	70 {

	71 if (contentType == typeMap.WEBSOCKET)

	72 return ["wss?://"];

	73

	74 if (contentType == typeMap.WEBRTC)

	75 return ["stuns?:", "turns?:"];

	76

	77 return ["https?://"];

	78 }

	79

67 function findSubdomainsInList(domain, list)	80 function findSubdomainsInList(domain, list)

68 {	81 {

69 let subdomains = [];	82 let subdomains = [];

70 let suffixLength = domain.length + 1;	83 let suffixLength = domain.length + 1;

71	84

72 for (let name of list)	85 for (let name of list)

73 {	86 {

74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)	87 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

75 subdomains.push(name.slice(0, -suffixLength));	88 subdomains.push(name.slice(0, -suffixLength));

76 }	89 }

(...skipping 13 matching lines...) Expand all Loading...
90 return {matchDomains: included.map(matchDomain), selector: filter.selector};	103 return {matchDomains: included.map(matchDomain), selector: filter.selector};

91 }	104 }

92	105

93 /**	106 /**

94 * Parse the given filter "regexpSource" string, producing a regular expression,	107 * Parse the given filter "regexpSource" string, producing a regular expression,

95 * extracting the hostname (if any), deciding if the regular expression is safe	108 * extracting the hostname (if any), deciding if the regular expression is safe

96 * to be converted + matched as lower case and noting if the source contains	109 * to be converted + matched as lower case and noting if the source contains

97 * anything after the hostname.	110 * anything after the hostname.

98 *	111 *

99 * @param {string} text regexpSource property of a filter	112 * @param {string} text regexpSource property of a filter

	113 * @param {string} urlScheme The URL scheme to use in the regular expression

100 * @returns {object} An object containing a regular expression string, a bool	114 * @returns {object} An object containing a regular expression string, a bool

101 * indicating if the filter can be safely matched as lower	115 * indicating if the filter can be safely matched as lower

102 * case, a hostname string (or undefined) and a bool	116 * case, a hostname string (or undefined) and a bool

103 * indicating if the source only contains a hostname or not:	117 * indicating if the source only contains a hostname or not:

104 * {regexp: "...",	118 * {regexp: "...",

105 * canSafelyMatchAsLowercase: true/false,	119 * canSafelyMatchAsLowercase: true/false,

106 * hostname: "...",	120 * hostname: "...",

107 * justHostname: true/false}	121 * justHostname: true/false}

108 */	122 */

109 function parseFilterRegexpSource(text)	123 function parseFilterRegexpSource(text, urlScheme)

110 {	124 {

111 let regexp = [];	125 let regexp = [];

112 let lastIndex = text.length - 1;	126 let lastIndex = text.length - 1;

113 let hostname;	127 let hostname;

114 let hostnameStart = null;	128 let hostnameStart = null;

115 let hostnameFinished = false;	129 let hostnameFinished = false;

116 let justHostname = false;	130 let justHostname = false;

117 let canSafelyMatchAsLowercase = false;	131 let canSafelyMatchAsLowercase = false;

118	132

	133 if (!urlScheme)

	134 urlScheme = getURLSchemes()[0];

	135

119 for (let i = 0; i < text.length; i++)	136 for (let i = 0; i < text.length; i++)

120 {	137 {

121 let c = text[i];	138 let c = text[i];

122	139

123 if (hostnameFinished)	140 if (hostnameFinished)

124 justHostname = false;	141 justHostname = false;

125	142

126 // If we're currently inside the hostname we have to be careful not to	143 // If we're currently inside the hostname we have to be careful not to

127 // escape any characters until after we have converted it to punycode.	144 // escape any characters until after we have converted it to punycode.

128 if (hostnameStart != null && !hostnameFinished)	145 if (hostnameStart != null && !hostnameFinished)

(...skipping 30 matching lines...) Expand all Loading...
159 }	176 }

160 if (i == lastIndex)	177 if (i == lastIndex)

161 {	178 {

162 regexp.push("$");	179 regexp.push("$");

163 break;	180 break;

164 }	181 }

165 if (i == 1 && text[0] == "\|")	182 if (i == 1 && text[0] == "\|")

166 {	183 {

167 hostnameStart = i + 1;	184 hostnameStart = i + 1;

168 canSafelyMatchAsLowercase = true;	185 canSafelyMatchAsLowercase = true;

169 regexp.push("https?://([^/]+\\.)?");	186 regexp.push(urlScheme + "([^/]+\\.)?");

170 break;	187 break;

171 }	188 }

172 regexp.push("\\\|");	189 regexp.push("\\\|");

173 break;	190 break;

174 case "/":	191 case "/":

175 if (!hostnameFinished &&	192 if (!hostnameFinished &&

176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	193 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

177 {	194 {

178 hostnameStart = i + 1;	195 hostnameStart = i + 1;

179 canSafelyMatchAsLowercase = true;	196 canSafelyMatchAsLowercase = true;

(...skipping 14 matching lines...) Expand all Loading...
194 }	211 }

195	212

196 return {	213 return {

197 regexp: regexp.join(""),	214 regexp: regexp.join(""),

198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	215 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

199 hostname: hostname,	216 hostname: hostname,

200 justHostname: justHostname	217 justHostname: justHostname

201 };	218 };

202 }	219 }

203	220

204 function getResourceTypes(filter)	221 function getResourceTypes(contentType)

205 {	222 {

206 let types = [];	223 let types = [];

207	224

208 if (filter.contentType & typeMap.IMAGE)	225 if (contentType & typeMap.IMAGE)

209 types.push("image");	226 types.push("image");

210 if (filter.contentType & typeMap.STYLESHEET)	227 if (contentType & typeMap.STYLESHEET)

211 types.push("style-sheet");	228 types.push("style-sheet");

212 if (filter.contentType & typeMap.SCRIPT)	229 if (contentType & typeMap.SCRIPT)

213 types.push("script");	230 types.push("script");

214 if (filter.contentType & typeMap.FONT)	231 if (contentType & typeMap.FONT)

215 types.push("font");	232 types.push("font");

216 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	233 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

217 types.push("media");	234 types.push("media");

218 if (filter.contentType & typeMap.POPUP)	235 if (contentType & typeMap.POPUP)

219 types.push("popup");	236 types.push("popup");

220 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	237 if (contentType & (typeMap.XMLHTTPREQUEST \|

	238 typeMap.WEBSOCKET \|

	239 typeMap.WEBRTC \|

221 typeMap.OBJECT_SUBREQUEST \|	240 typeMap.OBJECT_SUBREQUEST \|

222 typeMap.PING \|	241 typeMap.PING \|

223 typeMap.OTHER))	242 typeMap.OTHER))

	243 {

224 types.push("raw");	244 types.push("raw");

225 if (filter.contentType & typeMap.SUBDOCUMENT)	245 }

	246 if (contentType & typeMap.SUBDOCUMENT)

226 types.push("document");	247 types.push("document");

227	248

228 return types;	249 return types;

229 }	250 }

230	251

	252 function makeRuleCopies(trigger, action, urlSchemes)

	253 {

	254 let copies = [];

	255

	256 // Always make a deep copy of the rule, since rules may have to be

	257 // manipulated individually at a later stage.

	258 let stringifiedTrigger = JSON.stringify(trigger);

	259

	260 let filterPattern = trigger["url-filter"].substring(1);

	261 let startIndex = 0;

	262

	263 // If the URL filter already begins with the first URL scheme pattern, skip

	264 // it.

	265 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

	266 {

	267 filterPattern = filterPattern.substring(urlSchemes[0].length);

	268 startIndex = 1;

	269 }

	270 else

	271 {

	272 filterPattern = ".*" + filterPattern;

	273 }

	274

	275 for (let i = startIndex; i < urlSchemes.length; i++)

	276 {

	277 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	278 "url-filter": "^" + urlSchemes[i] + filterPattern

	279 });

	280 copies.push({trigger: copyTrigger, action});

	281 }

	282

	283 return copies;

	284 }

	285

231 function convertFilterAddRules(rules, filter, action, withResourceTypes,	286 function convertFilterAddRules(rules, filter, action, withResourceTypes,

232 exceptionDomains)	287 exceptionDomains)

233 {	288 {

234 let parsed = parseFilterRegexpSource(filter.regexpSource);	289 let contentType = filter.contentType;

	290

	291 // Support WebSocket and WebRTC only if they're the only option. If we try to

	292 // support them otherwise (e.g. $xmlhttprequest,websocket,webrtc), we end up

	293 // having to generate multiple rules, which bloats the rule set and is not

	294 // really necessary in practice.

	295 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET) \|\|

	296 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC))

	297 {

	298 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	299 }

	300

	301 let urlSchemes = getURLSchemes(contentType);

	302 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

235	303

236 // For the special case of $document whitelisting filters with just a domain	304 // For the special case of $document whitelisting filters with just a domain

237 // we can generate an equivalent blocking rule exception using if-domain.	305 // we can generate an equivalent blocking rule exception using if-domain.

238 if (filter instanceof filterClasses.WhitelistFilter &&	306 if (filter instanceof filterClasses.WhitelistFilter &&

239 filter.contentType & typeMap.DOCUMENT &&	307 contentType & typeMap.DOCUMENT &&

240 parsed.justHostname)	308 parsed.justHostname)

241 {	309 {

242 rules.push({	310 rules.push({

243 trigger: {	311 trigger: {

244 "url-filter": ".*",	312 "url-filter": ".*",

245 "if-domain": ["*" + parsed.hostname]	313 "if-domain": ["*" + parsed.hostname]

246 },	314 },

247 action: {type: "ignore-previous-rules"}	315 action: {type: "ignore-previous-rules"}

248 });	316 });

249 // If the filter contains other supported options we'll need to generate	317 // If the filter contains other supported options we'll need to generate

250 // further rules for it, but if not we can simply return now.	318 // further rules for it, but if not we can simply return now.

251 if (!(filter.contentType & whitelistableRequestTypes))	319 if (!(contentType & whitelistableRequestTypes))

252 return;	320 return;

253 }	321 }

254	322

255 let trigger = {"url-filter": parsed.regexp};	323 let trigger = {"url-filter": parsed.regexp};

256	324

257 // Limit rules to HTTP(S) URLs	325 // If the URL filter begins with one of the URL schemes for this content

258 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	326 // type, we generate additional rules for all the URL scheme patterns;

259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	327 // otherwise, if the start of the URL filter literally matches the first URL

	328 // scheme pattern, we just generate additional rules for the remaining URL

	329 // scheme patterns.

	330 //

	331 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	332 // in front of this and generate two additional rules for

	333 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,

	334 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	335 // "^turns?:([^/]+\\.)?foo" in addition.

	336 //

	337 // Note that the filter can be already anchored to the beginning

	338 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional

	339 // rules.

	340 let needAltRules = trigger["url-filter"][0] != "^" \|\|

	341 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	342

	343 if (trigger["url-filter"][0] != "^")

	344 {

	345 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	346 .test(trigger["url-filter"])))

	347 {

	348 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	349 }

	350

	351 trigger["url-filter"] = "^" + trigger["url-filter"];

	352 }

260	353

261 // For rules containing only a hostname we know that we're matching against	354 // For rules containing only a hostname we know that we're matching against

262 // a lowercase string unless the matchCase option was passed.	355 // a lowercase string unless the matchCase option was passed.

263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	356 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

264 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	357 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

265	358

266 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	359 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

267 trigger["url-filter-is-case-sensitive"] = true;	360 trigger["url-filter-is-case-sensitive"] = true;

268	361

269 let included = [];	362 let included = [];

270 let excluded = [];	363 let excluded = [];

271	364

272 parseDomains(filter.domains, included, excluded);	365 parseDomains(filter.domains, included, excluded);

273	366

274 if (exceptionDomains)	367 if (exceptionDomains)

275 excluded = excluded.concat(exceptionDomains);	368 excluded = excluded.concat(exceptionDomains);

276	369

277 if (withResourceTypes)	370 if (withResourceTypes)

278 {	371 {

279 trigger["resource-type"] = getResourceTypes(filter);	372 trigger["resource-type"] = getResourceTypes(contentType);

280	373

281 if (trigger["resource-type"].length == 0)	374 if (trigger["resource-type"].length == 0)

282 return;	375 return;

283 }	376 }

284	377

285 if (filter.thirdParty != null)	378 if (filter.thirdParty != null)

286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	379 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

287	380

288 if (included.length > 0)	381 if (included.length > 0)

289 {	382 {

(...skipping 19 matching lines...) Expand all Loading...
309 trigger["if-domain"].push("*" + name);	402 trigger["if-domain"].push("*" + name);

310 }	403 }

311 }	404 }

312 }	405 }

313 else if (excluded.length > 0)	406 else if (excluded.length > 0)

314 {	407 {

315 trigger["unless-domain"] = excluded.map(name => "*" + name);	408 trigger["unless-domain"] = excluded.map(name => "*" + name);

316 }	409 }

317	410

318 rules.push({trigger: trigger, action: {type: action}});	411 rules.push({trigger: trigger, action: {type: action}});

	412

	413 if (needAltRules)

	414 {

	415 // Generate additional rules for any alternative URL schemes.

	416 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

	417 rules.push(altRule);

	418 }

319 }	419 }

320	420

321 function hasNonASCI(obj)	421 function hasNonASCI(obj)

322 {	422 {

323 if (typeof obj == "string")	423 if (typeof obj == "string")

324 {	424 {

325 if (/[^\x00-\x7F]/.test(obj))	425 if (/[^\x00-\x7F]/.test(obj))

326 return true;	426 return true;

327 }	427 }

328	428

(...skipping 203 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
532 {	632 {

533 convertFilterAddRules(rules, filter, "block", true,	633 convertFilterAddRules(rules, filter, "block", true,

534 requestFilterExceptionDomains);	634 requestFilterExceptionDomains);

535 }	635 }

536	636

537 for (let filter of this.requestExceptions)	637 for (let filter of this.requestExceptions)

538 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);	638 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

539	639

540 return rules.filter(rule => !hasNonASCI(rule));	640 return rules.filter(rule => !hasNonASCI(rule));

541 };	641 };

OLD	NEW

« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »