lib/abp2blocklist.js - Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc

Side by Side Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Generate single rule for filter covering all raw types Created July 3, 2017, 6:28 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 15 matching lines...) Expand all Loading...
26 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

27 const whitelistableRequestTypes = (typeMap.IMAGE	27 const whitelistableRequestTypes = (typeMap.IMAGE

28 | typeMap.STYLESHEET	28 | typeMap.STYLESHEET

29 | typeMap.SCRIPT	29 | typeMap.SCRIPT

30 | typeMap.FONT	30 | typeMap.FONT

31 | typeMap.MEDIA	31 | typeMap.MEDIA

32 | typeMap.POPUP	32 | typeMap.POPUP

33 | typeMap.OBJECT	33 | typeMap.OBJECT

34 | typeMap.OBJECT_SUBREQUEST	34 | typeMap.OBJECT_SUBREQUEST

35 | typeMap.XMLHTTPREQUEST	35 | typeMap.XMLHTTPREQUEST

	36 | typeMap.WEBSOCKET

	37 | typeMap.WEBRTC

36 | typeMap.PING	38 | typeMap.PING

37 | typeMap.SUBDOCUMENT	39 | typeMap.SUBDOCUMENT

38 | typeMap.OTHER);	40 | typeMap.OTHER);

39	41

40 function parseDomains(domains, included, excluded)	42 function parseDomains(domains, included, excluded)

41 {	43 {

42 for (let domain in domains)	44 for (let domain in domains)

43 {	45 {

44 if (domain != "")	46 if (domain != "")

45 {	47 {

(...skipping 11 matching lines...) Expand all Loading...
57 function escapeRegExp(s)	59 function escapeRegExp(s)

58 {	60 {

59 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");	61 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

60 }	62 }

61	63

62 function matchDomain(domain)	64 function matchDomain(domain)

63 {	65 {

64 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	66 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

65 }	67 }

66	68

	69 function getURLSchemes(contentType)

	70 {

	71 // If the given content type includes all supported URL schemes, simply

	72 // return a single generic URL scheme pattern. This minimizes the size of the

	73 // generated rule set. The downside to this is that it will also match

	74 // schemes that we do not want to match (e.g. "ftp://"), but this can be

	75 // mitigated by adding exceptions for those schemes.

	76 if (contentType & typeMap.WEBSOCKET && contentType & typeMap.WEBRTC &&

	77 contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))

	78 return ["[^:]+:(//)?"];

	79

	80 let urlSchemes = [];

	81

	82 if (contentType & typeMap.WEBSOCKET)

	83 urlSchemes.push("wss?://");

	84

	85 if (contentType & typeMap.WEBRTC)

	86 urlSchemes.push("stuns?:", "turns?:");

	87

	88 if (contentType & ~(typeMap.WEBSOCKET | typeMap.WEBRTC))

	89 urlSchemes.push("https?://");

	90

	91 return urlSchemes;

	92 }

	93

67 function findSubdomainsInList(domain, list)	94 function findSubdomainsInList(domain, list)

68 {	95 {

69 let subdomains = [];	96 let subdomains = [];

70 let suffixLength = domain.length + 1;	97 let suffixLength = domain.length + 1;

71	98

72 for (let name of list)	99 for (let name of list)

73 {	100 {

74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)	101 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

75 subdomains.push(name.slice(0, -suffixLength));	102 subdomains.push(name.slice(0, -suffixLength));

76 }	103 }

(...skipping 13 matching lines...) Expand all Loading...
90 return {matchDomains: included.map(matchDomain), selector: filter.selector};	117 return {matchDomains: included.map(matchDomain), selector: filter.selector};

91 }	118 }

92	119

93 /**	120 /**

94 * Parse the given filter "regexpSource" string. Producing a regular expression,	121 * Parse the given filter "regexpSource" string. Producing a regular expression,

95 * extracting the hostname (if any), deciding if the regular expression is safe	122 * extracting the hostname (if any), deciding if the regular expression is safe

96 * to be converted + matched as lower case and noting if the source contains	123 * to be converted + matched as lower case and noting if the source contains

97 * anything after the hostname.)	124 * anything after the hostname.)

98 *	125 *

99 * @param {string} text regexpSource property of a filter	126 * @param {string} text regexpSource property of a filter

	127 * @param {string} urlScheme The URL scheme to use in the regular expression

100 * @returns {object} An object containing a regular expression string, a bool	128 * @returns {object} An object containing a regular expression string, a bool

101 * indicating if the filter can be safely matched as lower	129 * indicating if the filter can be safely matched as lower

102 * case, a hostname string (or undefined) and a bool	130 * case, a hostname string (or undefined) and a bool

103 * indicating if the source only contains a hostname or not:	131 * indicating if the source only contains a hostname or not:

104 * {regexp: "...",	132 * {regexp: "...",

105 * canSafelyMatchAsLowercase: true/false,	133 * canSafelyMatchAsLowercase: true/false,

106 * hostname: "...",	134 * hostname: "...",

107 * justHostname: true/false}	135 * justHostname: true/false}

108 */	136 */

109 function parseFilterRegexpSource(text)	137 function parseFilterRegexpSource(text, urlScheme)

110 {	138 {

111 let regexp = [];	139 let regexp = [];

112 let lastIndex = text.length - 1;	140 let lastIndex = text.length - 1;

113 let hostname;	141 let hostname;

114 let hostnameStart = null;	142 let hostnameStart = null;

115 let hostnameFinished = false;	143 let hostnameFinished = false;

116 let justHostname = false;	144 let justHostname = false;

117 let canSafelyMatchAsLowercase = false;	145 let canSafelyMatchAsLowercase = false;

118	146

	147 if (!urlScheme)

	148 urlScheme = getURLSchemes()[0];

	149

119 for (let i = 0; i < text.length; i++)	150 for (let i = 0; i < text.length; i++)

120 {	151 {

121 let c = text[i];	152 let c = text[i];

122	153

123 if (hostnameFinished)	154 if (hostnameFinished)

124 justHostname = false;	155 justHostname = false;

125	156

126 // If we're currently inside the hostname we have to be careful not to	157 // If we're currently inside the hostname we have to be careful not to

127 // escape any characters until after we have converted it to punycode.	158 // escape any characters until after we have converted it to punycode.

128 if (hostnameStart != null && !hostnameFinished)	159 if (hostnameStart != null && !hostnameFinished)

(...skipping 30 matching lines...) Expand all Loading...
159 }	190 }

160 if (i == lastIndex)	191 if (i == lastIndex)

161 {	192 {

162 regexp.push("$");	193 regexp.push("$");

163 break;	194 break;

164 }	195 }

165 if (i == 1 && text[0] == "|")	196 if (i == 1 && text[0] == "|")

166 {	197 {

167 hostnameStart = i + 1;	198 hostnameStart = i + 1;

168 canSafelyMatchAsLowercase = true;	199 canSafelyMatchAsLowercase = true;

169 regexp.push("https?://([^/]+\\.)?");	200 regexp.push(urlScheme + "([^/]+\\.)?");

170 break;	201 break;

171 }	202 }

172 regexp.push("\\|");	203 regexp.push("\\|");

173 break;	204 break;

174 case "/":	205 case "/":

175 if (!hostnameFinished &&	206 if (!hostnameFinished &&

176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	207 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

177 {	208 {

178 hostnameStart = i + 1;	209 hostnameStart = i + 1;

179 canSafelyMatchAsLowercase = true;	210 canSafelyMatchAsLowercase = true;

(...skipping 14 matching lines...) Expand all Loading...
194 }	225 }

195	226

196 return {	227 return {

197 regexp: regexp.join(""),	228 regexp: regexp.join(""),

198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	229 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

199 hostname: hostname,	230 hostname: hostname,

200 justHostname: justHostname	231 justHostname: justHostname

201 };	232 };

202 }	233 }

203	234

204 function getResourceTypes(filter)	235 function getResourceTypes(contentType)

205 {	236 {

206 let types = [];	237 let types = [];

207	238

208 if (filter.contentType & typeMap.IMAGE)	239 if (contentType & typeMap.IMAGE)

209 types.push("image");	240 types.push("image");

210 if (filter.contentType & typeMap.STYLESHEET)	241 if (contentType & typeMap.STYLESHEET)

211 types.push("style-sheet");	242 types.push("style-sheet");

212 if (filter.contentType & typeMap.SCRIPT)	243 if (contentType & typeMap.SCRIPT)

213 types.push("script");	244 types.push("script");

214 if (filter.contentType & typeMap.FONT)	245 if (contentType & typeMap.FONT)

215 types.push("font");	246 types.push("font");

216 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT))	247 if (contentType & (typeMap.MEDIA | typeMap.OBJECT))

217 types.push("media");	248 types.push("media");

218 if (filter.contentType & typeMap.POPUP)	249 if (contentType & typeMap.POPUP)

219 types.push("popup");	250 types.push("popup");

220 if (filter.contentType & (typeMap.XMLHTTPREQUEST |	251 if (contentType & (typeMap.XMLHTTPREQUEST |

	252 typeMap.WEBSOCKET |

	253 typeMap.WEBRTC |

221 typeMap.OBJECT_SUBREQUEST |	254 typeMap.OBJECT_SUBREQUEST |

222 typeMap.PING |	255 typeMap.PING |

223 typeMap.OTHER))	256 typeMap.OTHER))

	257 {

224 types.push("raw");	258 types.push("raw");

225 if (filter.contentType & typeMap.SUBDOCUMENT)	259 }

	260 if (contentType & typeMap.SUBDOCUMENT)

226 types.push("document");	261 types.push("document");

227	262

228 return types;	263 return types;

229 }	264 }

230	265

	266 function makeRuleCopies(trigger, action, urlSchemes)

	267 {

	268 let copies = [];

	269

	270 // Always make a deep copy of the rule, since rules may have to be

	271 // manipulated individually at a later stage.

	272 let stringifiedTrigger = JSON.stringify(trigger);

	273

	274 let filterPattern = trigger["url-filter"].substring(1);

	275 let startIndex = 0;

	276

	277 // If the URL filter already begins with the first URL scheme pattern, skip

	278 // it.

	279 if (trigger["url-filter"].startsWith("^" + urlSchemes[0]))

	280 {

	281 filterPattern = filterPattern.substring(urlSchemes[0].length);

	282 startIndex = 1;

	283 }

	284 else

	285 {

	286 filterPattern = ".*" + filterPattern;

	287 }

	288

	289 for (let i = startIndex; i < urlSchemes.length; i++)

	290 {

	291 let copyTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	292 "url-filter": "^" + urlSchemes[i] + filterPattern

	293 });

	294 copies.push({trigger: copyTrigger, action});

	295 }

	296

	297 return copies;

	298 }

	299

231 function convertFilterAddRules(rules, filter, action, withResourceTypes,	300 function convertFilterAddRules(rules, filter, action, withResourceTypes,

232 exceptionDomains)	301 exceptionDomains, contentType)

233 {	302 {

234 let parsed = parseFilterRegexpSource(filter.regexpSource);	303 if (!contentType)

	304 contentType = filter.contentType;

	305

	306 // If WebSocket or WebRTC are given along with other options but not

	307 // including all three of WebSocket, WebRTC, and XMLHttpRequest, we must

	308 // generate multiple rules. For example, for the filter

	309 // "foo$websocket,image", we must generate one rule with "^wss?://" and "raw"

	310 // and another rule with "^https?://" and "image". If we merge the two, we

	311 // end up blocking requests of type XMLHttpRequest inadvertently.

	312 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET &&

	313 !(contentType & typeMap.WEBRTC &&

	314 contentType & typeMap.XMLHTTPREQUEST)) ||

	315 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC &&

	316 !(contentType & typeMap.WEBSOCKET &&

	317 contentType & typeMap.XMLHTTPREQUEST)))

	318 {

	319 if (contentType & typeMap.WEBSOCKET)

	320 {

	321 convertFilterAddRules(rules, filter, action, withResourceTypes,

	322 exceptionDomains, typeMap.WEBSOCKET);

	323 }

	324

	325 if (contentType & typeMap.WEBRTC)

	326 {

	327 convertFilterAddRules(rules, filter, action, withResourceTypes,

	328 exceptionDomains, typeMap.WEBRTC);

	329 }

	330

	331 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);

	332

	333 if (!contentType)

	334 return;

	335 }

	336

	337 let urlSchemes = getURLSchemes(contentType);

	338 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

235	339

236 // For the special case of $document whitelisting filters with just a domain	340 // For the special case of $document whitelisting filters with just a domain

237 // we can generate an equivalent blocking rule exception using if-domain.	341 // we can generate an equivalent blocking rule exception using if-domain.

238 if (filter instanceof filterClasses.WhitelistFilter &&	342 if (filter instanceof filterClasses.WhitelistFilter &&

239 filter.contentType & typeMap.DOCUMENT &&	343 contentType & typeMap.DOCUMENT &&

240 parsed.justHostname)	344 parsed.justHostname)

241 {	345 {

242 rules.push({	346 rules.push({

243 trigger: {	347 trigger: {

244 "url-filter": ".*",	348 "url-filter": ".*",

245 "if-domain": ["*" + parsed.hostname]	349 "if-domain": ["*" + parsed.hostname]

246 },	350 },

247 action: {type: "ignore-previous-rules"}	351 action: {type: "ignore-previous-rules"}

248 });	352 });

249 // If the filter contains other supported options we'll need to generate	353 // If the filter contains other supported options we'll need to generate

250 // further rules for it, but if not we can simply return now.	354 // further rules for it, but if not we can simply return now.

251 if (!(filter.contentType & whitelistableRequestTypes))	355 if (!(contentType & whitelistableRequestTypes))

252 return;	356 return;

253 }	357 }

254	358

255 let trigger = {"url-filter": parsed.regexp};	359 let trigger = {"url-filter": parsed.regexp};

256	360

257 // Limit rules to HTTP(S) URLs	361 // If the URL filter begins with one of the URL schemes for this content

258 if (!/^(\^\|http)/i.test(trigger["url-filter"]))	362 // type, we generate additional rules for all the URL scheme patterns;

259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];	363 // otherwise, if the start of the URL filter literally matches the first URL

	364 // scheme pattern, we just generate additional rules for the remaining URL

	365 // scheme patterns.

	366 //

	367 // For example, "stun:foo$webrtc" will give us "stun:foo", then we add a "^"

	368 // in front of this and generate two additional rules for

	369 // "^stuns?:.*stun:foo" and "^turns?:.*stun:foo". On the other hand,

	370 // "||foo$webrtc" will give us "^stuns?:([^/]+\\.)?foo", so we just generate

	371 // "^turns?:([^/]+\\.)?foo" in addition.

	372 //

	373 // Note that the filter can be already anchored to the beginning

	374 // (e.g. "|stun:foo$webrtc"), in which case we do not generate any additional

	375 // rules.

	376 let needAltRules = trigger["url-filter"][0] != "^" ||

	377 trigger["url-filter"].startsWith("^" + urlSchemes[0]);

	378

	379 if (trigger["url-filter"][0] != "^")

	380 {

	381 if (!urlSchemes.some(scheme => new RegExp("^" + scheme)

	382 .test(trigger["url-filter"])))

	383 {

	384 trigger["url-filter"] = urlSchemes[0] + ".*" + trigger["url-filter"];

	385 }

	386

	387 trigger["url-filter"] = "^" + trigger["url-filter"];

	388 }

260	389

261 // For rules containing only a hostname we know that we're matching against	390 // For rules containing only a hostname we know that we're matching against

262 // a lowercase string unless the matchCase option was passed.	391 // a lowercase string unless the matchCase option was passed.

263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	392 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

264 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	393 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

265	394

266 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)	395 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)

267 trigger["url-filter-is-case-sensitive"] = true;	396 trigger["url-filter-is-case-sensitive"] = true;

268	397

269 let included = [];	398 let included = [];

270 let excluded = [];	399 let excluded = [];

271	400

272 parseDomains(filter.domains, included, excluded);	401 parseDomains(filter.domains, included, excluded);

273	402

274 if (exceptionDomains)	403 if (exceptionDomains)

275 excluded = excluded.concat(exceptionDomains);	404 excluded = excluded.concat(exceptionDomains);

276	405

277 if (withResourceTypes)	406 if (withResourceTypes)

278 {	407 {

279 trigger["resource-type"] = getResourceTypes(filter);	408 trigger["resource-type"] = getResourceTypes(contentType);

280	409

281 if (trigger["resource-type"].length == 0)	410 if (trigger["resource-type"].length == 0)

282 return;	411 return;

283 }	412 }

284	413

285 if (filter.thirdParty != null)	414 if (filter.thirdParty != null)

286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	415 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

287	416

288 if (included.length > 0)	417 if (included.length > 0)

289 {	418 {

(...skipping 17 matching lines...) Expand all Loading...
307 else	436 else

308 {	437 {

309 trigger["if-domain"].push("*" + name);	438 trigger["if-domain"].push("*" + name);

310 }	439 }

311 }	440 }

312 }	441 }

313 else if (excluded.length > 0)	442 else if (excluded.length > 0)

314 {	443 {

315 trigger["unless-domain"] = excluded.map(name => "*" + name);	444 trigger["unless-domain"] = excluded.map(name => "*" + name);

316 }	445 }

317 else if (filter instanceof filterClasses.BlockingFilter &&

318 filter.contentType & typeMap.SUBDOCUMENT)

319 {

320 trigger["unless-top-url"] = [trigger["url-filter"]];

321 if (trigger["url-filter-is-case-sensitive"])

322 trigger["top-url-filter-is-case-sensitive"] = true;

323 }

324	446

325 rules.push({trigger: trigger, action: {type: action}});	447 rules.push({trigger: trigger, action: {type: action}});

	448

	449 if (needAltRules)

	450 {

	451 // Generate additional rules for any alternative URL schemes.

	452 for (let altRule of makeRuleCopies(trigger, {type: action}, urlSchemes))

	453 rules.push(altRule);

	454 }

326 }	455 }

327	456

328 function hasNonASCI(obj)	457 function hasNonASCI(obj)

329 {	458 {

330 if (typeof obj == "string")	459 if (typeof obj == "string")

331 {	460 {

332 if (/[^\x00-\x7F]/.test(obj))	461 if (/[^\x00-\x7F]/.test(obj))

333 return true;	462 return true;

334 }	463 }

335	464

(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
539 {	668 {

540 convertFilterAddRules(rules, filter, "block", true,	669 convertFilterAddRules(rules, filter, "block", true,

541 requestFilterExceptionDomains);	670 requestFilterExceptionDomains);

542 }	671 }

543	672

544 for (let filter of this.requestExceptions)	673 for (let filter of this.requestExceptions)

545 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);	674 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

546	675

547 return rules.filter(rule => !hasNonASCI(rule));	676 return rules.filter(rule => !hasNonASCI(rule));

548 };	677 };

OLD	NEW

« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | no next file with comments »