lib/abp2blocklist.js - Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc

Side by Side Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Created May 31, 2017, 2:42 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2017 eyeo GmbH	3 * Copyright (C) 2006-2017 eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 15 matching lines...)
26 const typeMap = filterClasses.RegExpFilter.typeMap;	26 const typeMap = filterClasses.RegExpFilter.typeMap;

27 const whitelistableRequestTypes = (typeMap.IMAGE	27 const whitelistableRequestTypes = (typeMap.IMAGE

28 \| typeMap.STYLESHEET	28 \| typeMap.STYLESHEET

29 \| typeMap.SCRIPT	29 \| typeMap.SCRIPT

30 \| typeMap.FONT	30 \| typeMap.FONT

31 \| typeMap.MEDIA	31 \| typeMap.MEDIA

32 \| typeMap.POPUP	32 \| typeMap.POPUP

33 \| typeMap.OBJECT	33 \| typeMap.OBJECT

34 \| typeMap.OBJECT_SUBREQUEST	34 \| typeMap.OBJECT_SUBREQUEST

35 \| typeMap.XMLHTTPREQUEST	35 \| typeMap.XMLHTTPREQUEST

	36 \| typeMap.WEBSOCKET

	37 \| typeMap.WEBRTC

36 \| typeMap.PING	38 \| typeMap.PING

37 \| typeMap.SUBDOCUMENT	39 \| typeMap.SUBDOCUMENT

38 \| typeMap.OTHER);	40 \| typeMap.OTHER);

39	41

40 function parseDomains(domains, included, excluded)	42 function parseDomains(domains, included, excluded)

41 {	43 {

42 for (let domain in domains)	44 for (let domain in domains)

43 {	45 {

44 if (domain != "")	46 if (domain != "")

45 {	47 {

(...skipping 11 matching lines...)
57 function escapeRegExp(s)	59 function escapeRegExp(s)

58 {	60 {

59 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");	61 return s.replace(/[.*+?^${}()\|[\]\\]/g, "\\$&");

60 }	62 }

61	63

62 function matchDomain(domain)	64 function matchDomain(domain)

63 {	65 {

64 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";	66 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";

65 }	67 }

66	68

	69 function getURLSchemes(contentType)

	70 {

	71 if (contentType == typeMap.WEBSOCKET)

	72 return ["wss?://"];

	73

	74 if (contentType == typeMap.WEBRTC)

	75 return ["stuns?:", "turns?:"];

	76

	77 return ["https?://"];

	78 }

	79

67 function findSubdomainsInList(domain, list)	80 function findSubdomainsInList(domain, list)

68 {	81 {

69 let subdomains = [];	82 let subdomains = [];

70 let suffixLength = domain.length + 1;	83 let suffixLength = domain.length + 1;

71	84

72 for (let name of list)	85 for (let name of list)

73 {	86 {

74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)	87 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)

75 subdomains.push(name.slice(0, -suffixLength));	88 subdomains.push(name.slice(0, -suffixLength));

76 }	89 }

(...skipping 13 matching lines...)
90 return {matchDomains: included.map(matchDomain), selector: filter.selector};	103 return {matchDomains: included.map(matchDomain), selector: filter.selector};

91 }	104 }

92	105

93 /**	106 /**

94 * Parse the given filter "regexpSource" string. Producing a regular expression,	107 * Parse the given filter "regexpSource" string. Producing a regular expression,

95 * extracting the hostname (if any), deciding if the regular expression is safe	108 * extracting the hostname (if any), deciding if the regular expression is safe

96 * to be converted + matched as lower case and noting if the source contains	109 * to be converted + matched as lower case and noting if the source contains

97 * anything after the hostname.)	110 * anything after the hostname.)

98 *	111 *

99 * @param {string} text regexpSource property of a filter	112 * @param {string} text regexpSource property of a filter

	113 * @param {string} urlScheme The URL scheme to use in the regular expression

100 * @returns {object} An object containing a regular expression string, a bool	114 * @returns {object} An object containing a regular expression string, a bool

101 * indicating if the filter can be safely matched as lower	115 * indicating if the filter can be safely matched as lower

102 * case, a hostname string (or undefined) and a bool	116 * case, a hostname string (or undefined) and a bool

103 * indicating if the source only contains a hostname or not:	117 * indicating if the source only contains a hostname or not:

104 * {regexp: "...",	118 * {regexp: "...",

105 * canSafelyMatchAsLowercase: true/false,	119 * canSafelyMatchAsLowercase: true/false,

106 * hostname: "...",	120 * hostname: "...",

107 * justHostname: true/false}	121 * justHostname: true/false}

108 */	122 */

109 function parseFilterRegexpSource(text)	123 function parseFilterRegexpSource(text, urlScheme)

110 {	124 {

111 let regexp = [];	125 let regexp = [];

112 let lastIndex = text.length - 1;	126 let lastIndex = text.length - 1;

113 let hostname;	127 let hostname;

114 let hostnameStart = null;	128 let hostnameStart = null;

115 let hostnameFinished = false;	129 let hostnameFinished = false;

116 let justHostname = false;	130 let justHostname = false;

117 let canSafelyMatchAsLowercase = false;	131 let canSafelyMatchAsLowercase = false;

118	132

	133 if (!urlScheme)

	134 urlScheme = getURLSchemes()[0];

	135

119 for (let i = 0; i < text.length; i++)	136 for (let i = 0; i < text.length; i++)

120 {	137 {

121 let c = text[i];	138 let c = text[i];

122	139

123 if (hostnameFinished)	140 if (hostnameFinished)

124 justHostname = false;	141 justHostname = false;

125	142

126 // If we're currently inside the hostname we have to be careful not to	143 // If we're currently inside the hostname we have to be careful not to

127 // escape any characters until after we have converted it to punycode.	144 // escape any characters until after we have converted it to punycode.

128 if (hostnameStart != null && !hostnameFinished)	145 if (hostnameStart != null && !hostnameFinished)

(...skipping 30 matching lines...)
159 }	176 }

160 if (i == lastIndex)	177 if (i == lastIndex)

161 {	178 {

162 regexp.push("$");	179 regexp.push("$");

163 break;	180 break;

164 }	181 }

165 if (i == 1 && text[0] == "\|")	182 if (i == 1 && text[0] == "\|")

166 {	183 {

167 hostnameStart = i + 1;	184 hostnameStart = i + 1;

168 canSafelyMatchAsLowercase = true;	185 canSafelyMatchAsLowercase = true;

169 regexp.push("https?://([^/]+\\.)?");	186 regexp.push(urlScheme + "([^/]+\\.)?");

170 break;	187 break;

171 }	188 }

172 regexp.push("\\\|");	189 regexp.push("\\\|");

173 break;	190 break;

174 case "/":	191 case "/":

175 if (!hostnameFinished &&	192 if (!hostnameFinished &&

176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	193 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

177 {	194 {

178 hostnameStart = i + 1;	195 hostnameStart = i + 1;

179 canSafelyMatchAsLowercase = true;	196 canSafelyMatchAsLowercase = true;

180 }	197 }

181 regexp.push("/");	198 regexp.push("/");

182 break;	199 break;

183 case ".": case "+": case "$": case "?":	200 case ".": case "+": case "$": case "?":

184 case "{": case "}": case "(": case ")":	201 case "{": case "}": case "(": case ")":

185 case "[": case "]": case "\\":	202 case "[": case "]": case "\\":

186 regexp.push("\\", c);	203 regexp.push("\\", c);

187 break;	204 break;

188 default:	205 default:

189 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	206 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

190 c >= "A" && c <= "Z"))	207 c >= "A" && c <= "Z"))

191 canSafelyMatchAsLowercase = false;	208 canSafelyMatchAsLowercase = false;

192 regexp.push(c);	209 regexp.push(c);

193 }	210 }

194 }	211 }

195	212

	213 if (regexp.length == 0 \|\| regexp[0] != "^")

	214 regexp.unshift("^" + urlScheme + ".*");

	215

196 return {	216 return {

197 regexp: regexp.join(""),	217 regexp: regexp.join(""),

198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,	218 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

199 hostname: hostname,	219 hostname: hostname,

200 justHostname: justHostname	220 justHostname: justHostname

201 };	221 };

202 }	222 }

203	223

204 function getResourceTypes(filter)	224 function getResourceTypes(contentType)

205 {	225 {

206 let types = [];	226 let types = [];

207	227

208 if (filter.contentType & typeMap.IMAGE)	228 if (contentType & typeMap.IMAGE)

209 types.push("image");	229 types.push("image");

210 if (filter.contentType & typeMap.STYLESHEET)	230 if (contentType & typeMap.STYLESHEET)

211 types.push("style-sheet");	231 types.push("style-sheet");

212 if (filter.contentType & typeMap.SCRIPT)	232 if (contentType & typeMap.SCRIPT)

213 types.push("script");	233 types.push("script");

214 if (filter.contentType & typeMap.FONT)	234 if (contentType & typeMap.FONT)

215 types.push("font");	235 types.push("font");

216 if (filter.contentType & (typeMap.MEDIA \| typeMap.OBJECT))	236 if (contentType & (typeMap.MEDIA \| typeMap.OBJECT))

217 types.push("media");	237 types.push("media");

218 if (filter.contentType & typeMap.POPUP)	238 if (contentType & typeMap.POPUP)

219 types.push("popup");	239 types.push("popup");

220 if (filter.contentType & (typeMap.XMLHTTPREQUEST \|	240 if (contentType & (typeMap.XMLHTTPREQUEST \|

	241 typeMap.WEBSOCKET \|

	242 typeMap.WEBRTC \|

221 typeMap.OBJECT_SUBREQUEST \|	243 typeMap.OBJECT_SUBREQUEST \|

222 typeMap.PING \|	244 typeMap.PING \|

223 typeMap.OTHER))	245 typeMap.OTHER))

	246 {

224 types.push("raw");	247 types.push("raw");

225 if (filter.contentType & typeMap.SUBDOCUMENT)	248 }

	249 if (contentType & typeMap.SUBDOCUMENT)

226 types.push("document");	250 types.push("document");

227	251

228 return types;	252 return types;

229 }	253 }

230	254

231 function convertFilterAddRules(rules, filter, action, withResourceTypes,	255 function convertFilterAddRules(rules, filter, action, withResourceTypes,

232 exceptionDomains)	256 exceptionDomains)

233 {	257 {

234 let parsed = parseFilterRegexpSource(filter.regexpSource);	258 let contentType = filter.contentType;

	259

	260 // Support WebSocket and WebRTC only if they're the only option. If we try to

	261 // support them otherwise (e.g. $xmlhttprequest,websocket,webrtc), we end up

	262 // having to generate multiple rules, which bloats the rule set and is not

	263 // really necessary in practice.

	264 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET) \|\|

	265 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC))

	266 {

	267 contentType &= ~(typeMap.WEBSOCKET \| typeMap.WEBRTC);

	268 }

	269

	270 let urlSchemes = getURLSchemes(contentType);

	271 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);

235	272

236 // For the special case of $document whitelisting filters with just a domain	273 // For the special case of $document whitelisting filters with just a domain

237 // we can generate an equivalent blocking rule exception using if-domain.	274 // we can generate an equivalent blocking rule exception using if-domain.

238 if (filter instanceof filterClasses.WhitelistFilter &&	275 if (filter instanceof filterClasses.WhitelistFilter &&

239 filter.contentType & typeMap.DOCUMENT &&	276 contentType & typeMap.DOCUMENT &&

240 parsed.justHostname)	277 parsed.justHostname)

241 {	278 {

242 rules.push({	279 rules.push({

243 trigger: {	280 trigger: {

244 "url-filter": ".*",	281 "url-filter": ".*",

245 "if-domain": ["*" + parsed.hostname]	282 "if-domain": ["*" + parsed.hostname]

246 },	283 },

247 action: {type: "ignore-previous-rules"}	284 action: {type: "ignore-previous-rules"}

248 });	285 });

249 // If the filter contains other supported options we'll need to generate	286 // If the filter contains other supported options we'll need to generate

250 // further rules for it, but if not we can simply return now.	287 // further rules for it, but if not we can simply return now.

251 if (!(filter.contentType & whitelistableRequestTypes))	288 if (!(contentType & whitelistableRequestTypes))

252 return;	289 return;

253 }	290 }

254	291

255 let trigger = {"url-filter": parsed.regexp};	292 let trigger = {"url-filter": parsed.regexp};

256	293

257 // Limit rules to HTTP(S) URLs
Manish Jethani 2017/05/31 02:51:43 This has been moved into parseFilterRegexpSource.
258 if (!/^(\^\|http)/i.test(trigger["url-filter"]))

259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

260

261 // For rules containing only a hostname we know that we're matching against	294 // For rules containing only a hostname we know that we're matching against

262 // a lowercase string unless the matchCase option was passed.	295 // a lowercase string unless the matchCase option was passed.

263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)	296 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

264 trigger["url-filter"] = trigger["url-filter"].toLowerCase();	297 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

265	298

266 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)	299 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

267 trigger["url-filter-is-case-sensitive"] = true;	300 trigger["url-filter-is-case-sensitive"] = true;

268	301

269 let included = [];	302 let included = [];

270 let excluded = [];	303 let excluded = [];

271	304

272 parseDomains(filter.domains, included, excluded);	305 parseDomains(filter.domains, included, excluded);

273	306

274 if (exceptionDomains)	307 if (exceptionDomains)

275 excluded = excluded.concat(exceptionDomains);	308 excluded = excluded.concat(exceptionDomains);

276	309

277 if (withResourceTypes)	310 if (withResourceTypes)

278 {	311 {

279 trigger["resource-type"] = getResourceTypes(filter);	312 trigger["resource-type"] = getResourceTypes(contentType);

280	313

281 if (trigger["resource-type"].length == 0)	314 if (trigger["resource-type"].length == 0)

282 return;	315 return;

283 }	316 }

284	317

285 if (filter.thirdParty != null)	318 if (filter.thirdParty != null)

286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	319 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

287	320

288 if (included.length > 0)	321 if (included.length > 0)

289 {	322 {

(...skipping 19 matching lines...)
309 trigger["if-domain"].push("*" + name);	342 trigger["if-domain"].push("*" + name);

310 }	343 }

311 }	344 }

312 }	345 }

313 else if (excluded.length > 0)	346 else if (excluded.length > 0)

314 {	347 {

315 trigger["unless-domain"] = excluded.map(name => "*" + name);	348 trigger["unless-domain"] = excluded.map(name => "*" + name);

316 }	349 }

317	350

318 rules.push({trigger: trigger, action: {type: action}});	351 rules.push({trigger: trigger, action: {type: action}});

	352

	353 // Generate additional rules for any alternative URL schemes.

	354 if (urlSchemes.length > 1 &&

	355 trigger["url-filter"].startsWith("^" + urlSchemes[0]))

	356 {

	357 // Always make a deep copy of the rule, since rules may have to be

	358 // manipulated individually at a later stage.

	359 let stringifiedTrigger = JSON.stringify(trigger);

	360

	361 for (let i = 1; i < urlSchemes.length; i++)

	362 {

	363 let altTrigger = Object.assign(JSON.parse(stringifiedTrigger), {

	364 "url-filter": "^" + urlSchemes[i] +

	365 trigger["url-filter"].substring(urlSchemes[0].length + 1)

	366 });

	367 rules.push({trigger: altTrigger, action: {type: action}});

	368 }

	369 }

319 }	370 }

320	371

321 function hasNonASCI(obj)	372 function hasNonASCI(obj)

322 {	373 {

323 if (typeof obj == "string")	374 if (typeof obj == "string")

324 {	375 {

325 if (/[^\x00-\x7F]/.test(obj))	376 if (/[^\x00-\x7F]/.test(obj))

326 return true;	377 return true;

327 }	378 }

328	379

(...skipping 203 matching lines...)
532 {	583 {

533 convertFilterAddRules(rules, filter, "block", true,	584 convertFilterAddRules(rules, filter, "block", true,

534 requestFilterExceptionDomains);	585 requestFilterExceptionDomains);

535 }	586 }

536	587

537 for (let filter of this.requestExceptions)	588 for (let filter of this.requestExceptions)

538 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);	589 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

539	590

540 return rules.filter(rule => !hasNonASCI(rule));	591 return rules.filter(rule => !hasNonASCI(rule));

541 };	592 };

OLD	NEW

« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | node_modules/filterClasses.js » ('J')