Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29452289: Issue 5283 - Add support for $websocket and $webrtc (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Created May 31, 2017, 2:42 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | node_modules/filterClasses.js » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 15 matching lines...) Expand all
const typeMap = filterClasses.RegExpFilter.typeMap;
// Bit mask combining the request content types listed below. A $document
// whitelisting filter whose content type has none of these bits set needs no
// rule beyond its if-domain exception.
const whitelistableRequestTypes = [
  typeMap.IMAGE,
  typeMap.STYLESHEET,
  typeMap.SCRIPT,
  typeMap.FONT,
  typeMap.MEDIA,
  typeMap.POPUP,
  typeMap.OBJECT,
  typeMap.OBJECT_SUBREQUEST,
  typeMap.XMLHTTPREQUEST,
  typeMap.WEBSOCKET,
  typeMap.WEBRTC,
  typeMap.PING,
  typeMap.SUBDOCUMENT,
  typeMap.OTHER
].reduce((mask, typeBit) => mask | typeBit, 0);
39 41
40 function parseDomains(domains, included, excluded) 42 function parseDomains(domains, included, excluded)
41 { 43 {
42 for (let domain in domains) 44 for (let domain in domains)
43 { 45 {
44 if (domain != "") 46 if (domain != "")
45 { 47 {
(...skipping 11 matching lines...) Expand all
/**
 * Escape the regular expression metacharacters in a string so that the
 * result can be embedded in a pattern and matched literally.
 *
 * @param {string} s text to escape
 * @returns {string} the text with each of . * + ? ^ $ { } ( ) | [ ] \
 *                   preceded by a backslash
 */
function escapeRegExp(s)
{
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  let escaped = s.replace(metaCharacters, "\\$&");
  return escaped;
}
61 63
/**
 * Build a regular expression source that matches an http:// or https:// URL
 * whose host part is the given domain, optionally preceded by further
 * dot-separated labels, and followed by "/" or ":".
 *
 * @param {string} domain domain name to match; it is escaped and lowercased
 * @returns {string} regular expression source anchored at the URL start
 */
function matchDomain(domain)
{
  let pieces = [
    "^https?://([^/:]*\\.)?",
    escapeRegExp(domain).toLowerCase(),
    "[/:]"
  ];
  return pieces.join("");
}
66 68
/**
 * Pick the URL scheme patterns (regular expression fragments) to use for a
 * content type.
 *
 * @param {number} [contentType] content type value compared against
 *                               typeMap.WEBSOCKET and typeMap.WEBRTC
 * @returns {string[]} ["wss?://"] for WebSocket, ["stuns?:", "turns?:"] for
 *                     WebRTC, and ["https?://"] for anything else, including
 *                     a missing argument
 */
function getURLSchemes(contentType)
{
  // Start from the HTTP(S) default and override for the two special types.
  let schemes = ["https?://"];

  if (contentType == typeMap.WEBSOCKET)
    schemes = ["wss?://"];
  else if (contentType == typeMap.WEBRTC)
    schemes = ["stuns?:", "turns?:"];

  return schemes;
}
79
67 function findSubdomainsInList(domain, list) 80 function findSubdomainsInList(domain, list)
68 { 81 {
69 let subdomains = []; 82 let subdomains = [];
70 let suffixLength = domain.length + 1; 83 let suffixLength = domain.length + 1;
71 84
72 for (let name of list) 85 for (let name of list)
73 { 86 {
74 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain) 87 if (name.length > suffixLength && name.slice(-suffixLength) == "." + domain)
75 subdomains.push(name.slice(0, -suffixLength)); 88 subdomains.push(name.slice(0, -suffixLength));
76 } 89 }
(...skipping 13 matching lines...) Expand all
90 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 103 return {matchDomains: included.map(matchDomain), selector: filter.selector};
91 } 104 }
92 105
93 /** 106 /**
94 * Parse the given filter "regexpSource" string. Producing a regular expression, 107 * Parse the given filter "regexpSource" string. Producing a regular expression,
95 * extracting the hostname (if any), deciding if the regular expression is safe 108 * extracting the hostname (if any), deciding if the regular expression is safe
96 * to be converted + matched as lower case and noting if the source contains 109 * to be converted + matched as lower case and noting if the source contains
97 * anything after the hostname.) 110 * anything after the hostname.)
98 * 111 *
99 * @param {string} text regexpSource property of a filter 112 * @param {string} text regexpSource property of a filter
113 * @param {string} urlScheme The URL scheme to use in the regular expression
100 * @returns {object} An object containing a regular expression string, a bool 114 * @returns {object} An object containing a regular expression string, a bool
101 * indicating if the filter can be safely matched as lower 115 * indicating if the filter can be safely matched as lower
102 * case, a hostname string (or undefined) and a bool 116 * case, a hostname string (or undefined) and a bool
103 * indicating if the source only contains a hostname or not: 117 * indicating if the source only contains a hostname or not:
104 * {regexp: "...", 118 * {regexp: "...",
105 * canSafelyMatchAsLowercase: true/false, 119 * canSafelyMatchAsLowercase: true/false,
106 * hostname: "...", 120 * hostname: "...",
107 * justHostname: true/false} 121 * justHostname: true/false}
108 */ 122 */
109 function parseFilterRegexpSource(text) 123 function parseFilterRegexpSource(text, urlScheme)
110 { 124 {
111 let regexp = []; 125 let regexp = [];
112 let lastIndex = text.length - 1; 126 let lastIndex = text.length - 1;
113 let hostname; 127 let hostname;
114 let hostnameStart = null; 128 let hostnameStart = null;
115 let hostnameFinished = false; 129 let hostnameFinished = false;
116 let justHostname = false; 130 let justHostname = false;
117 let canSafelyMatchAsLowercase = false; 131 let canSafelyMatchAsLowercase = false;
118 132
133 if (!urlScheme)
134 urlScheme = getURLSchemes()[0];
135
119 for (let i = 0; i < text.length; i++) 136 for (let i = 0; i < text.length; i++)
120 { 137 {
121 let c = text[i]; 138 let c = text[i];
122 139
123 if (hostnameFinished) 140 if (hostnameFinished)
124 justHostname = false; 141 justHostname = false;
125 142
126 // If we're currently inside the hostname we have to be careful not to 143 // If we're currently inside the hostname we have to be careful not to
127 // escape any characters until after we have converted it to punycode. 144 // escape any characters until after we have converted it to punycode.
128 if (hostnameStart != null && !hostnameFinished) 145 if (hostnameStart != null && !hostnameFinished)
(...skipping 30 matching lines...) Expand all
159 } 176 }
160 if (i == lastIndex) 177 if (i == lastIndex)
161 { 178 {
162 regexp.push("$"); 179 regexp.push("$");
163 break; 180 break;
164 } 181 }
165 if (i == 1 && text[0] == "|") 182 if (i == 1 && text[0] == "|")
166 { 183 {
167 hostnameStart = i + 1; 184 hostnameStart = i + 1;
168 canSafelyMatchAsLowercase = true; 185 canSafelyMatchAsLowercase = true;
169 regexp.push("https?://([^/]+\\.)?"); 186 regexp.push(urlScheme + "([^/]+\\.)?");
170 break; 187 break;
171 } 188 }
172 regexp.push("\\|"); 189 regexp.push("\\|");
173 break; 190 break;
174 case "/": 191 case "/":
175 if (!hostnameFinished && 192 if (!hostnameFinished &&
176 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 193 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
177 { 194 {
178 hostnameStart = i + 1; 195 hostnameStart = i + 1;
179 canSafelyMatchAsLowercase = true; 196 canSafelyMatchAsLowercase = true;
180 } 197 }
181 regexp.push("/"); 198 regexp.push("/");
182 break; 199 break;
183 case ".": case "+": case "$": case "?": 200 case ".": case "+": case "$": case "?":
184 case "{": case "}": case "(": case ")": 201 case "{": case "}": case "(": case ")":
185 case "[": case "]": case "\\": 202 case "[": case "]": case "\\":
186 regexp.push("\\", c); 203 regexp.push("\\", c);
187 break; 204 break;
188 default: 205 default:
189 if (hostnameFinished && (c >= "a" && c <= "z" || 206 if (hostnameFinished && (c >= "a" && c <= "z" ||
190 c >= "A" && c <= "Z")) 207 c >= "A" && c <= "Z"))
191 canSafelyMatchAsLowercase = false; 208 canSafelyMatchAsLowercase = false;
192 regexp.push(c); 209 regexp.push(c);
193 } 210 }
194 } 211 }
195 212
213 if (regexp.length == 0 || regexp[0] != "^")
214 regexp.unshift("^" + urlScheme + ".*");
215
196 return { 216 return {
197 regexp: regexp.join(""), 217 regexp: regexp.join(""),
198 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, 218 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
199 hostname: hostname, 219 hostname: hostname,
200 justHostname: justHostname 220 justHostname: justHostname
201 }; 221 };
202 } 222 }
203 223
/**
 * Translate a content type bit mask into the list of names stored in a
 * rule trigger's "resource-type" property.
 *
 * @param {number} contentType bit mask made up of typeMap values
 * @returns {string[]} the matching resource type names, in a fixed order;
 *                     empty if none of the known bits are set
 */
function getResourceTypes(contentType)
{
  // Each entry pairs a resource type name with the content type bits that
  // select it. The order of the entries determines the order of the result.
  let resourceTypeMasks = [
    ["image", typeMap.IMAGE],
    ["style-sheet", typeMap.STYLESHEET],
    ["script", typeMap.SCRIPT],
    ["font", typeMap.FONT],
    ["media", typeMap.MEDIA | typeMap.OBJECT],
    ["popup", typeMap.POPUP],
    ["raw", typeMap.XMLHTTPREQUEST |
            typeMap.WEBSOCKET |
            typeMap.WEBRTC |
            typeMap.OBJECT_SUBREQUEST |
            typeMap.PING |
            typeMap.OTHER],
    ["document", typeMap.SUBDOCUMENT]
  ];

  let types = [];

  for (let [name, mask] of resourceTypeMasks)
  {
    if (contentType & mask)
      types.push(name);
  }

  return types;
}
230 254
231 function convertFilterAddRules(rules, filter, action, withResourceTypes, 255 function convertFilterAddRules(rules, filter, action, withResourceTypes,
232 exceptionDomains) 256 exceptionDomains)
233 { 257 {
234 let parsed = parseFilterRegexpSource(filter.regexpSource); 258 let contentType = filter.contentType;
259
260 // Support WebSocket and WebRTC only if they're the only option. If we try to
261 // support them otherwise (e.g. $xmlhttprequest,websocket,webrtc), we end up
262 // having to generate multiple rules, which bloats the rule set and is not
263 // really necessary in practice.
264 if ((contentType & typeMap.WEBSOCKET && contentType != typeMap.WEBSOCKET) ||
265 (contentType & typeMap.WEBRTC && contentType != typeMap.WEBRTC))
266 {
267 contentType &= ~(typeMap.WEBSOCKET | typeMap.WEBRTC);
268 }
269
270 let urlSchemes = getURLSchemes(contentType);
271 let parsed = parseFilterRegexpSource(filter.regexpSource, urlSchemes[0]);
235 272
236 // For the special case of $document whitelisting filters with just a domain 273 // For the special case of $document whitelisting filters with just a domain
237 // we can generate an equivalent blocking rule exception using if-domain. 274 // we can generate an equivalent blocking rule exception using if-domain.
238 if (filter instanceof filterClasses.WhitelistFilter && 275 if (filter instanceof filterClasses.WhitelistFilter &&
239 filter.contentType & typeMap.DOCUMENT && 276 contentType & typeMap.DOCUMENT &&
240 parsed.justHostname) 277 parsed.justHostname)
241 { 278 {
242 rules.push({ 279 rules.push({
243 trigger: { 280 trigger: {
244 "url-filter": ".*", 281 "url-filter": ".*",
245 "if-domain": ["*" + parsed.hostname] 282 "if-domain": ["*" + parsed.hostname]
246 }, 283 },
247 action: {type: "ignore-previous-rules"} 284 action: {type: "ignore-previous-rules"}
248 }); 285 });
249 // If the filter contains other supported options we'll need to generate 286 // If the filter contains other supported options we'll need to generate
250 // further rules for it, but if not we can simply return now. 287 // further rules for it, but if not we can simply return now.
251 if (!(filter.contentType & whitelistableRequestTypes)) 288 if (!(contentType & whitelistableRequestTypes))
252 return; 289 return;
253 } 290 }
254 291
255 let trigger = {"url-filter": parsed.regexp}; 292 let trigger = {"url-filter": parsed.regexp};
256 293
257 // Limit rules to HTTP(S) URLs
Manish Jethani 2017/05/31 02:51:43 This has been moved into parseFilterRegexpSource.
258 if (!/^(\^|http)/i.test(trigger["url-filter"]))
259 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
260
261 // For rules containing only a hostname we know that we're matching against 294 // For rules containing only a hostname we know that we're matching against
262 // a lowercase string unless the matchCase option was passed. 295 // a lowercase string unless the matchCase option was passed.
263 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) 296 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
264 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 297 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
265 298
266 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) 299 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
267 trigger["url-filter-is-case-sensitive"] = true; 300 trigger["url-filter-is-case-sensitive"] = true;
268 301
269 let included = []; 302 let included = [];
270 let excluded = []; 303 let excluded = [];
271 304
272 parseDomains(filter.domains, included, excluded); 305 parseDomains(filter.domains, included, excluded);
273 306
274 if (exceptionDomains) 307 if (exceptionDomains)
275 excluded = excluded.concat(exceptionDomains); 308 excluded = excluded.concat(exceptionDomains);
276 309
277 if (withResourceTypes) 310 if (withResourceTypes)
278 { 311 {
279 trigger["resource-type"] = getResourceTypes(filter); 312 trigger["resource-type"] = getResourceTypes(contentType);
280 313
281 if (trigger["resource-type"].length == 0) 314 if (trigger["resource-type"].length == 0)
282 return; 315 return;
283 } 316 }
284 317
285 if (filter.thirdParty != null) 318 if (filter.thirdParty != null)
286 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 319 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
287 320
288 if (included.length > 0) 321 if (included.length > 0)
289 { 322 {
(...skipping 19 matching lines...) Expand all
309 trigger["if-domain"].push("*" + name); 342 trigger["if-domain"].push("*" + name);
310 } 343 }
311 } 344 }
312 } 345 }
313 else if (excluded.length > 0) 346 else if (excluded.length > 0)
314 { 347 {
315 trigger["unless-domain"] = excluded.map(name => "*" + name); 348 trigger["unless-domain"] = excluded.map(name => "*" + name);
316 } 349 }
317 350
318 rules.push({trigger: trigger, action: {type: action}}); 351 rules.push({trigger: trigger, action: {type: action}});
352
353 // Generate additional rules for any alternative URL schemes.
354 if (urlSchemes.length > 1 &&
355 trigger["url-filter"].startsWith("^" + urlSchemes[0]))
356 {
357 // Always make a deep copy of the rule, since rules may have to be
358 // manipulated individually at a later stage.
359 let stringifiedTrigger = JSON.stringify(trigger);
360
361 for (let i = 1; i < urlSchemes.length; i++)
362 {
363 let altTrigger = Object.assign(JSON.parse(stringifiedTrigger), {
364 "url-filter": "^" + urlSchemes[i] +
365 trigger["url-filter"].substring(urlSchemes[0].length + 1)
366 });
367 rules.push({trigger: altTrigger, action: {type: action}});
368 }
369 }
319 } 370 }
320 371
321 function hasNonASCI(obj) 372 function hasNonASCI(obj)
322 { 373 {
323 if (typeof obj == "string") 374 if (typeof obj == "string")
324 { 375 {
325 if (/[^\x00-\x7F]/.test(obj)) 376 if (/[^\x00-\x7F]/.test(obj))
326 return true; 377 return true;
327 } 378 }
328 379
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
532 { 583 {
533 convertFilterAddRules(rules, filter, "block", true, 584 convertFilterAddRules(rules, filter, "block", true,
534 requestFilterExceptionDomains); 585 requestFilterExceptionDomains);
535 } 586 }
536 587
537 for (let filter of this.requestExceptions) 588 for (let filter of this.requestExceptions)
538 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 589 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
539 590
540 return rules.filter(rule => !hasNonASCI(rule)); 591 return rules.filter(rule => !hasNonASCI(rule));
541 }; 592 };
OLDNEW
« no previous file with comments | « no previous file | node_modules/filterClasses.js » ('j') | node_modules/filterClasses.js » ('J')

Powered by Google App Engine
This is Rietveld