Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/contentBlockerList.js

Issue 29336753: Issue 3671 - Split out contentBlockerList API (Closed)
Patch Set: Reworked API into ContentBlockerList class and addressed other feedback Created Feb. 22, 2016, 12:21 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « adblockplus.js ('k') | node_modules/filterClasses.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /** @module contentBlockerList */
19
20 "use strict";
21
22 let filterClasses = require("filterClasses");
23 let getBaseDomain = require("urlHelpers").getBaseDomain;
24 let punycode = require("punycode");
25
// Upper bound on how many selectors are joined into one css-display-none rule
const selectorLimit = 5000;
// Content type flags of RegExpFilter, tested against filter.contentType
const {typeMap} = filterClasses.RegExpFilter;
28
/**
 * Sorts the domains of a filter into an inclusion and an exclusion list.
 * Every domain name is lower-cased and converted to punycode before it is
 * appended. The empty-string key is never emitted itself; while its value is
 * truthy, enabled domains are not added to the inclusion list.
 *
 * @param {Object} domains Mapping of domain name to a truthy/falsy flag
 * @param {string[]} included Receives the enabled domains
 * @param {string[]} excluded Receives the disabled domains
 */
function parseDomains(domains, included, excluded)
{
  for (let name in domains)
  {
    if (name == "")
      continue;

    let asciiName = punycode.toASCII(name.toLowerCase());

    if (!domains[name])
      excluded.push(asciiName);
    else if (!domains[""])
      included.push(asciiName);
  }
}
45
/**
 * Escapes all regular expression metacharacters in a string by prefixing
 * each of them with a backslash.
 *
 * @param {string} s Text to escape
 * @returns {string} The escaped text
 */
function escapeRegExp(s)
{
  return s.replace(/[.*+?^${}()|[\]\\]/g, special => "\\" + special);
}
50
/**
 * Builds the source of a regular expression matching HTTP(S) URLs on the
 * given domain or on any of its subdomains.
 *
 * @param {string} domain Domain name, inserted literally (escaped)
 * @returns {string} Regular expression source
 */
function matchDomain(domain)
{
  let schemeAndSubdomains = "^https?://([^/:]*\\.)?";
  let hostTerminator = "[/:]";

  return schemeAndSubdomains + escapeRegExp(domain) + hostTerminator;
}
55
/**
 * Converts an element hiding filter into the URL patterns it applies to
 * plus its selector.
 *
 * @param {Object} filter Filter providing `domains` and `selector`
 * @param {Object|Map} elemhideSelectorExceptions
 *   Container keyed by the selectors that have element hiding exceptions
 * @returns {?Object}
 *   `{matchDomains, selector}`, or undefined if the filter excludes any
 *   domain or its selector has an exception
 */
function convertElemHideFilter(filter, elemhideSelectorExceptions)
{
  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  if (excluded.length > 0)
    return undefined;

  // Only consider entries that were actually stored. The "in" operator would
  // also report inherited members, so that selectors such as "constructor"
  // (or "size"/"get" for a Map) would wrongly be treated as exceptions.
  let hasException =
    Object.prototype.hasOwnProperty.call(elemhideSelectorExceptions,
                                         filter.selector) ||
    (elemhideSelectorExceptions instanceof Map &&
     elemhideSelectorExceptions.has(filter.selector));
  if (hasException)
    return undefined;

  return {matchDomains: included.map(matchDomain), selector: filter.selector};
}
67
/**
 * Translates the pattern of an Adblock Plus filter into the source of a
 * regular expression.
 *
 * - "*" becomes ".*"; leading, trailing and repeated wildcards are dropped
 * - "^" (separator) becomes "." unless it is the last character, in which
 *   case it is dropped
 * - a leading "|" anchors at the start, a trailing "|" anchors at the end
 * - a leading "||" anchors at the start of the host name of an HTTP(S) URL,
 *   allowing any number of subdomains in front of it
 * - any other regular expression metacharacter is escaped
 *
 * @param {string} text Filter pattern
 * @returns {string} Regular expression source
 */
function toRegExp(text)
{
  let result = [];
  let lastIndex = text.length - 1;

  for (let i = 0; i < text.length; i++)
  {
    let c = text[i];

    switch (c)
    {
      case "*":
        if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
          result.push(".*");
        break;
      case "^":
        if (i < lastIndex)
          result.push(".");
        break;
      case "|":
        if (i == 0)
        {
          result.push("^");
          break;
        }
        if (i == lastIndex)
        {
          result.push("$");
          break;
        }
        if (i == 1 && text[0] == "|")
        {
          // "||example.com" has to match subdomains of example.com as well,
          // hence the optional "<subdomains>." group after the scheme
          result.push("https?://([^/]+\\.)?");
          break;
        }
        // A pipe anywhere else is a literal character which needs escaping:
        // fall through
      case ".": case "+": case "?": case "$":
      case "{": case "}": case "(": case ")":
      case "[": case "]": case "\\":
        result.push("\\");
        // fall through
      default:
        result.push(c);
    }
  }

  return result.join("");
}
114
/**
 * Produces the "url-filter" regular expression source for a request filter.
 *
 * @param {Object} filter Filter providing `regexpSource`
 * @returns {string} Regular expression source
 */
function getRegExpSource(filter)
{
  // Safari expects punycode, filter lists use unicode
  let asciiPattern = filter.regexpSource.replace(
    /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
    (match, prefix, domain) => prefix + punycode.toASCII(domain)
  );
  let source = toRegExp(asciiPattern);

  // Limit rules to HTTP(S) URLs
  if (/^(\^|http)/i.test(source))
    return source;

  return "^https?://.*" + source;
}
132
/**
 * Lists the content blocker resource types covered by a filter's content
 * type flags.
 *
 * @param {Object} filter Filter providing the `contentType` bit mask
 * @returns {string[]} Resource type names, in a fixed order
 */
function getResourceTypes(filter)
{
  // Pairs of content type mask and the resource type it maps to. The order
  // of this table determines the order of the returned names.
  let mapping = [
    [typeMap.IMAGE, "image"],
    [typeMap.STYLESHEET, "style-sheet"],
    [typeMap.SCRIPT, "script"],
    [typeMap.FONT, "font"],
    [typeMap.MEDIA | typeMap.OBJECT, "media"],
    [typeMap.POPUP, "popup"],
    [typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST |
     typeMap.PING | typeMap.OTHER, "raw"],
    [typeMap.SUBDOCUMENT, "document"]
  ];

  return mapping
    .filter(entry => filter.contentType & entry[0])
    .map(entry => entry[1]);
}
158
/**
 * Returns the given domains, each one followed by its "www." variant if the
 * domain is its own base domain.
 *
 * @param {string[]} domains Domain names
 * @returns {string[]} New list of domain names
 */
function addDomainPrefix(domains)
{
  let prefixed = [];

  for (let name of domains)
  {
    if (getBaseDomain(name) == name)
      prefixed.push(name, "www." + name);
    else
      prefixed.push(name);
  }

  return prefixed;
}
173
/**
 * Converts a request filter into a content blocker rule.
 *
 * @param {Object} filter Filter to convert
 * @param {string} action Value for the rule's action type
 * @param {boolean} withResourceTypes
 *   Whether the trigger should be restricted to the filter's resource types
 * @returns {Object} Rule of the form `{trigger, action: {type}}`
 */
function convertFilter(filter, action, withResourceTypes)
{
  let trigger = {};
  trigger["url-filter"] = getRegExpSource(filter);

  let includedDomains = [];
  let excludedDomains = [];
  parseDomains(filter.domains, includedDomains, excludedDomains);

  if (withResourceTypes)
    trigger["resource-type"] = getResourceTypes(filter);

  if (filter.thirdParty != null)
  {
    let loadType = filter.thirdParty ? "third-party" : "first-party";
    trigger["load-type"] = [loadType];
  }

  // Included domains take precedence, exclusions are only used without them
  if (includedDomains.length > 0)
    trigger["if-domain"] = addDomainPrefix(includedDomains);
  else if (excludedDomains.length > 0)
    trigger["unless-domain"] = addDomainPrefix(excludedDomains);

  return {trigger: trigger, action: {type: action}};
}
194
/**
 * Checks whether a value contains any non-ASCII character. Strings are
 * tested directly; for objects and arrays the values of all own properties
 * are checked recursively (property names are not inspected).
 *
 * @param {*} obj Value to inspect
 * @returns {boolean} true if a string with a non-ASCII character was found
 */
function hasNonASCI(obj)
{
  if (typeof obj == "string")
    return /[^\x00-\x7F]/.test(obj);

  // typeof null is "object" as well, but null has no properties to inspect
  // and Object.getOwnPropertyNames(null) would throw
  if (obj === null || typeof obj != "object")
    return false;

  // For arrays the own property names include every index, so a separate
  // pass over the items is not needed
  return Object.getOwnPropertyNames(obj).some(name => hasNonASCI(obj[name]));
}
218
/**
 * Rewrites every ID selector (#someID) in a selector into the attribute
 * form [id=someID]. Hash characters inside quoted text and characters
 * following a backslash are left alone.
 *
 * @param {string} selector Selector, possibly a comma separated list
 * @returns {string} The rewritten selector
 */
function convertIDSelectorsToAttributeSelectors(selector)
{
  // Whether a character can be part of an ID: "-", "_", digits, ASCII
  // letters and everything from U+0080 upwards
  let continuesID = ch =>
    ch == "-" || ch == "_" ||
    !(ch < "0" ||
      ch > "9" && ch < "A" ||
      ch > "Z" && ch < "a" ||
      ch > "z" && ch < "\x80");

  let output = "";
  let copiedUpTo = 0;   // everything before this index is already in output
  let idStart = -1;     // index of the "#" of the ID being scanned, if any
  let quote = "";       // quote character of the text we are inside, if any

  // Emits the pending ID, which ends right before the given index
  let emitID = end =>
  {
    output += selector.substring(copiedUpTo, idStart) +
              "[id=" + selector.substring(idStart + 1, end) + "]";
    copiedUpTo = end;
    idStart = -1;
  };

  let index = 0;
  while (index < selector.length)
  {
    let ch = selector[index];

    if (ch == "\\")
    {
      // Ignore the escaped character
      index++;
    }
    else if (quote != "")
    {
      // Don't match IDs within quoted text, e.g. [attr="#Hello"]
      if (ch == quote)
        quote = "";
    }
    else if (ch == '"' || ch == "'")
      quote = ch;
    else if (idStart < 0)
    {
      if (ch == "#")
        idStart = index;
    }
    else if (!continuesID(ch))
      emitID(index);

    index++;
  }

  // An ID may extend to the very end of the selector
  if (idStart >= 0)
    emitID(selector.length);

  return output + selector.substring(copiedUpTo);
}
269
/**
 * Create a new Adblock Plus filter to content blocker list converter
 *
 * @constructor
 */
let ContentBlockerList = exports.ContentBlockerList = function ()
{
  this.requestFilters = [];
  this.requestExceptions = [];
  this.elemhideFilters = [];
  this.elemhideExceptions = [];

  // Maps selectors to the domains of their element hiding exceptions. It is
  // accessed with plain property syntax and the "in" operator, so it has to
  // be an object without a prototype: with a Map (or an ordinary object)
  // selectors such as "size" or "constructor" would collide with inherited
  // members.
  this.elemhideSelectorExceptions = Object.create(null);
};
Sebastian Noack 2016/02/22 17:35:28 There is no need to set the prototype to an empty
kzar 2016/02/22 18:09:29 Done.
286
/**
 * Add Adblock Plus filter to be converted
 *
 * Filters with sitekeys and request filters without a regexpSource are
 * ignored. Every other filter is queued in the list(s) matching its class.
 *
 * @param {Filter} filter Filter to convert
 */
ContentBlockerList.prototype.addFilter = function(filter)
{
  let isRegExpFilter = filter instanceof filterClasses.RegExpFilter;
  if (filter.sitekeys || (isRegExpFilter && !filter.regexpSource))
    return;

  if (filter instanceof filterClasses.BlockingFilter)
    this.requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
  {
    // Content types for which the exception applies to requests
    let requestTypes = typeMap.IMAGE |
                       typeMap.STYLESHEET |
                       typeMap.SCRIPT |
                       typeMap.FONT |
                       typeMap.MEDIA |
                       typeMap.POPUP |
                       typeMap.OBJECT |
                       typeMap.OBJECT_SUBREQUEST |
                       typeMap.XMLHTTPREQUEST |
                       typeMap.PING |
                       typeMap.SUBDOCUMENT |
                       typeMap.OTHER;

    if (filter.contentType & requestTypes)
      this.requestExceptions.push(filter);

    if (filter.contentType & typeMap.ELEMHIDE)
      this.elemhideExceptions.push(filter);
  }

  if (filter instanceof filterClasses.ElemHideFilter)
    this.elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
  {
    // Collect the domains of all exceptions for this selector in one list
    let exceptions = this.elemhideSelectorExceptions;
    if (!exceptions[filter.selector])
      exceptions[filter.selector] = [];

    parseDomains(filter.domains, exceptions[filter.selector], []);
  }
};
334
/**
 * Generate content blocker list for all filters that were added
 *
 * The rules are emitted in this order: element hiding rules, element hiding
 * exceptions, blocking rules, request exceptions. Rules containing non-ASCII
 * characters are left out.
 *
 * @param {*} [filter] Not used; only kept so the signature stays unchanged
 * @returns {Object[]} Content blocker rules, each `{trigger, action}`
 */
ContentBlockerList.prototype.generateRules = function(filter)
{
  let rules = [];

  function addRule(rule)
  {
    if (!hasNonASCI(rule))
      rules.push(rule);
  }

  // Group the selectors of all element hiding filters by the URL pattern
  // they apply to
  let selectorsByUrlFilter = new Map();
  for (let elemhideFilter of this.elemhideFilters)
  {
    let result = convertElemHideFilter(elemhideFilter,
                                       this.elemhideSelectorExceptions);
    if (!result)
      continue;

    // Filters without domain restrictions apply to every HTTP(S) URL
    let urlFilters = result.matchDomains;
    if (urlFilters.length == 0)
      urlFilters = ["^https?://"];

    for (let urlFilter of urlFilters)
    {
      let group = selectorsByUrlFilter.get(urlFilter);
      if (!group)
      {
        group = [];
        selectorsByUrlFilter.set(urlFilter, group);
      }
      group.push(result.selector);
    }
  }

  for (let [urlFilter, selectors] of selectorsByUrlFilter)
  {
    // Emit one rule per chunk of at most selectorLimit selectors
    while (selectors.length)
    {
      let selector = selectors.splice(0, selectorLimit).join(", ");

      // As of Safari 9.0 element IDs are matched as lowercase. We work around
      // this by converting to the attribute format [id=elementID]
      selector = convertIDSelectorsToAttributeSelectors(selector);

      addRule({
        trigger: {"url-filter": urlFilter},
        action: {type: "css-display-none",
                 selector: selector}
      });
    }
  }

  for (let exception of this.elemhideExceptions)
    addRule(convertFilter(exception, "ignore-previous-rules", false));
  for (let requestFilter of this.requestFilters)
    addRule(convertFilter(requestFilter, "block", true));
  for (let exception of this.requestExceptions)
    addRule(convertFilter(exception, "ignore-previous-rules", true));

  return rules;
};
OLDNEW
« no previous file with comments | « adblockplus.js ('k') | node_modules/filterClasses.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld