Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp2blocklist.js

Issue 29336753: Issue 3671 - Split out contentBlockerList API (Closed)
Patch Set: Use Sebastian's regexp suggestion Created Feb. 24, 2016, 7:18 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | adblockplus.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
1 "use strict"; 18 "use strict";
2 19
3 let readline = require("readline"); 20 let readline = require("readline");
4 let punycode = require("punycode"); 21 let Filter = require("filterClasses").Filter;
5 let tldjs = require("tldjs"); 22 let ContentBlockerList = require("./lib/contentBlockerList.js").ContentBlockerList;
6 let filterClasses = require("./adblockplus.js");
7 23
8 let typeMap = filterClasses.RegExpFilter.typeMap; 24 var rl = readline.createInterface({input: process.stdin, terminal: false});
25 var blockerList = new ContentBlockerList();
9 26
10 const selectorLimit = 5000; 27 rl.on("line", line =>
28 {
29 if (/^\s*[^\[\s]/.test(line))
30 blockerList.addFilter(Filter.fromText(line));
31 });
11 32
12 let requestFilters = []; 33 rl.on("close", () =>
13 let requestExceptions = [];
14 let elemhideFilters = [];
15 let elemhideExceptions = [];
16 let elemhideSelectorExceptions = new Map();
17
18 function recordException(filter)
19 { 34 {
20 if (filter.contentType & (typeMap.IMAGE 35 console.log(JSON.stringify(blockerList.generateRules(),
21 | typeMap.STYLESHEET 36 null, "\t"));
22 | typeMap.SCRIPT 37 });
23 | typeMap.FONT
24 | typeMap.MEDIA
25 | typeMap.POPUP
26 | typeMap.OBJECT
27 | typeMap.OBJECT_SUBREQUEST
28 | typeMap.XMLHTTPREQUEST
29 | typeMap.PING
30 | typeMap.SUBDOCUMENT
31 | typeMap.OTHER))
32 requestExceptions.push(filter);
33
34 if (filter.contentType & typeMap.ELEMHIDE)
35 elemhideExceptions.push(filter);
36 }
37
/**
 * Sorts the keys of a domain map into inclusion and exclusion lists.
 *
 * Every key except "" is lower-cased and converted to its ASCII (punycode)
 * form. Keys mapped to a falsy value are appended to `excluded`. Keys mapped
 * to a truthy value are appended to `included`, but only while the "" entry
 * of the map is falsy. The "" entry itself is never emitted.
 *
 * @param {?Object} domains  map of domain name to flag; enumerated with
 *                           for...in, so null/undefined adds nothing
 * @param {string[]} included  receives the enabled domains
 * @param {string[]} excluded  receives the disabled domains
 */
function parseDomains(domains, included, excluded)
{
  for (let name in domains)
  {
    // The "" key only carries the default flag, it is not a domain.
    if (name == "")
      continue;

    let isEnabled = domains[name];
    let asciiName = punycode.toASCII(name.toLowerCase());

    if (isEnabled)
    {
      if (!domains[""])
        included.push(asciiName);
    }
    else
      excluded.push(asciiName);
  }
}
54
/**
 * Records the domains of an element hiding exception under its selector.
 *
 * The domain list is stored as a property of elemhideSelectorExceptions
 * keyed by the selector text (bracket access, not Map#get/Map#set). It is
 * created on first use. Excluded domains of the exception are discarded.
 *
 * @param {Object} filter  object exposing `selector` and `domains`
 */
function recordSelectorException(filter)
{
  let selector = filter.selector;
  let exceptionDomains = elemhideSelectorExceptions[selector];

  if (!exceptionDomains)
  {
    exceptionDomains = [];
    elemhideSelectorExceptions[selector] = exceptionDomains;
  }

  // Only the included domains are kept; the excluded ones go to a scratch list.
  parseDomains(filter.domains, exceptionDomains, []);
}
63
/**
 * Parses one line of a filter list and files the resulting filter into the
 * module-level collections.
 *
 * Lines starting with "[" are ignored, as are filters that carry sitekeys
 * and RegExpFilter instances whose regexpSource is empty.
 *
 * @param {string} line  a single line of filter text
 */
function parseFilter(line)
{
  if (line[0] === "[")
    return;

  let filter = filterClasses.Filter.fromText(line);
  let isUnsupported =
    filter.sitekeys ||
    (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource);
  if (isUnsupported)
    return;

  // The checks are kept independent of each other rather than chained.
  if (filter instanceof filterClasses.BlockingFilter)
    requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
    recordException(filter);

  if (filter instanceof filterClasses.ElemHideFilter)
    elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
    recordSelectorException(filter);
}
85
/**
 * Escapes a string so it can be embedded literally in a regular expression.
 *
 * Each of the characters . * + ? ^ $ { } ( ) | [ ] \ is prefixed with a
 * backslash; everything else is left untouched.
 *
 * @param {string} s  text to escape
 * @return {string} the escaped text
 */
function escapeRegExp(s)
{
  let specialChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(specialChars, match => "\\" + match);
}
90
/**
 * Builds a regular expression source that matches HTTP(S) URLs whose host is
 * the given domain or one of its subdomains.
 *
 * The domain must be followed by "/" or ":" in the URL.
 *
 * @param {string} domain  domain name, escaped via escapeRegExp
 * @return {string} regular expression source
 */
function matchDomain(domain)
{
  return `^https?://([^/:]*\\.)?${escapeRegExp(domain)}[/:]`;
}
95
/**
 * Converts an element hiding filter into the domain patterns and selector
 * used to build a css-display-none rule.
 *
 * A filter is skipped (undefined is returned) when it has excluded domains
 * or when an exception has been recorded for its selector.
 *
 * Exceptions are stored as own properties of elemhideSelectorExceptions, so
 * the lookup is an own-property check. A plain `in` test would also match
 * names inherited from the container's prototype (e.g. "set", "keys" or
 * "constructor") and wrongly drop filters using such selectors.
 *
 * @param {Object} filter  object exposing `domains` and `selector`
 * @return {{matchDomains: string[], selector: string}|undefined}
 */
function convertElemHideFilter(filter)
{
  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  let hasException = Object.prototype.hasOwnProperty.call(
    elemhideSelectorExceptions, filter.selector
  );
  if (excluded.length > 0 || hasException)
    return undefined;

  return {
    matchDomains: included.map(domain => matchDomain(domain)),
    selector: filter.selector
  };
}
107
/**
 * Translates Adblock Plus filter pattern syntax into a regular expression
 * source string.
 *
 * - "*" becomes ".*", except at the very start of the output, at the end of
 *   the text, or when directly followed by another "*".
 * - "^" becomes "." unless it is the last character, where it is dropped.
 * - "|" is "^" at the start and "$" at the end; a second leading "|" becomes
 *   "https?://"; anywhere else it is escaped.
 * - . + ? $ { } ( ) [ ] \ are escaped with a backslash.
 * - Every other character is copied verbatim.
 *
 * @param {string} text  filter pattern
 * @return {string} regular expression source
 */
function toRegExp(text)
{
  let needsEscape = ".+?${}()[]\\";
  let last = text.length - 1;
  let pattern = "";

  for (let pos = 0; pos <= last; pos++)
  {
    let chr = text[pos];

    if (chr == "*")
    {
      // Leading, trailing and repeated wildcards add nothing to the pattern.
      if (pattern.length > 0 && pos < last && text[pos + 1] != "*")
        pattern += ".*";
    }
    else if (chr == "^")
    {
      if (pos < last)
        pattern += ".";
    }
    else if (chr == "|" && pos == 0)
      pattern += "^";
    else if (chr == "|" && pos == last)
      pattern += "$";
    else if (chr == "|" && pos == 1 && text[0] == "|")
      pattern += "https?://";
    else if (chr == "|" || needsEscape.indexOf(chr) != -1)
      pattern += "\\" + chr;
    else
      pattern += chr;
  }

  return pattern;
}
154
/**
 * Produces the "url-filter" regular expression source for a request filter.
 *
 * The host part following a "||", "|http(s)://" or "http(s)://" prefix is
 * converted to punycode before the pattern is translated with toRegExp.
 * Unless the result already starts with "^" or "http", it is prefixed so
 * that it only applies to HTTP(S) URLs.
 *
 * @param {Object} filter  object exposing `regexpSource`
 * @return {string} regular expression source
 */
function getRegExpSource(filter)
{
  // Safari expects punycode, filter lists use unicode
  let hostPattern = /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i;
  let toPunycode = (match, prefix, domain) => prefix + punycode.toASCII(domain);

  let asciiSource = filter.regexpSource.replace(hostPattern, toPunycode);
  let source = toRegExp(asciiSource);

  // Limit rules to HTTP(S) URLs
  if (/^(\^|http)/i.test(source))
    return source;

  return "^https?://.*" + source;
}
172
/**
 * Maps the content type bitmask of a filter to content blocker
 * "resource-type" names.
 *
 * The names are returned in a fixed order, each at most once. MEDIA and
 * OBJECT both map to "media"; XMLHTTPREQUEST, OBJECT_SUBREQUEST, PING and
 * OTHER all map to "raw".
 *
 * @param {Object} filter  object exposing the `contentType` bitmask
 * @return {string[]} resource type names
 */
function getResourceTypes(filter)
{
  let table = [
    ["image", typeMap.IMAGE],
    ["style-sheet", typeMap.STYLESHEET],
    ["script", typeMap.SCRIPT],
    ["font", typeMap.FONT],
    ["media", typeMap.MEDIA | typeMap.OBJECT],
    ["popup", typeMap.POPUP],
    ["raw", typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST |
            typeMap.PING | typeMap.OTHER],
    ["document", typeMap.SUBDOCUMENT]
  ];

  let types = [];
  for (let [name, mask] of table)
  {
    if (filter.contentType & mask)
      types.push(name);
  }
  return types;
}
197
/**
 * Expands a list of domains for use in "if-domain" / "unless-domain".
 *
 * Every domain is kept as is. A domain for which tldjs reports an empty
 * subdomain is additionally followed by its "www." variant.
 *
 * @param {Iterable<string>} domains  domain names
 * @return {string[]} the expanded list, in input order
 */
function addDomainPrefix(domains)
{
  let expanded = [];

  for (let name of domains)
  {
    expanded.push(name);

    let hasNoSubdomain = tldjs.getSubdomain(name) == "";
    if (hasNoSubdomain)
      expanded.push("www." + name);
  }

  return expanded;
}
212
/**
 * Converts a request filter into a content blocker rule.
 *
 * The trigger always has "url-filter". "resource-type" is added when
 * requested, "load-type" when the filter's thirdParty flag is neither null
 * nor undefined. Included domains become "if-domain"; excluded domains are
 * only used ("unless-domain") when there are no included ones.
 *
 * @param {Object} filter  object exposing `domains` and `thirdParty`, plus
 *                         whatever getRegExpSource/getResourceTypes read
 * @param {string} action  value for the rule's action type
 * @param {boolean} withResourceTypes  whether to add "resource-type"
 * @return {{trigger: Object, action: {type: string}}} the rule
 */
function convertFilter(filter, action, withResourceTypes)
{
  let rule = {
    trigger: {"url-filter": getRegExpSource(filter)},
    action: {type: action}
  };
  let includedDomains = [];
  let excludedDomains = [];

  parseDomains(filter.domains, includedDomains, excludedDomains);

  if (withResourceTypes)
    rule.trigger["resource-type"] = getResourceTypes(filter);

  if (filter.thirdParty != null)
  {
    let loadType = filter.thirdParty ? "third-party" : "first-party";
    rule.trigger["load-type"] = [loadType];
  }

  if (includedDomains.length > 0)
    rule.trigger["if-domain"] = addDomainPrefix(includedDomains);
  else if (excludedDomains.length > 0)
    rule.trigger["unless-domain"] = addDomainPrefix(excludedDomains);

  return rule;
}
233
/**
 * Checks recursively whether a value contains a string with characters
 * outside the ASCII range (above \x7F).
 *
 * Strings are tested directly. Objects and arrays are searched through the
 * values of all their own properties (property names are not tested). Any
 * other value, including null, contains no such string.
 *
 * @param {*} obj  value to inspect
 * @return {boolean} true if a non-ASCII string was found
 */
function hasNonASCI(obj)
{
  if (typeof obj == "string")
    return /[^\x00-\x7F]/.test(obj);

  // typeof null is "object", but null has no properties to enumerate.
  if (typeof obj != "object" || obj === null)
    return false;

  // Own property names include array indices, so arrays need no extra pass.
  return Object.getOwnPropertyNames(obj).some(name => hasNonASCI(obj[name]));
}
256
/**
 * Rewrites every ID selector (#someID) in a selector string into the
 * attribute form [id=someID].
 *
 * "#" characters inside quoted text (e.g. [attr="#Hello"]) are left alone
 * and backslash-escaped characters are skipped. An ID consists of "-", "_",
 * ASCII letters and digits, characters from \x80 upwards and escaped
 * characters.
 *
 * The character that ends an ID is examined like any other character, so an
 * ID directly followed by another ID (#a#b) or by a quote is handled
 * correctly.
 *
 * @param {string} selector  CSS selector (or comma separated selector list)
 * @return {string} the selector with ID selectors replaced
 */
function convertIDSelectorsToAttributeSelectors(selector)
{
  let isIDChar = chr => chr == "-" || chr == "_" ||
                        (chr >= "0" && chr <= "9") ||
                        (chr >= "A" && chr <= "Z") ||
                        (chr >= "a" && chr <= "z") ||
                        chr >= "\x80";

  // First we figure out where all the IDs are
  let sep = "";
  let start = null;
  let positions = [];
  for (let i = 0; i < selector.length; i++)
  {
    let chr = selector[i];

    if (chr == "\\")        // ignore escaped characters
      i++;
    else if (chr == sep)    // don't match IDs within quoted text
      sep = "";             // e.g. [attr="#Hello"]
    else if (sep == "")
    {
      // look for the end of the current ID
      if (start != null && !isIDChar(chr))
      {
        positions.push({start: start, end: i});
        start = null;
      }

      // Outside of an ID (possibly because it just ended on this very
      // character) look for an opening quote or the start of the next ID.
      if (start == null)
      {
        if (chr == '"' || chr == "'")
          sep = chr;
        else if (chr == "#")
          start = i;
      }
    }
  }
  if (start != null)
    positions.push({start: start, end: selector.length});

  // Now replace them all with the [id=someID] form
  let newSelector = [];
  let i = 0;
  for (let pos of positions)
  {
    newSelector.push(selector.substring(i, pos.start));
    newSelector.push("[id=" + selector.substring(pos.start + 1, pos.end) + "]");
    i = pos.end;
  }
  newSelector.push(selector.substring(i));

  return newSelector.join("");
}
307
/**
 * Converts all collected filters into content blocker rules and prints them
 * to standard output as tab-indented JSON.
 *
 * Rules are emitted in this order: element hiding rules (grouped by URL
 * pattern, at most selectorLimit selectors per rule), element hiding
 * exceptions, blocking rules, request exceptions. Rules for which
 * hasNonASCI returns true are left out.
 */
function logRules()
{
  let output = [];
  let addRule = rule =>
  {
    if (!hasNonASCI(rule))
      output.push(rule);
  };

  // Collect the selectors of all element hiding filters per URL pattern.
  let selectorsByPattern = new Map();
  for (let filter of elemhideFilters)
  {
    let converted = convertElemHideFilter(filter);
    if (!converted)
      continue;

    // Filters without domain restriction apply to every HTTP(S) URL.
    let patterns = converted.matchDomains;
    if (patterns.length == 0)
      patterns = ["^https?://"];

    for (let pattern of patterns)
    {
      if (!selectorsByPattern.has(pattern))
        selectorsByPattern.set(pattern, []);
      selectorsByPattern.get(pattern).push(converted.selector);
    }
  }

  for (let [pattern, selectors] of selectorsByPattern)
  {
    for (let offset = 0; offset < selectors.length; offset += selectorLimit)
    {
      let chunk = selectors.slice(offset, offset + selectorLimit);

      // As of Safari 9.0 element IDs are matched as lowercase. We work around
      // this by converting ID selectors to the attribute format
      let selector = convertIDSelectorsToAttributeSelectors(chunk.join(", "));

      addRule({
        trigger: {"url-filter": pattern},
        action: {type: "css-display-none", selector: selector}
      });
    }
  }

  for (let filter of elemhideExceptions)
    addRule(convertFilter(filter, "ignore-previous-rules", false));

  for (let filter of requestFilters)
    addRule(convertFilter(filter, "block", true));

  for (let filter of requestExceptions)
    addRule(convertFilter(filter, "ignore-previous-rules", true));

  console.log(JSON.stringify(output, null, "\t"));
}
364
// Feed every line of standard input to parseFilter and print the generated
// rules once the input has been read completely.
let rl = readline.createInterface({
  input: process.stdin,
  terminal: false
});
rl.on("close", logRules);
rl.on("line", parseFilter);
OLDNEW
« no previous file with comments | « README.md ('k') | adblockplus.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld