Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
3 * Copyright (C) 2006-2016 Eyeo GmbH | |
4 * | |
5 * Adblock Plus is free software: you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License version 3 as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * Adblock Plus is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU General Public License | |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
16 */ | |
17 | |
18 /** @module contentBlockerList */ | |
19 | |
20 "use strict"; | |
21 | |
22 let filterClasses = require("filterClasses"); | |
23 let getBaseDomain = require("urlHelpers").getBaseDomain; | |
24 let punycode = require("punycode"); | |
25 | |
26 const selectorLimit = 5000; | |
27 const typeMap = filterClasses.RegExpFilter.typeMap; | |
28 | |
/**
 * Sorts the domains of a filter into an inclusion and an exclusion list.
 *
 * Every domain name is lower-cased and converted to punycode before it is
 * appended. The empty key "" is never appended itself; its value is the
 * default state applying to domains which are not listed explicitly.
 *
 * @param {?Object} domains   maps domain names to a truthy (enabled) or
 *                            falsy (disabled) state
 * @param {string[]} included receives the enabled domains, but only if the
 *                            default state domains[""] is falsy
 * @param {string[]} excluded receives all disabled domains
 */
function parseDomains(domains, included, excluded)
{
  for (let name in domains)
  {
    if (name == "")
      continue;

    let asciiName = punycode.toASCII(name.toLowerCase());

    if (!domains[name])
      excluded.push(asciiName);
    else if (!domains[""])
      included.push(asciiName);
  }
}
45 | |
/**
 * Backslash-escapes every character of a string that has a special meaning
 * in regular expressions, so the result matches the string literally.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeRegExp(s)
{
  let specialCharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(specialCharacters, "\\$&");
}
50 | |
/**
 * Builds regular expression source matching HTTP(S) URLs whose host is the
 * given domain or any of its subdomains.
 *
 * @param {string} domain
 * @returns {string}
 */
function matchDomain(domain)
{
  // The domain has to be matched literally, so escape special characters
  let escapedDomain = domain.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  return ["^https?://([^/:]*\\.)?", escapedDomain, "[/:]"].join("");
}
55 | |
/**
 * Converts an element hiding filter into the data needed to build a
 * "css-display-none" rule.
 *
 * @param {Object} filter
 *   element hiding filter; its `domains` and `selector` properties are read
 * @param {Object} elemhideSelectorExceptions
 *   lookup whose own property names are the selectors for which an
 *   element hiding exception exists
 * @returns {Object|undefined}
 *   `{matchDomains, selector}` where `matchDomains` holds one regular
 *   expression source per included domain (empty if the filter lists no
 *   included domains). Nothing is returned if the filter excludes any
 *   domain or if an exception exists for its selector.
 */
function convertElemHideFilter(filter, elemhideSelectorExceptions)
{
  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  // Filters which exclude domains are dropped entirely
  if (excluded.length > 0)
    return;

  // Only own properties count. The `in` operator would also find inherited
  // members of the lookup object (e.g. "size" or "set" for a Map) and
  // wrongly drop filters whose selector happens to have such a name.
  if (Object.prototype.hasOwnProperty.call(elemhideSelectorExceptions,
                                           filter.selector))
    return;

  return {matchDomains: included.map(matchDomain), selector: filter.selector};
}
67 | |
/**
 * Translates the pattern text of an Adblock Plus filter into regular
 * expression source.
 *
 * - "*" becomes ".*", unless nothing has been emitted yet, it is the last
 *   character, or it is directly followed by another "*"
 * - "^" becomes "." unless it is the last character, in which case it is
 *   dropped
 * - "|" becomes "^" as first character, "$" as last character and
 *   "https?://" as the second character of a leading "||"; anywhere else
 *   it is escaped
 * - the remaining regular expression special characters are escaped with
 *   a backslash, everything else is copied verbatim
 *
 * @param {string} text
 * @returns {string}
 */
function toRegExp(text)
{
  // Characters (besides a mid-pattern "|") that need a backslash
  let escapable = ".+?${}()[]\\";
  let lastIndex = text.length - 1;
  let source = "";

  for (let i = 0; i <= lastIndex; i++)
  {
    let c = text[i];

    if (c == "*")
    {
      if (source.length > 0 && i < lastIndex && text[i + 1] != "*")
        source += ".*";
    }
    else if (c == "^")
    {
      if (i < lastIndex)
        source += ".";
    }
    else if (c == "|" && i == 0)
      source += "^";
    else if (c == "|" && i == lastIndex)
      source += "$";
    else if (c == "|" && i == 1 && text[0] == "|")
      source += "https?://";
    else if (c == "|" || escapable.indexOf(c) != -1)
      source += "\\" + c;
    else
      source += c;
  }

  return source;
}
114 | |
/**
 * Produces the regular expression source used as "url-filter" for a
 * request filter.
 *
 * @param {Object} filter  filter whose `regexpSource` property is converted
 * @returns {string}
 */
function getRegExpSource(filter)
{
  // Safari expects punycode, filter lists use unicode
  let hostPattern = /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i;
  let asciiText = filter.regexpSource.replace(
    hostPattern,
    (match, prefix, domain) => prefix + punycode.toASCII(domain)
  );
  let source = toRegExp(asciiText);

  // Limit rules to HTTP(S) URLs
  if (/^(\^|http)/i.test(source))
    return source;

  return "^https?://.*" + source;
}
132 | |
/**
 * Lists the content blocker resource types corresponding to the content
 * types a filter applies to.
 *
 * @param {Object} filter  filter whose `contentType` bit mask is examined
 * @returns {string[]}     resource type names, in the order of the table
 *                         below
 */
function getResourceTypes(filter)
{
  // Each entry pairs a content type bit mask with the resource type that
  // is emitted if the filter has at least one of those bits set
  let mapping = [
    [typeMap.IMAGE, "image"],
    [typeMap.STYLESHEET, "style-sheet"],
    [typeMap.SCRIPT, "script"],
    [typeMap.FONT, "font"],
    [typeMap.MEDIA | typeMap.OBJECT, "media"],
    [typeMap.POPUP, "popup"],
    [typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST |
     typeMap.PING | typeMap.OTHER, "raw"],
    [typeMap.SUBDOCUMENT, "document"]
  ];

  return mapping
    .filter(entry => filter.contentType & entry[0])
    .map(entry => entry[1]);
}
158 | |
/**
 * Expands a list of domains so that every domain which is its own base
 * domain is followed by its "www." variant.
 *
 * @param {string[]} domains
 * @returns {string[]} new list; the given one is left untouched
 */
function addDomainPrefix(domains)
{
  let expanded = [];

  for (let name of domains)
  {
    if (getBaseDomain(name) == name)
      expanded.push(name, "www." + name);
    else
      expanded.push(name);
  }

  return expanded;
}
173 | |
/**
 * Converts a request filter into a content blocker rule.
 *
 * @param {Object} filter              filter to convert
 * @param {string} action              value for the rule's action type
 * @param {boolean} withResourceTypes  whether to restrict the rule to the
 *                                     resource types of the filter
 * @returns {Object} rule of the form `{trigger, action: {type}}`
 */
function convertFilter(filter, action, withResourceTypes)
{
  let trigger = {};
  trigger["url-filter"] = getRegExpSource(filter);

  if (withResourceTypes)
    trigger["resource-type"] = getResourceTypes(filter);

  if (filter.thirdParty != null)
  {
    let loadType = filter.thirdParty ? "third-party" : "first-party";
    trigger["load-type"] = [loadType];
  }

  let included = [];
  let excluded = [];
  parseDomains(filter.domains, included, excluded);

  // Only one of the two domain conditions is emitted, inclusions win
  if (included.length > 0)
    trigger["if-domain"] = addDomainPrefix(included);
  else if (excluded.length > 0)
    trigger["unless-domain"] = addDomainPrefix(excluded);

  return {trigger: trigger, action: {type: action}};
}
194 | |
/**
 * Checks whether a value contains any non-ASCII character.
 *
 * Strings are tested directly. Objects and arrays are searched
 * recursively through the values of all their own properties; property
 * names themselves are not tested. Any other value (numbers, booleans,
 * null, undefined, ...) contains no such character.
 *
 * @param {*} obj
 * @returns {boolean}
 */
function hasNonASCI(obj)
{
  if (typeof obj == "string")
    return /[^\x00-\x7F]/.test(obj);

  // typeof null is "object" as well, but null has no properties to search
  // and Object.getOwnPropertyNames(null) would throw
  if (typeof obj != "object" || obj == null)
    return false;

  // Array indices are own properties too, so this covers arrays as well
  for (let name of Object.getOwnPropertyNames(obj))
    if (hasNonASCI(obj[name]))
      return true;

  return false;
}
218 | |
/**
 * Rewrites all ID selectors (#someID) in a CSS selector to the equivalent
 * attribute selector form ([id=someID]).
 *
 * Escaped characters and anything inside quoted text are left alone.
 *
 * @param {string} selector
 * @returns {string}
 */
function convertIDSelectorsToAttributeSelectors(selector)
{
  // First we figure out where all the IDs are
  let sep = "";
  let start = null;
  let positions = [];
  for (let i = 0; i < selector.length; i++)
  {
    let chr = selector[i];

    if (chr == "\\")        // ignore escaped characters
      i++;
    else if (chr == sep)    // don't match IDs within quoted text
      sep = "";             // e.g. [attr="#Hello"]
    else if (sep == "")
    {
      if (chr == '"' || chr == "'")
        sep = chr;
      else if (start == null)  // look for the start of an ID
      {
        if (chr == "#")
          start = i;
      }
      else if (chr != "-" && chr != "_" &&
               (chr < "0" ||
                chr > "9" && chr < "A" ||
                chr > "Z" && chr < "a" ||
                chr > "z" && chr < "\x80")) // look for the end of the ID
      {
        positions.push({start: start, end: i});

        // A "#" ending the current ID starts the next one right away,
        // e.g. in "#first#second"
        start = (chr == "#" ? i : null);
      }
    }
  }
  if (start != null)
    positions.push({start: start, end: selector.length});

  // Now replace them all with the [id=someID] form
  let newSelector = [];
  let i = 0;
  for (let pos of positions)
  {
    newSelector.push(selector.substring(i, pos.start));
    newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
    i = pos.end;
  }
  newSelector.push(selector.substring(i));

  return newSelector.join("");
}
269 | |
270 let ContentBlockerList = | |
271 /** | |
272 * Create a new Adblock Plus filter to content blocker list converter | |
273 * | |
274 * @constructor | |
275 */ | |
276 exports.ContentBlockerList = function () | |
277 { | |
278 this.requestFilters = []; | |
279 this.requestExceptions = []; | |
280 this.elemhideFilters = []; | |
281 this.elemhideExceptions = []; | |
282 this.elemhideSelectorExceptions = new Map(); | |
283 }; | |
284 | |
285 ContentBlockerList.prototype = {}; | |
Sebastian Noack
2016/02/22 17:35:28
There is no need to set the prototype to an empty
kzar
2016/02/22 18:09:29
Done.
| |
286 | |
/**
 * Add Adblock Plus filter to be converted
 *
 * Filters with sitekeys and regular expression filters without a
 * `regexpSource` are ignored. Every other filter is stored in the
 * collection(s) matching its class until the rules are generated.
 *
 * @param {Filter} filter Filter to convert
 */
ContentBlockerList.prototype.addFilter = function(filter)
{
  if (filter.sitekeys ||
      (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource))
    return;

  if (filter instanceof filterClasses.BlockingFilter)
    this.requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
  {
    // All content types that are handled as request exceptions
    let requestTypes = typeMap.IMAGE |
                       typeMap.STYLESHEET |
                       typeMap.SCRIPT |
                       typeMap.FONT |
                       typeMap.MEDIA |
                       typeMap.POPUP |
                       typeMap.OBJECT |
                       typeMap.OBJECT_SUBREQUEST |
                       typeMap.XMLHTTPREQUEST |
                       typeMap.PING |
                       typeMap.SUBDOCUMENT |
                       typeMap.OTHER;

    if (filter.contentType & requestTypes)
      this.requestExceptions.push(filter);

    if (filter.contentType & typeMap.ELEMHIDE)
      this.elemhideExceptions.push(filter);
  }

  if (filter instanceof filterClasses.ElemHideFilter)
    this.elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
  {
    // Collect the included domains of all exceptions for this selector,
    // excluded domains are discarded
    let exceptions = this.elemhideSelectorExceptions;
    if (!exceptions[filter.selector])
      exceptions[filter.selector] = [];

    parseDomains(filter.domains, exceptions[filter.selector], []);
  }
};
334 | |
/**
 * Generate content blocker list for all filters that were added
 *
 * Element hiding rules come first, followed by the rules for element
 * hiding exceptions, request filters and request exceptions. Rules which
 * contain non-ASCII characters are left out.
 *
 * @param {*} [filter] not used
 * @returns {Object[]} list of content blocker rules
 */
ContentBlockerList.prototype.generateRules = function(filter)
{
  let rules = [];
  let addRule = rule =>
  {
    if (!hasNonASCI(rule))
      rules.push(rule);
  };

  // Collect the selectors of all element hiding filters per URL pattern
  let groupedElemhideFilters = new Map();
  for (let elemhideFilter of this.elemhideFilters)
  {
    let result = convertElemHideFilter(elemhideFilter,
                                       this.elemhideSelectorExceptions);
    if (!result)
      continue;

    // Filters without domain restriction apply to every HTTP(S) URL
    let matchDomains = result.matchDomains;
    if (matchDomains.length == 0)
      matchDomains = ["^https?://"];

    for (let matchDomain of matchDomains)
    {
      if (!groupedElemhideFilters.has(matchDomain))
        groupedElemhideFilters.set(matchDomain, []);
      groupedElemhideFilters.get(matchDomain).push(result.selector);
    }
  }

  for (let [matchDomain, selectors] of groupedElemhideFilters)
  {
    // Emit one rule per chunk of at most selectorLimit selectors
    for (let offset = 0; offset < selectors.length; offset += selectorLimit)
    {
      let chunk = selectors.slice(offset, offset + selectorLimit);

      // As of Safari 9.0 element IDs are matched as lowercase. We work around
      // this by converting to the attribute format [id=elementID]
      let selector = convertIDSelectorsToAttributeSelectors(chunk.join(", "));

      addRule({
        trigger: {"url-filter": matchDomain},
        action: {type: "css-display-none",
                 selector: selector}
      });
    }
  }

  for (let exception of this.elemhideExceptions)
    addRule(convertFilter(exception, "ignore-previous-rules", false));
  for (let blocking of this.requestFilters)
    addRule(convertFilter(blocking, "block", true));
  for (let exception of this.requestExceptions)
    addRule(convertFilter(exception, "ignore-previous-rules", true));

  return rules;
};
OLD | NEW |