Left: | ||
Right: |
OLD | NEW |
---|---|
1 "use strict"; | 1 "use strict"; |
Sebastian Noack
2016/02/21 21:21:43
Mind adding the missing license disclaimer here?
kzar
2016/02/22 12:28:05
Done.
| |
2 | 2 |
3 let readline = require("readline"); | 3 let readline = require("readline"); |
4 let punycode = require("punycode"); | |
5 let tldjs = require("tldjs"); | |
6 let filterClasses = require("./adblockplus.js"); | |
7 | 4 |
8 let typeMap = filterClasses.RegExpFilter.typeMap; | 5 // Hack to force `require("url")` to return our URL module instead of the |
6 // one included with Node.js! We manually load it here and then tweak the cache. | |
7 // (After loading our code we must clear it again so that Node.js libraries will | |
8 // be given the URL module that they expect.) | |
9 require("url.js"); | |
10 require.cache["url"] = require.cache[require.resolve("url.js")]; | |
Sebastian Noack
2016/02/21 20:34:44
I'd rather rename our url module, here and in Adbl
kzar
2016/02/22 12:28:05
OK I've renamed the module urlHelpers and removed
Sebastian Noack
2016/02/22 17:35:27
I might have got an even better idea. How about
kzar
2016/02/22 18:09:28
Sounds good, done.
| |
9 | 11 |
10 const selectorLimit = 5000; | 12 let Filter = require("filterClasses").Filter; |
13 let contentBlockerLists = require("./lib/contentBlockerLists.js"); | |
11 | 14 |
12 let requestFilters = []; | 15 delete require.cache["url"]; |
13 let requestExceptions = []; | |
14 let elemhideFilters = []; | |
15 let elemhideExceptions = []; | |
16 let elemhideSelectorExceptions = new Map(); | |
17 | 16 |
18 function recordException(filter) | 17 var rl = readline.createInterface({input: process.stdin, terminal: false}); |
18 var filters = []; | |
19 | |
20 rl.on("line", line => | |
19 { | 21 { |
20 if (filter.contentType & (typeMap.IMAGE | 22 if (line.charAt(0) != "[") |
Sebastian Noack
2016/02/21 20:34:44
What about empty lines? It seems that we didn't
kzar
2016/02/22 12:28:05
It appears that contentBlockerLists.js logic was h
Sebastian Noack
2016/02/22 17:35:27
OK, I looked into it and figured that this case wa
kzar
2016/02/22 18:09:28
Done.
As for lines with only white-space, we're c
Sebastian Noack
2016/02/22 18:20:26
Yeah, I already figured that myself. The point is
kzar
2016/02/22 19:46:01
OK add the custom subscription of http://static.kz
| |
21 | typeMap.STYLESHEET | 23 filters.push(Filter.fromText(line)); |
22 | typeMap.SCRIPT | 24 }); |
23 | typeMap.FONT | |
24 | typeMap.MEDIA | |
25 | typeMap.POPUP | |
26 | typeMap.OBJECT | |
27 | typeMap.OBJECT_SUBREQUEST | |
28 | typeMap.XMLHTTPREQUEST | |
29 | typeMap.PING | |
30 | typeMap.SUBDOCUMENT | |
31 | typeMap.OTHER)) | |
32 requestExceptions.push(filter); | |
33 | 25 |
34 if (filter.contentType & typeMap.ELEMHIDE) | 26 rl.on("close", () => |
35 elemhideExceptions.push(filter); | |
36 } | |
37 | |
38 function parseDomains(domains, included, excluded) | |
39 { | 27 { |
40 for (let domain in domains) | 28 console.log(JSON.stringify(contentBlockerLists.convertFilters(filters), |
41 { | 29 null, "\t")); |
42 if (domain != "") | 30 }); |
43 { | |
44 let enabled = domains[domain]; | |
45 domain = punycode.toASCII(domain.toLowerCase()); | |
46 | |
47 if (!enabled) | |
48 excluded.push(domain); | |
49 else if (!domains[""]) | |
50 included.push(domain); | |
51 } | |
52 } | |
53 } | |
54 | |
55 function recordSelectorException(filter) | |
56 { | |
57 let domains = elemhideSelectorExceptions[filter.selector]; | |
58 if (!domains) | |
59 domains = elemhideSelectorExceptions[filter.selector] = []; | |
60 | |
61 parseDomains(filter.domains, domains, []); | |
62 } | |
63 | |
64 function parseFilter(line) | |
65 { | |
66 if (line.charAt(0) == "[") | |
67 return; | |
68 | |
69 let filter = filterClasses.Filter.fromText(line); | |
70 | |
71 if (filter.sitekeys) | |
72 return; | |
73 if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource) | |
74 return; | |
75 | |
76 if (filter instanceof filterClasses.BlockingFilter) | |
77 requestFilters.push(filter); | |
78 if (filter instanceof filterClasses.WhitelistFilter) | |
79 recordException(filter); | |
80 if (filter instanceof filterClasses.ElemHideFilter) | |
81 elemhideFilters.push(filter); | |
82 if (filter instanceof filterClasses.ElemHideException) | |
83 recordSelectorException(filter); | |
84 } | |
85 | |
86 function escapeRegExp(s) | |
87 { | |
88 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); | |
89 } | |
90 | |
91 function matchDomain(domain) | |
92 { | |
93 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; | |
94 } | |
95 | |
96 function convertElemHideFilter(filter) | |
97 { | |
98 let included = []; | |
99 let excluded = []; | |
100 let rules = []; | |
101 | |
102 parseDomains(filter.domains, included, excluded); | |
103 | |
104 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | |
105 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | |
106 } | |
107 | |
108 function toRegExp(text) | |
109 { | |
110 let result = ""; | |
111 let lastIndex = text.length - 1; | |
112 | |
113 for (let i = 0; i < text.length; i++) | |
114 { | |
115 let c = text[i]; | |
116 | |
117 switch (c) | |
118 { | |
119 case "*": | |
120 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | |
121 result += ".*"; | |
122 break; | |
123 case "^": | |
124 if (i < lastIndex) | |
125 result += "."; | |
126 break; | |
127 case "|": | |
128 if (i == 0) | |
129 { | |
130 result += "^"; | |
131 break; | |
132 } | |
133 if (i == lastIndex) | |
134 { | |
135 result += "$"; | |
136 break; | |
137 } | |
138 if (i == 1 && text[0] == "|") | |
139 { | |
140 result += "https?://"; | |
141 break; | |
142 } | |
143 case ".": case "+": case "?": case "$": | |
144 case "{": case "}": case "(": case ")": | |
145 case "[": case "]": case "\\": | |
146 result += "\\"; | |
147 default: | |
148 result += c; | |
149 } | |
150 } | |
151 | |
152 return result; | |
153 } | |
154 | |
155 function getRegExpSource(filter) | |
156 { | |
157 let source = toRegExp(filter.regexpSource.replace( | |
158 // Safari expects punycode, filter lists use unicode | |
159 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, | |
160 function (match, prefix, domain) | |
161 { | |
162 return prefix + punycode.toASCII(domain); | |
163 } | |
164 )); | |
165 | |
166 // Limit rules to HTTP(S) URLs | |
167 if (!/^(\^|http)/i.test(source)) | |
168 source = "^https?://.*" + source; | |
169 | |
170 return source; | |
171 } | |
172 | |
173 function getResourceTypes(filter) | |
174 { | |
175 let types = []; | |
176 | |
177 if (filter.contentType & typeMap.IMAGE) | |
178 types.push("image"); | |
179 if (filter.contentType & typeMap.STYLESHEET) | |
180 types.push("style-sheet"); | |
181 if (filter.contentType & typeMap.SCRIPT) | |
182 types.push("script"); | |
183 if (filter.contentType & typeMap.FONT) | |
184 types.push("font"); | |
185 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) | |
186 types.push("media"); | |
187 if (filter.contentType & typeMap.POPUP) | |
188 types.push("popup"); | |
189 if (filter.contentType & (typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST | |
190 | typeMap.PING | typeMap.OTHER)) | |
191 types.push("raw"); | |
192 if (filter.contentType & typeMap.SUBDOCUMENT) | |
193 types.push("document"); | |
194 | |
195 return types; | |
196 } | |
197 | |
198 function addDomainPrefix(domains) | |
199 { | |
200 let result = []; | |
201 | |
202 for (let domain of domains) | |
203 { | |
204 result.push(domain); | |
205 | |
206 if (tldjs.getSubdomain(domain) == "") | |
207 result.push("www." + domain); | |
208 } | |
209 | |
210 return result; | |
211 } | |
212 | |
213 function convertFilter(filter, action, withResourceTypes) | |
214 { | |
215 let trigger = {"url-filter": getRegExpSource(filter)}; | |
216 let included = []; | |
217 let excluded = []; | |
218 | |
219 parseDomains(filter.domains, included, excluded); | |
220 | |
221 if (withResourceTypes) | |
222 trigger["resource-type"] = getResourceTypes(filter); | |
223 if (filter.thirdParty != null) | |
224 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | |
225 | |
226 if (included.length > 0) | |
227 trigger["if-domain"] = addDomainPrefix(included); | |
228 else if (excluded.length > 0) | |
229 trigger["unless-domain"] = addDomainPrefix(excluded); | |
230 | |
231 return {trigger: trigger, action: {type: action}}; | |
232 } | |
233 | |
234 function hasNonASCI(obj) | |
235 { | |
236 if (typeof obj == "string") | |
237 { | |
238 if (/[^\x00-\x7F]/.test(obj)) | |
239 return true; | |
240 } | |
241 | |
242 if (typeof obj == "object") | |
243 { | |
244 if (obj instanceof Array) | |
245 for (let item of obj) | |
246 if (hasNonASCI(item)) | |
247 return true; | |
248 | |
249 for (let name of Object.getOwnPropertyNames(obj)) | |
250 if (hasNonASCI(obj[name])) | |
251 return true; | |
252 } | |
253 | |
254 return false; | |
255 } | |
256 | |
257 function convertIDSelectorsToAttributeSelectors(selector) | |
258 { | |
259 // First we figure out where all the IDs are | |
260 let sep = ""; | |
261 let start = null; | |
262 let positions = []; | |
263 for (let i = 0; i < selector.length; i++) | |
264 { | |
265 let chr = selector[i]; | |
266 | |
267 if (chr == "\\") // ignore escaped characters | |
268 i++; | |
269 else if (chr == sep) // don't match IDs within quoted text | |
270 sep = ""; // e.g. [attr="#Hello"] | |
271 else if (sep == "") | |
272 { | |
273 if (chr == '"' || chr == "'") | |
274 sep = chr; | |
275 else if (start == null) // look for the start of an ID | |
276 { | |
277 if (chr == "#") | |
278 start = i; | |
279 } | |
280 else if (chr != "-" && chr != "_" && | |
281 (chr < "0" || | |
282 chr > "9" && chr < "A" || | |
283 chr > "Z" && chr < "a" || | |
284 chr > "z" && chr < "\x80")) // look for the end of the ID | |
285 { | |
286 positions.push({start: start, end: i}); | |
287 start = null; | |
288 } | |
289 } | |
290 } | |
291 if (start != null) | |
292 positions.push({start: start, end: selector.length}); | |
293 | |
294 // Now replace them all with the [id="someID"] form | |
295 let newSelector = []; | |
296 let i = 0; | |
297 for (let pos of positions) | |
298 { | |
299 newSelector.push(selector.substring(i, pos.start)); | |
300 newSelector.push('[id=' + selector.substring(pos.start + 1, pos.end) + ']'); | |
301 i = pos.end; | |
302 } | |
303 newSelector.push(selector.substring(i)); | |
304 | |
305 return newSelector.join(""); | |
306 } | |
307 | |
308 function logRules() | |
309 { | |
310 let rules = []; | |
311 | |
312 function addRule(rule) | |
313 { | |
314 if (!hasNonASCI(rule)) | |
315 rules.push(rule); | |
316 } | |
317 | |
318 let groupedElemhideFilters = new Map(); | |
319 for (let filter of elemhideFilters) | |
320 { | |
321 let result = convertElemHideFilter(filter); | |
322 if (!result) | |
323 continue; | |
324 | |
325 if (result.matchDomains.length == 0) | |
326 result.matchDomains = ["^https?://"]; | |
327 | |
328 for (let matchDomain of result.matchDomains) | |
329 { | |
330 let group = groupedElemhideFilters.get(matchDomain) || []; | |
331 group.push(result.selector); | |
332 groupedElemhideFilters.set(matchDomain, group); | |
333 } | |
334 } | |
335 | |
336 groupedElemhideFilters.forEach((selectors, matchDomain) => | |
337 { | |
338 while (selectors.length) | |
339 { | |
340 let selector = selectors.splice(0, selectorLimit).join(", "); | |
341 | |
342 // As of Safari 9.0 element IDs are matched as lowercase. We work around | |
343 // this by converting to the attribute format [id="elementID"] | |
344 selector = convertIDSelectorsToAttributeSelectors(selector); | |
345 | |
346 addRule({ | |
347 trigger: {"url-filter": matchDomain}, | |
348 action: {type: "css-display-none", | |
349 selector: selector} | |
350 }); | |
351 } | |
352 }); | |
353 | |
354 for (let filter of elemhideExceptions) | |
355 addRule(convertFilter(filter, "ignore-previous-rules", false)); | |
356 | |
357 for (let filter of requestFilters) | |
358 addRule(convertFilter(filter, "block", true)); | |
359 for (let filter of requestExceptions) | |
360 addRule(convertFilter(filter, "ignore-previous-rules", true)); | |
361 | |
362 console.log(JSON.stringify(rules, null, "\t")); | |
363 } | |
364 | |
365 let rl = readline.createInterface({input: process.stdin, terminal: false}); | |
366 rl.on("line", parseFilter); | |
367 rl.on("close", logRules); | |
OLD | NEW |