Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: abp2blocklist.js

Issue 29336349: Issue 3585 - Merge element hiding rules for the same domain (Closed)
Left Patch Set: Created Feb. 13, 2016, 7:27 p.m.
Right Patch Set: Fixed mistake in generated JSON structure Created Feb. 16, 2016, 7:02 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 "use strict"; 1 "use strict";
2 2
3 var readline = require("readline"); 3 let readline = require("readline");
4 var punycode = require("punycode"); 4 let punycode = require("punycode");
5 var tldjs = require("tldjs"); 5 let tldjs = require("tldjs");
6 var filterClasses = require("./adblockplus.js"); 6 let filterClasses = require("./adblockplus.js");
7 7
8 var typeMap = filterClasses.RegExpFilter.typeMap; 8 let typeMap = filterClasses.RegExpFilter.typeMap;
9 9
10 var requestFilters = []; 10 const selectorLimit = 5000;
11 var requestExceptions = []; 11
12 var elemhideFilters = []; 12 let requestFilters = [];
13 var elemhideExceptions = []; 13 let requestExceptions = [];
14 var elemhideSelectorExceptions = Object.create(null); 14 let elemhideFilters = [];
15 let elemhideExceptions = [];
16 let elemhideSelectorExceptions = new Map();
15 17
16 function recordException(filter) 18 function recordException(filter)
17 { 19 {
18 if (filter.contentType & (typeMap.IMAGE 20 if (filter.contentType & (typeMap.IMAGE
19 | typeMap.STYLESHEET 21 | typeMap.STYLESHEET
20 | typeMap.SCRIPT 22 | typeMap.SCRIPT
21 | typeMap.FONT 23 | typeMap.FONT
22 | typeMap.MEDIA 24 | typeMap.MEDIA
23 | typeMap.POPUP 25 | typeMap.POPUP
24 | typeMap.OBJECT 26 | typeMap.OBJECT
25 | typeMap.OBJECT_SUBREQUEST 27 | typeMap.OBJECT_SUBREQUEST
26 | typeMap.XMLHTTPREQUEST 28 | typeMap.XMLHTTPREQUEST
27 | typeMap.PING 29 | typeMap.PING
28 | typeMap.SUBDOCUMENT 30 | typeMap.SUBDOCUMENT
29 | typeMap.OTHER)) 31 | typeMap.OTHER))
30 requestExceptions.push(filter); 32 requestExceptions.push(filter);
31 33
32 if (filter.contentType & typeMap.ELEMHIDE) 34 if (filter.contentType & typeMap.ELEMHIDE)
33 elemhideExceptions.push(filter); 35 elemhideExceptions.push(filter);
34 } 36 }
35 37
36 function parseDomains(domains, included, excluded) 38 function parseDomains(domains, included, excluded)
37 { 39 {
38 for (var domain in domains) 40 for (let domain in domains)
39 { 41 {
40 if (domain != "") 42 if (domain != "")
41 { 43 {
42 var enabled = domains[domain]; 44 let enabled = domains[domain];
43 domain = punycode.toASCII(domain.toLowerCase()); 45 domain = punycode.toASCII(domain.toLowerCase());
44 46
45 if (!enabled) 47 if (!enabled)
46 excluded.push(domain); 48 excluded.push(domain);
47 else if (!domains[""]) 49 else if (!domains[""])
48 included.push(domain); 50 included.push(domain);
49 } 51 }
50 } 52 }
51 } 53 }
52 54
53 function recordSelectorException(filter) 55 function recordSelectorException(filter)
54 { 56 {
55 var domains = elemhideSelectorExceptions[filter.selector]; 57 let domains = elemhideSelectorExceptions[filter.selector];
56 if (!domains) 58 if (!domains)
57 domains = elemhideSelectorExceptions[filter.selector] = []; 59 domains = elemhideSelectorExceptions[filter.selector] = [];
58 60
59 parseDomains(filter.domains, domains, []); 61 parseDomains(filter.domains, domains, []);
60 } 62 }
61 63
62 function parseFilter(line) 64 function parseFilter(line)
63 { 65 {
64 if (line.charAt(0) == "[") 66 if (line.charAt(0) == "[")
65 return; 67 return;
66 68
67 var filter = filterClasses.Filter.fromText(line); 69 let filter = filterClasses.Filter.fromText(line);
68 70
69 if (filter.sitekeys) 71 if (filter.sitekeys)
70 return; 72 return;
71 if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource) 73 if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource)
72 return; 74 return;
73 75
74 if (filter instanceof filterClasses.BlockingFilter) 76 if (filter instanceof filterClasses.BlockingFilter)
75 requestFilters.push(filter); 77 requestFilters.push(filter);
76 if (filter instanceof filterClasses.WhitelistFilter) 78 if (filter instanceof filterClasses.WhitelistFilter)
77 recordException(filter); 79 recordException(filter);
78 if (filter instanceof filterClasses.ElemHideFilter) 80 if (filter instanceof filterClasses.ElemHideFilter)
79 elemhideFilters.push(filter); 81 elemhideFilters.push(filter);
80 if (filter instanceof filterClasses.ElemHideException) 82 if (filter instanceof filterClasses.ElemHideException)
81 recordSelectorException(filter); 83 recordSelectorException(filter);
82 } 84 }
83 85
84 function escapeRegExp(s) 86 function escapeRegExp(s)
85 { 87 {
86 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 88 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
87 } 89 }
88 90
89 function matchDomain(domain) 91 function matchDomain(domain)
90 { 92 {
91 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; 93 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]";
92 } 94 }
93 95
94 function convertElemHideFilter(filter) 96 function convertElemHideFilter(filter)
95 { 97 {
96 var included = []; 98 let included = [];
97 var excluded = []; 99 let excluded = [];
98 var rules = []; 100 let rules = [];
99 101
100 parseDomains(filter.domains, included, excluded); 102 parseDomains(filter.domains, included, excluded);
101 103
102 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 104 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
103 return [included.map(matchDomain), filter.selector]; 105 return {matchDomains: included.map(matchDomain), selector: filter.selector};
104 } 106 }
105 107
106 function toRegExp(text) 108 function toRegExp(text)
107 { 109 {
108 var result = ""; 110 let result = "";
109 var lastIndex = text.length - 1; 111 let lastIndex = text.length - 1;
110 112
111 for (var i = 0; i < text.length; i++) 113 for (let i = 0; i < text.length; i++)
112 { 114 {
113 var c = text[i]; 115 let c = text[i];
114 116
115 switch (c) 117 switch (c)
116 { 118 {
117 case "*": 119 case "*":
118 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") 120 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
119 result += ".*"; 121 result += ".*";
120 break; 122 break;
121 case "^": 123 case "^":
122 if (i < lastIndex) 124 if (i < lastIndex)
123 result += "."; 125 result += ".";
(...skipping 21 matching lines...) Expand all
145 default: 147 default:
146 result += c; 148 result += c;
147 } 149 }
148 } 150 }
149 151
150 return result; 152 return result;
151 } 153 }
152 154
153 function getRegExpSource(filter) 155 function getRegExpSource(filter)
154 { 156 {
155 var source = toRegExp(filter.regexpSource.replace( 157 let source = toRegExp(filter.regexpSource.replace(
156 // Safari expects punycode, filter lists use unicode 158 // Safari expects punycode, filter lists use unicode
157 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, 159 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
158 function (match, prefix, domain) 160 function (match, prefix, domain)
159 { 161 {
160 return prefix + punycode.toASCII(domain); 162 return prefix + punycode.toASCII(domain);
161 } 163 }
162 )); 164 ));
163 165
164 // Limit rules to HTTP(S) URLs 166 // Limit rules to HTTP(S) URLs
165 if (!/^(\^|http)/i.test(source)) 167 if (!/^(\^|http)/i.test(source))
166 source = "^https?://.*" + source; 168 source = "^https?://.*" + source;
167 169
168 return source; 170 return source;
169 } 171 }
170 172
171 function getResourceTypes(filter) 173 function getResourceTypes(filter)
172 { 174 {
173 var types = []; 175 let types = [];
174 176
175 if (filter.contentType & typeMap.IMAGE) 177 if (filter.contentType & typeMap.IMAGE)
176 types.push("image"); 178 types.push("image");
177 if (filter.contentType & typeMap.STYLESHEET) 179 if (filter.contentType & typeMap.STYLESHEET)
178 types.push("style-sheet"); 180 types.push("style-sheet");
179 if (filter.contentType & typeMap.SCRIPT) 181 if (filter.contentType & typeMap.SCRIPT)
180 types.push("script"); 182 types.push("script");
181 if (filter.contentType & typeMap.FONT) 183 if (filter.contentType & typeMap.FONT)
182 types.push("font"); 184 types.push("font");
183 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) 185 if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT))
184 types.push("media"); 186 types.push("media");
185 if (filter.contentType & typeMap.POPUP) 187 if (filter.contentType & typeMap.POPUP)
186 types.push("popup"); 188 types.push("popup");
187 if (filter.contentType & (typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST 189 if (filter.contentType & (typeMap.XMLHTTPREQUEST | typeMap.OBJECT_SUBREQUEST
188 | typeMap.PING | typeMap.OTHER)) 190 | typeMap.PING | typeMap.OTHER))
189 types.push("raw"); 191 types.push("raw");
190 if (filter.contentType & typeMap.SUBDOCUMENT) 192 if (filter.contentType & typeMap.SUBDOCUMENT)
191 types.push("document"); 193 types.push("document");
192 194
193 return types; 195 return types;
194 } 196 }
195 197
196 function addDomainPrefix(domains) 198 function addDomainPrefix(domains)
197 { 199 {
198 var result = []; 200 let result = [];
199 201
200 for (var i = 0; i < domains.length; i++) 202 for (let domain of domains)
201 { 203 {
202 var domain = domains[i];
203 result.push(domain); 204 result.push(domain);
204 205
205 if (tldjs.getSubdomain(domain) == "") 206 if (tldjs.getSubdomain(domain) == "")
206 result.push("www." + domain); 207 result.push("www." + domain);
207 } 208 }
208 209
209 return result; 210 return result;
210 } 211 }
211 212
212 function convertFilter(filter, action, withResourceTypes) 213 function convertFilter(filter, action, withResourceTypes)
213 { 214 {
214 var trigger = {"url-filter": getRegExpSource(filter)}; 215 let trigger = {"url-filter": getRegExpSource(filter)};
215 var included = []; 216 let included = [];
216 var excluded = []; 217 let excluded = [];
217 218
218 parseDomains(filter.domains, included, excluded); 219 parseDomains(filter.domains, included, excluded);
219 220
220 if (withResourceTypes) 221 if (withResourceTypes)
221 trigger["resource-type"] = getResourceTypes(filter); 222 trigger["resource-type"] = getResourceTypes(filter);
222 if (filter.thirdParty != null) 223 if (filter.thirdParty != null)
223 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 224 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
224 225
225 if (included.length > 0) 226 if (included.length > 0)
226 trigger["if-domain"] = addDomainPrefix(included); 227 trigger["if-domain"] = addDomainPrefix(included);
227 else if (excluded.length > 0) 228 else if (excluded.length > 0)
228 trigger["unless-domain"] = addDomainPrefix(excluded); 229 trigger["unless-domain"] = addDomainPrefix(excluded);
229 230
230 return {trigger: trigger, action: {type: action}}; 231 return {trigger: trigger, action: {type: action}};
231 } 232 }
232 233
233 function hasNonASCI(obj) 234 function hasNonASCI(obj)
234 { 235 {
235 if (typeof obj == "string") 236 if (typeof obj == "string")
236 { 237 {
237 if (/[^\x00-\x7F]/.test(obj)) 238 if (/[^\x00-\x7F]/.test(obj))
238 return true; 239 return true;
239 } 240 }
240 241
241 if (typeof obj == "object") 242 if (typeof obj == "object")
242 { 243 {
243 var i;
244 if (obj instanceof Array) 244 if (obj instanceof Array)
245 for (i = 0; i < obj.length; i++) 245 for (let item of obj)
246 if (hasNonASCI(obj[i])) 246 if (hasNonASCI(item))
247 return true; 247 return true;
248 248
249 var names = Object.getOwnPropertyNames(obj); 249 for (let name of Object.getOwnPropertyNames(obj))
250 for (i = 0; i < names.length; i++) 250 if (hasNonASCI(obj[name]))
251 if (hasNonASCI(obj[names[i]]))
252 return true; 251 return true;
253 } 252 }
254 253
255 return false; 254 return false;
256 } 255 }
257 256
258 function logRules() 257 function logRules()
259 { 258 {
260 var rules = []; 259 let rules = [];
261 var i;
262 260
263 function addRule(rule) 261 function addRule(rule)
264 { 262 {
265 if (!hasNonASCI(rule)) 263 if (!hasNonASCI(rule))
266 rules.push(rule); 264 rules.push(rule);
267 } 265 }
268 266
269 let groupedElemhideFilters = new Map(); 267 let groupedElemhideFilters = new Map();
270 for (let filter of elemhideFilters) 268 for (let filter of elemhideFilters)
Sebastian Noack 2016/02/15 14:33:11 Do these ES2015 features even work in node.js wi… [comment preview truncated]
kzar 2016/02/15 18:19:20 (As discussed in IRC we will use ES2015 features t… [comment preview truncated]
271 { 269 {
272 let result = convertElemHideFilter(filter); 270 let result = convertElemHideFilter(filter);
273 if (!result) 271 if (!result)
274 continue; 272 continue;
275 let targetDomains = result[0]; 273
276 let selector = result[1]; 274 if (result.matchDomains.length == 0)
277 275 result.matchDomains = ["^https?://"];
278 if (targetDomains.length == 0) 276
279 targetDomains = ["^https?://"]; 277 for (let matchDomain of result.matchDomains)
280 278 {
281 for (let domain of targetDomains) 279 let group = groupedElemhideFilters.get(matchDomain) || [];
282 { 280 group.push(result.selector);
283 if (!groupedElemhideFilters.has(domain)) 281 groupedElemhideFilters.set(matchDomain, group);
284 groupedElemhideFilters.set(domain, []); 282 }
285 groupedElemhideFilters.get(domain).push(selector); 283 }
286 } 284
287 } 285 groupedElemhideFilters.forEach((selectors, matchDomain) =>
288 286 {
289 groupedElemhideFilters.forEach((selectors, domain) => 287 while (selectors.length)
290 { 288 {
291 let rule = {trigger: {"url-filter": domain}, 289 addRule({
292 action: {type: "css-display-none"}}; 290 trigger: {"url-filter": matchDomain},
293 291 action: {type: "css-display-none",
294 if (selectors.length == 1) 292 selector: selectors.splice(0, selectorLimit).join(", ")}
295 rule["action"]["selector"] = selectors[0]; 293 });
296 else 294 }
297 rule["action"]["selector"] = ":matches(" + selectors.join(", ") + ")";
kzar 2016/02/13 19:33:24 Note: I wasn't sure if we need to escape the selec… [comment preview truncated]
Sebastian Noack 2016/02/15 14:33:11 Well, if any CSS selector is invalid it will break… [comment preview truncated]
kzar 2016/02/15 18:19:20 Acknowledged.
298
299 addRule(rule);
300 }); 295 });
301 296
302 for (i = 0; i < elemhideExceptions.length; i++) 297 for (let filter of elemhideExceptions)
303 addRule(convertFilter(elemhideExceptions[i], "ignore-previous-rules", false) ); 298 addRule(convertFilter(filter, "ignore-previous-rules", false));
304 299
305 for (i = 0; i < requestFilters.length; i++) 300 for (let filter of requestFilters)
306 addRule(convertFilter(requestFilters[i], "block", true)); 301 addRule(convertFilter(filter, "block", true));
307 for (i = 0; i < requestExceptions.length; i++) 302 for (let filter of requestExceptions)
308 addRule(convertFilter(requestExceptions[i], "ignore-previous-rules", true)); 303 addRule(convertFilter(filter, "ignore-previous-rules", true));
309 304
310 console.log(JSON.stringify(rules, null, "\t")); 305 console.log(JSON.stringify(rules, null, "\t"));
311 } 306 }
312 307
313 var rl = readline.createInterface({input: process.stdin, terminal: false}); 308 let rl = readline.createInterface({input: process.stdin, terminal: false});
314 rl.on("line", parseFilter); 309 rl.on("line", parseFilter);
315 rl.on("close", logRules); 310 rl.on("close", logRules);
LEFT | RIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld