Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)
Patch Set: Created April 20, 2016, 5:09 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH 3 * Copyright (C) 2006-2016 Eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 let excluded = []; 59 let excluded = [];
60 let rules = []; 60 let rules = [];
61 61
62 parseDomains(filter.domains, included, excluded); 62 parseDomains(filter.domains, included, excluded);
63 63
64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 65 return {matchDomains: included.map(matchDomain), selector: filter.selector};
66 } 66 }
67 67
68 /** 68 /**
69 * Convert the given filter "regexpSource" string into a regular expression, 69 * Parse the given filter "regexpSource" string, producing a regular expression,
70 * handling the conversion of unicode inside hostnames to punycode. 70 * extracting the hostname (if any), deciding if the regular expression is safe
71 * (Also deciding if the regular expression can be safely converted to and 71 * to be converted and matched as lower case, and noting if the source contains
72 * matched as lower case or not.) 72 * anything after the hostname.
73 * 73 *
74 * @param {string} text regexpSource property of a filter 74 * @param {string} text regexpSource property of a filter
75 * @returns {object} An object containing a regular expression string and a bool 75 * @returns {object} An object containing a regular expression string, a bool
76 * indicating if the filter can be safely matched as lower 76 * indicating if the filter can be safely matched as lower
77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false } 77 * case, a hostname string (or undefined) and a bool
78 * indicating if the source only contains a hostname or not:
79 * {regexp: "...",
80 * canSafelyMatchAsLowercase: true/false,
81 * hostname: "...",
82 * justHostname: true/false}
78 */ 83 */
79 function toRegExp(text) 84 function parseFilterRegexpSource(text)
80 { 85 {
81 let result = []; 86 let regexp = [];
82 let lastIndex = text.length - 1; 87 let lastIndex = text.length - 1;
88 let hostname;
83 let hostnameStart = null; 89 let hostnameStart = null;
84 let hostnameFinished = false; 90 let hostnameFinished = false;
91 let justHostname = false;
85 let canSafelyMatchAsLowercase = false; 92 let canSafelyMatchAsLowercase = false;
86 93
87 for (let i = 0; i < text.length; i++) 94 for (let i = 0; i < text.length; i++)
88 { 95 {
89 let c = text[i]; 96 let c = text[i];
90 97
98 if (hostnameFinished)
99 justHostname = false;
100
91 // If we're currently inside the hostname we have to be careful not to 101 // If we're currently inside the hostname we have to be careful not to
92 // escape any characters until after we have converted it to punycode. 102 // escape any characters until after we have converted it to punycode.
93 if (hostnameStart != null && !hostnameFinished) 103 if (hostnameStart != null && !hostnameFinished)
94 { 104 {
95 let endingChar = (c == "*" || c == "^" || 105 let endingChar = (c == "*" || c == "^" ||
96 c == "?" || c == "/" || c == "|"); 106 c == "?" || c == "/" || c == "|");
97 if (!endingChar && i != lastIndex) 107 if (!endingChar && i != lastIndex)
98 continue; 108 continue;
99 109
100 let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); 110 hostname = punycode.toASCII(
101 hostnameFinished = true; 111 text.substring(hostnameStart, endingChar ? i : i + 1)
102 result.push(escapeRegExp(punycode.toASCII(hostname))); 112 );
113 hostnameFinished = justHostname = true;
114 regexp.push(escapeRegExp(hostname));
103 if (!endingChar) 115 if (!endingChar)
104 break; 116 break;
105 } 117 }
106 118
107 switch (c) 119 switch (c)
108 { 120 {
109 case "*": 121 case "*":
110 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") 122 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")
111 result.push(".*"); 123 regexp.push(".*");
112 break; 124 break;
113 case "^": 125 case "^":
114 if (i < lastIndex) 126 if (i < lastIndex)
115 result.push("."); 127 regexp.push(".");
116 break; 128 break;
117 case "|": 129 case "|":
118 if (i == 0) 130 if (i == 0)
119 { 131 {
120 result.push("^"); 132 regexp.push("^");
121 break; 133 break;
122 } 134 }
123 if (i == lastIndex) 135 if (i == lastIndex)
124 { 136 {
125 result.push("$"); 137 regexp.push("$");
126 break; 138 break;
127 } 139 }
128 if (i == 1 && text[0] == "|") 140 if (i == 1 && text[0] == "|")
129 { 141 {
130 hostnameStart = i + 1; 142 hostnameStart = i + 1;
131 canSafelyMatchAsLowercase = true; 143 canSafelyMatchAsLowercase = true;
132 result.push("https?://"); 144 regexp.push("https?://");
133 break; 145 break;
134 } 146 }
135 result.push("\\|"); 147 regexp.push("\\|");
136 break; 148 break;
137 case "/": 149 case "/":
138 if (!hostnameFinished && 150 if (!hostnameFinished &&
139 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") 151 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
140 { 152 {
141 hostnameStart = i + 1; 153 hostnameStart = i + 1;
142 canSafelyMatchAsLowercase = true; 154 canSafelyMatchAsLowercase = true;
143 } 155 }
144 result.push("/"); 156 regexp.push("/");
145 break; 157 break;
146 case ".": case "+": case "$": case "?": 158 case ".": case "+": case "$": case "?":
147 case "{": case "}": case "(": case ")": 159 case "{": case "}": case "(": case ")":
148 case "[": case "]": case "\\": 160 case "[": case "]": case "\\":
149 result.push("\\", c); 161 regexp.push("\\", c);
150 break; 162 break;
151 default: 163 default:
152 if (hostnameFinished && (c >= "a" && c <= "z" || 164 if (hostnameFinished && (c >= "a" && c <= "z" ||
153 c >= "A" && c <= "Z")) 165 c >= "A" && c <= "Z"))
154 canSafelyMatchAsLowercase = false; 166 canSafelyMatchAsLowercase = false;
155 result.push(c); 167 regexp.push(c);
156 } 168 }
157 } 169 }
158 170
159 return {regexp: result.join(""), 171 return {
160 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; 172 regexp: regexp.join(""),
161 } 173 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
162 174 hostname: hostname,
163 function getRegExpTrigger(filter) 175 justHostname: justHostname
164 { 176 };
165 let result = toRegExp(filter.regexpSource);
166
167 let trigger = {"url-filter": result.regexp};
168
169 // Limit rules to to HTTP(S) URLs
170 if (!/^(\^|http)/i.test(trigger["url-filter"]))
171 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
172
173 // For rules containing only a hostname we know that we're matching against
174 // a lowercase string unless the matchCase option was passed.
175 if (result.canSafelyMatchAsLowercase && !filter.matchCase)
176 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
177
178 if (result.canSafelyMatchAsLowercase || filter.matchCase)
179 trigger["url-filter-is-case-sensitive"] = true;
180
181 return trigger;
182 } 177 }
183 178
184 function getResourceTypes(filter) 179 function getResourceTypes(filter)
185 { 180 {
186 let types = []; 181 let types = [];
187 182
188 if (filter.contentType & typeMap.IMAGE) 183 if (filter.contentType & typeMap.IMAGE)
189 types.push("image"); 184 types.push("image");
190 if (filter.contentType & typeMap.STYLESHEET) 185 if (filter.contentType & typeMap.STYLESHEET)
191 types.push("style-sheet"); 186 types.push("style-sheet");
(...skipping 26 matching lines...) Expand all
218 213
219 if (tldjs.getDomain(domain) == domain) 214 if (tldjs.getDomain(domain) == domain)
220 result.push("www." + domain); 215 result.push("www." + domain);
221 } 216 }
222 217
223 return result; 218 return result;
224 } 219 }
225 220
226 function convertFilter(filter, action, withResourceTypes) 221 function convertFilter(filter, action, withResourceTypes)
227 { 222 {
228 let trigger = getRegExpTrigger(filter); 223 let parsed = parseFilterRegexpSource(filter.regexpSource);
224
225 // For the special case of $document whitelisting filters with just a domain
226 // we can generate an equivalent blocking rule exception using if-domain.
227 if (filter.contentType == typeMap.DOCUMENT && parsed.justHostname)
Sebastian Noack 2016/05/12 12:12:25 For filters like example.com$document,image we wou
kzar 2016/05/16 16:22:36 Done.
228 return {trigger: {"url-filter": ".*",
Sebastian Noack 2016/05/12 12:12:26 Nit: Mind wrapping the nested object for better re
Sebastian Noack 2016/05/12 12:12:26 Wouldn't an empty string be sufficient as url-filt
kzar 2016/05/16 16:22:36 Done.
kzar 2016/05/16 16:22:36 Unfortunately this causes a "Extension compilation
229 "if-domain": addDomainPrefix([parsed.hostname])},
230 action: {type: "ignore-previous-rules"}};
231
232 let trigger = {"url-filter": parsed.regexp};
233
234 // Limit rules to to HTTP(S) URLs
Sebastian Noack 2016/05/12 12:12:26 Typo: to to
kzar 2016/05/16 16:22:36 Done.
235 if (!/^(\^|http)/i.test(trigger["url-filter"]))
236 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
237
238 // For rules containing only a hostname we know that we're matching against
239 // a lowercase string unless the matchCase option was passed.
240 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
241 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
242
243 if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
244 trigger["url-filter-is-case-sensitive"] = true;
245
229 let included = []; 246 let included = [];
230 let excluded = []; 247 let excluded = [];
231 248
232 parseDomains(filter.domains, included, excluded); 249 parseDomains(filter.domains, included, excluded);
233 250
234 if (withResourceTypes) 251 if (withResourceTypes)
235 trigger["resource-type"] = getResourceTypes(filter); 252 trigger["resource-type"] = getResourceTypes(filter);
236 if (filter.thirdParty != null) 253 if (filter.thirdParty != null)
237 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 254 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
238 255
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
345 return; 362 return;
346 if (filter instanceof filterClasses.RegExpFilter && 363 if (filter instanceof filterClasses.RegExpFilter &&
347 filter.regexpSource == null) 364 filter.regexpSource == null)
348 return; 365 return;
349 366
350 if (filter instanceof filterClasses.BlockingFilter) 367 if (filter instanceof filterClasses.BlockingFilter)
351 this.requestFilters.push(filter); 368 this.requestFilters.push(filter);
352 369
353 if (filter instanceof filterClasses.WhitelistFilter) 370 if (filter instanceof filterClasses.WhitelistFilter)
354 { 371 {
355 if (filter.contentType & (typeMap.IMAGE 372 if (filter.contentType & (typeMap.DOCUMENT
373 | typeMap.IMAGE
356 | typeMap.STYLESHEET 374 | typeMap.STYLESHEET
357 | typeMap.SCRIPT 375 | typeMap.SCRIPT
358 | typeMap.FONT 376 | typeMap.FONT
359 | typeMap.MEDIA 377 | typeMap.MEDIA
360 | typeMap.POPUP 378 | typeMap.POPUP
361 | typeMap.OBJECT 379 | typeMap.OBJECT
362 | typeMap.OBJECT_SUBREQUEST 380 | typeMap.OBJECT_SUBREQUEST
363 | typeMap.XMLHTTPREQUEST 381 | typeMap.XMLHTTPREQUEST
364 | typeMap.PING 382 | typeMap.PING
365 | typeMap.SUBDOCUMENT 383 | typeMap.SUBDOCUMENT
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
437 455
438 for (let filter of this.elemhideExceptions) 456 for (let filter of this.elemhideExceptions)
439 addRule(convertFilter(filter, "ignore-previous-rules", false)); 457 addRule(convertFilter(filter, "ignore-previous-rules", false));
440 for (let filter of this.requestFilters) 458 for (let filter of this.requestFilters)
441 addRule(convertFilter(filter, "block", true)); 459 addRule(convertFilter(filter, "block", true));
442 for (let filter of this.requestExceptions) 460 for (let filter of this.requestExceptions)
443 addRule(convertFilter(filter, "ignore-previous-rules", true)); 461 addRule(convertFilter(filter, "ignore-previous-rules", true));
444 462
445 return rules; 463 return rules;
446 }; 464 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld