Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/contentBlockerList.js

Issue 29336787: Issue 3670 - Make rules case sensitive where possible (Closed)
Patch Set: Fixed mistake with logic for * and ^, addressed other feedback Created Feb. 24, 2016, 10:18 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH 3 * Copyright (C) 2006-2016 Eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
43 } 43 }
44 } 44 }
45 45
46 function escapeRegExp(s) 46 function escapeRegExp(s)
47 { 47 {
48 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); 48 return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
49 } 49 }
50 50
51 function matchDomain(domain) 51 function matchDomain(domain)
52 { 52 {
53 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; 53 return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
54 } 54 }
55 55
56 function convertElemHideFilter(filter, elemhideSelectorExceptions) 56 function convertElemHideFilter(filter, elemhideSelectorExceptions)
57 { 57 {
58 let included = []; 58 let included = [];
59 let excluded = []; 59 let excluded = [];
60 let rules = []; 60 let rules = [];
61 61
62 parseDomains(filter.domains, included, excluded); 62 parseDomains(filter.domains, included, excluded);
63 63
64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 65 return {matchDomains: included.map(matchDomain), selector: filter.selector};
66 } 66 }
67 67
68 // Convert the "regexpSource" part of a filter's text to a regular expression,
69 // also deciding if the expression can safely be converted to and matched as
70 // lowercase.
68 function toRegExp(text) 71 function toRegExp(text)
69 { 72 {
70 let result = []; 73 let result = [];
71 let lastIndex = text.length - 1; 74 let lastIndex = text.length - 1;
75 let hostnameStarted = false;
76 let hostnameFinished = false;
77 let caseSensitive = false;
72 78
73 for (let i = 0; i < text.length; i++) 79 for (let i = 0; i < text.length; i++)
74 { 80 {
75 let c = text[i]; 81 let c = text[i];
76 82
77 switch (c) 83 switch (c)
78 { 84 {
79 case "*": 85 case "*":
86 if (hostnameStarted)
87 hostnameFinished = true;
80 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") 88 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
81 result.push(".*"); 89 result.push(".*");
82 break; 90 break;
83 case "^": 91 case "^":
92 if (hostnameStarted)
93 hostnameFinished = true;
84 if (i < lastIndex) 94 if (i < lastIndex)
85 result.push("."); 95 result.push(".");
86 break; 96 break;
87 case "|": 97 case "|":
88 if (i == 0) 98 if (i == 0)
89 { 99 {
90 result.push("^"); 100 result.push("^");
91 break; 101 break;
92 } 102 }
93 if (i == lastIndex) 103 if (i == lastIndex)
94 { 104 {
95 result.push("$"); 105 result.push("$");
96 break; 106 break;
97 } 107 }
98 if (i == 1 && text[0] == "|") 108 if (i == 1 && text[0] == "|")
99 { 109 {
100 result.push("https?://"); 110 result.push("https?://");
111 hostnameStarted = caseSensitive = true;
Sebastian Noack 2016/02/24 22:53:49 Nit: Sometimes you have the regular expression con
kzar 2016/02/24 23:15:20 Done.
101 break; 112 break;
102 } 113 }
103 case ".": case "+": case "?": case "$": 114 result.push("\\", c);
104 case "{": case "}": case "(": case ")": 115 break;
105 case "[": case "]": case "\\": 116 case "/":
117 if (hostnameStarted)
118 hostnameFinished = true;
119 else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
120 hostnameStarted = caseSensitive = true;
121 result.push("/");
Sebastian Noack 2016/02/24 22:53:49 If we move this case just above the default case,
kzar 2016/02/24 23:15:20 Done.
122 break;
123 case "?":
124 if (hostnameStarted)
125 hostnameFinished = true;
126 case ".": case "+": case "$": case "{":
Sebastian Noack 2016/02/24 22:53:48 Nit: It doesn't really matter, but you wrap after
kzar 2016/02/24 23:15:20 Done.
127 case "}": case "(": case ")": case "[":
128 case "]": case "\\":
106 result.push("\\", c); 129 result.push("\\", c);
107 break; 130 break;
108 default: 131 default:
132 if (hostnameFinished && (c >= "a" && c <= "z" ||
133 c >= "A" && c <= "Z"))
134 caseSensitive = false;
109 result.push(c); 135 result.push(c);
110 } 136 }
111 } 137 }
112 138
113 return result.join(""); 139 return {regexp: result.join(""), caseSensitive: caseSensitive};
114 } 140 }
115 141
116 function getRegExpSource(filter) 142 function getRegExpTrigger(filter)
117 { 143 {
118 let source = toRegExp(filter.regexpSource.replace( 144 let result = toRegExp(filter.regexpSource.replace(
119 // Safari expects punycode, filter lists use unicode 145 // Safari expects punycode, filter lists use unicode
120 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, 146 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
121 function (match, prefix, domain) 147 function (match, prefix, domain)
122 { 148 {
123 return prefix + punycode.toASCII(domain); 149 return prefix + punycode.toASCII(domain);
124 } 150 }
125 )); 151 ));
126 152
153 let trigger = {"url-filter": result.regexp};
154
127 // Limit rules to HTTP(S) URLs 155 // Limit rules to HTTP(S) URLs
128 if (!/^(\^|http)/i.test(source)) 156 if (!/^(\^|http)/i.test(trigger["url-filter"]))
129 source = "^https?://.*" + source; 157 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
130 158
131 return source; 159 // For rules containing only a hostname we know that we're matching against
160 // a lowercase string and can therefore enable case sensitive matching.
161 if (result.caseSensitive)
Sebastian Noack 2016/02/24 22:53:49 I think the logic here would be a little more stra
Sebastian Noack 2016/02/24 23:07:37 Ah wait, the logic is incorrect anyway, with eithe
kzar 2016/02/24 23:15:20 Done.
162 {
163 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
164 trigger["url-filter-is-case-sensitive"] = true;
165 }
166
167 return trigger;
132 } 168 }
133 169
134 function getResourceTypes(filter) 170 function getResourceTypes(filter)
135 { 171 {
136 let types = []; 172 let types = [];
137 173
138 if (filter.contentType & typeMap.IMAGE) 174 if (filter.contentType & typeMap.IMAGE)
139 types.push("image"); 175 types.push("image");
140 if (filter.contentType & typeMap.STYLESHEET) 176 if (filter.contentType & typeMap.STYLESHEET)
141 types.push("style-sheet"); 177 types.push("style-sheet");
(...skipping 26 matching lines...) Expand all
168 204
169 if (tldjs.getDomain(domain) == domain) 205 if (tldjs.getDomain(domain) == domain)
170 result.push("www." + domain); 206 result.push("www." + domain);
171 } 207 }
172 208
173 return result; 209 return result;
174 } 210 }
175 211
176 function convertFilter(filter, action, withResourceTypes) 212 function convertFilter(filter, action, withResourceTypes)
177 { 213 {
178 let trigger = {"url-filter": getRegExpSource(filter)}; 214 let trigger = getRegExpTrigger(filter);
179 let included = []; 215 let included = [];
180 let excluded = []; 216 let excluded = [];
181 217
182 parseDomains(filter.domains, included, excluded); 218 if (filter.matchCase)
219 trigger["url-filter-is-case-sensitive"] = true;
220
221 parseDomains(filter.domains, included, excluded);
183 222
184 if (withResourceTypes) 223 if (withResourceTypes)
185 trigger["resource-type"] = getResourceTypes(filter); 224 trigger["resource-type"] = getResourceTypes(filter);
186 if (filter.thirdParty != null) 225 if (filter.thirdParty != null)
187 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 226 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
188 227
189 if (included.length > 0) 228 if (included.length > 0)
190 trigger["if-domain"] = addDomainPrefix(included); 229 trigger["if-domain"] = addDomainPrefix(included);
191 else if (excluded.length > 0) 230 else if (excluded.length > 0)
192 trigger["unless-domain"] = addDomainPrefix(excluded); 231 trigger["unless-domain"] = addDomainPrefix(excluded);
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 { 409 {
371 while (selectors.length) 410 while (selectors.length)
372 { 411 {
373 let selector = selectors.splice(0, selectorLimit).join(", "); 412 let selector = selectors.splice(0, selectorLimit).join(", ");
374 413
375 // As of Safari 9.0 element IDs are matched as lowercase. We work around 414 // As of Safari 9.0 element IDs are matched as lowercase. We work around
376 // this by converting to the attribute format [id="elementID"] 415 // this by converting to the attribute format [id="elementID"]
377 selector = convertIDSelectorsToAttributeSelectors(selector); 416 selector = convertIDSelectorsToAttributeSelectors(selector);
378 417
379 addRule({ 418 addRule({
380 trigger: {"url-filter": matchDomain}, 419 trigger: {"url-filter": matchDomain,
420 "url-filter-is-case-sensitive": true},
381 action: {type: "css-display-none", 421 action: {type: "css-display-none",
382 selector: selector} 422 selector: selector}
383 }); 423 });
384 } 424 }
385 }); 425 });
386 426
387 for (let filter of this.elemhideExceptions) 427 for (let filter of this.elemhideExceptions)
388 addRule(convertFilter(filter, "ignore-previous-rules", false)); 428 addRule(convertFilter(filter, "ignore-previous-rules", false));
389 for (let filter of this.requestFilters) 429 for (let filter of this.requestFilters)
390 addRule(convertFilter(filter, "block", true)); 430 addRule(convertFilter(filter, "block", true));
391 for (let filter of this.requestExceptions) 431 for (let filter of this.requestExceptions)
392 addRule(convertFilter(filter, "ignore-previous-rules", true)); 432 addRule(convertFilter(filter, "ignore-previous-rules", true));
393 433
394 return rules; 434 return rules;
395 }; 435 };
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld