Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/contentBlockerList.js

Issue 29336787: Issue 3670 - Make rules case sensitive where possible (Closed)
Left Patch Set: Handle "/" separately and the fall through Created Feb. 24, 2016, 9:18 p.m.
Right Patch Set: Improved documentation of toRegexp function and switched to JSDoc syntax Created Feb. 25, 2016, 3:21 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH 3 * Copyright (C) 2006-2016 Eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
58 let included = []; 58 let included = [];
59 let excluded = []; 59 let excluded = [];
60 let rules = []; 60 let rules = [];
61 61
62 parseDomains(filter.domains, included, excluded); 62 parseDomains(filter.domains, included, excluded);
63 63
64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 65 return {matchDomains: included.map(matchDomain), selector: filter.selector};
66 } 66 }
67 67
68 // Convert the "regexpSource" part of a filter's text to a regular expression, 68 /**
69 // also deciding if the expression can safely be converted to and matched as 69 * Convert the given filter "regexpSource" string into a regular expression.
70 // lowercase. 70 * (Also deciding if the regular expression can be safely converted to and
71 * matched as lower case or not.)
72 *
73 * @param {string} text regexpSource property of a filter
74 * @returns {object} An object containing a regular expression string and a bool
75 * indicating if the filter can be safely matched as lower
76 * case: {regexp: "...", caseSensitive: true/false}
77 */
71 function toRegExp(text) 78 function toRegExp(text)
72 { 79 {
73 let result = []; 80 let result = [];
74 let lastIndex = text.length - 1; 81 let lastIndex = text.length - 1;
75 let hostnameStarted = false; 82 let hostnameStarted = false;
76 let hostnameFinished = false; 83 let hostnameFinished = false;
77 let caseSensitive = false; 84 let caseSensitive = false;
78 85
79 for (let i = 0; i < text.length; i++) 86 for (let i = 0; i < text.length; i++)
80 { 87 {
81 let c = text[i]; 88 let c = text[i];
82 89
83 switch (c) 90 switch (c)
84 { 91 {
85 case "*": 92 case "*":
93 if (hostnameStarted)
94 hostnameFinished = true;
86 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") 95 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
87 result.push(".*"); 96 result.push(".*");
88 break; 97 break;
89 case "^": 98 case "^":
99 if (hostnameStarted)
100 hostnameFinished = true;
90 if (i < lastIndex) 101 if (i < lastIndex)
91 result.push("."); 102 result.push(".");
92 break; 103 break;
93 case "|": 104 case "|":
94 if (i == 0) 105 if (i == 0)
95 { 106 {
96 result.push("^"); 107 result.push("^");
97 break; 108 break;
98 } 109 }
99 if (i == lastIndex) 110 if (i == lastIndex)
100 { 111 {
101 result.push("$"); 112 result.push("$");
102 break; 113 break;
103 } 114 }
104 if (i == 1 && text[0] == "|") 115 if (i == 1 && text[0] == "|")
105 { 116 {
117 hostnameStarted = caseSensitive = true;
106 result.push("https?://"); 118 result.push("https?://");
107 hostnameStarted = caseSensitive = true;
108 break; 119 break;
109 } 120 }
110 result.push("\\", c); 121 result.push("\\", c);
111 break; 122 break;
112 case "/": 123 case "?":
113 if (!hostnameStarted && i >= 2 && text[i-2] == ":" && text[i-1] == "/")
Sebastian Noack 2016/02/24 21:47:06 Note that when you use charAt() as in my initial s
kzar 2016/02/24 22:19:45 Done.
114 {
115 hostnameStarted = caseSensitive = true;
116 result.push("/");
117 break;
Sebastian Noack 2016/02/24 21:47:07 If I get the logic here straight, there is an inco
kzar 2016/02/24 22:19:46 No, for all other occurrences of slashes we fall t
118 }
119 case "?": case "*": case "^":
Sebastian Noack 2016/02/24 21:47:06 "*" and "^" are already handled and bail out above
kzar 2016/02/24 22:19:46 Done.
120 if (hostnameStarted) 124 if (hostnameStarted)
121 hostnameFinished = true; 125 hostnameFinished = true;
122 if (c != "?") 126 case ".": case "+": case "$": case "{": case "}":
123 { 127 case "(": case ")": case "[": case "]": case "\\":
124 result.push(c);
125 break;
126 }
127 case ".": case "+": case "$": case "{":
128 case "}": case "(": case ")": case "[":
129 case "]": case "\\":
130 result.push("\\", c); 128 result.push("\\", c);
131 break; 129 break;
130 case "/":
131 if (hostnameStarted)
132 hostnameFinished = true;
133 else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
134 hostnameStarted = caseSensitive = true;
132 default: 135 default:
133 if (hostnameFinished && (c >= "a" && c <= "z" || c >= "A" && c <= "Z")) 136 if (hostnameFinished && (c >= "a" && c <= "z" ||
Sebastian Noack 2016/02/24 21:47:06 Note that this check can be simplified with a regex
kzar 2016/02/24 22:19:45 Done.
137 c >= "A" && c <= "Z"))
134 caseSensitive = false; 138 caseSensitive = false;
135 result.push(c); 139 result.push(c);
136 } 140 }
137 } 141 }
138 142
139 return {regexp: result.join(""), caseSensitive: caseSensitive}; 143 return {regexp: result.join(""), caseSensitive: caseSensitive};
140 } 144 }
141 145
142 function getRegExpTrigger(filter) 146 function getRegExpTrigger(filter)
143 { 147 {
144 let result = toRegExp(filter.regexpSource.replace( 148 let result = toRegExp(filter.regexpSource.replace(
145 // Safari expects punycode, filter lists use unicode 149 // Safari expects punycode, filter lists use unicode
146 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, 150 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
147 function (match, prefix, domain) 151 function (match, prefix, domain)
148 { 152 {
149 return prefix + punycode.toASCII(domain); 153 return prefix + punycode.toASCII(domain);
150 } 154 }
151 )); 155 ));
152 156
153 let trigger = {"url-filter": result.regexp}; 157 let trigger = {"url-filter": result.regexp};
154 158
155 // Limit rules to HTTP(S) URLs 159 // Limit rules to HTTP(S) URLs
156 if (!/^(\^|http)/i.test(trigger["url-filter"])) 160 if (!/^(\^|http)/i.test(trigger["url-filter"]))
157 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; 161 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
158 162
159 // For rules containing only a hostname we know that we're matching against 163 // For rules containing only a hostname we know that we're matching against
160 // a lowercase string and can therefore enable case sensitive matching. 164 // a lowercase string unless the matchCase option was passed.
161 if (result.caseSensitive) 165 if (result.caseSensitive && !filter.matchCase)
162 {
163 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 166 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
167
168 if (result.caseSensitive || filter.matchCase)
164 trigger["url-filter-is-case-sensitive"] = true; 169 trigger["url-filter-is-case-sensitive"] = true;
165 }
166 170
167 return trigger; 171 return trigger;
168 } 172 }
169 173
170 function getResourceTypes(filter) 174 function getResourceTypes(filter)
171 { 175 {
172 let types = []; 176 let types = [];
173 177
174 if (filter.contentType & typeMap.IMAGE) 178 if (filter.contentType & typeMap.IMAGE)
175 types.push("image"); 179 types.push("image");
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
208 212
209 return result; 213 return result;
210 } 214 }
211 215
212 function convertFilter(filter, action, withResourceTypes) 216 function convertFilter(filter, action, withResourceTypes)
213 { 217 {
214 let trigger = getRegExpTrigger(filter); 218 let trigger = getRegExpTrigger(filter);
215 let included = []; 219 let included = [];
216 let excluded = []; 220 let excluded = [];
217 221
218 if (filter.matchCase) 222 parseDomains(filter.domains, included, excluded);
219 trigger["url-filter-is-case-sensitive"] = true;
220
221 parseDomains(filter.domains, included, excluded);
222 223
223 if (withResourceTypes) 224 if (withResourceTypes)
224 trigger["resource-type"] = getResourceTypes(filter); 225 trigger["resource-type"] = getResourceTypes(filter);
225 if (filter.thirdParty != null) 226 if (filter.thirdParty != null)
226 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; 227 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];
227 228
228 if (included.length > 0) 229 if (included.length > 0)
229 trigger["if-domain"] = addDomainPrefix(included); 230 trigger["if-domain"] = addDomainPrefix(included);
230 else if (excluded.length > 0) 231 else if (excluded.length > 0)
231 trigger["unless-domain"] = addDomainPrefix(excluded); 232 trigger["unless-domain"] = addDomainPrefix(excluded);
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
426 427
427 for (let filter of this.elemhideExceptions) 428 for (let filter of this.elemhideExceptions)
428 addRule(convertFilter(filter, "ignore-previous-rules", false)); 429 addRule(convertFilter(filter, "ignore-previous-rules", false));
429 for (let filter of this.requestFilters) 430 for (let filter of this.requestFilters)
430 addRule(convertFilter(filter, "block", true)); 431 addRule(convertFilter(filter, "block", true));
431 for (let filter of this.requestExceptions) 432 for (let filter of this.requestExceptions)
432 addRule(convertFilter(filter, "ignore-previous-rules", true)); 433 addRule(convertFilter(filter, "ignore-previous-rules", true));
433 434
434 return rules; 435 return rules;
435 }; 436 };
LEFTRIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld