Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29337803: Issue 3710 - Unify hostname logic (Closed)
Patch Set: Created Feb. 27, 2016, 2:23 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « abp2blocklist.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH 3 * Copyright (C) 2006-2016 Eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 let excluded = []; 59 let excluded = [];
60 let rules = []; 60 let rules = [];
61 61
62 parseDomains(filter.domains, included, excluded); 62 parseDomains(filter.domains, included, excluded);
63 63
64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))
65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; 65 return {matchDomains: included.map(matchDomain), selector: filter.selector};
66 } 66 }
67 67
68 /** 68 /**
69 * Convert the given filter "regexpSource" string into a regular expression. 69 * Convert the given filter "regexpSource" string into a regular expression,
70 * handling the conversion of unicode inside hostnames to punycode.
70 * (Also deciding if the regular expression can be safely converted to and 71 * (Also deciding if the regular expression can be safely converted to and
71 * matched as lower case or not.) 72 * matched as lower case or not.)
72 * 73 *
73 * @param {string} text regexpSource property of a filter 74 * @param {string} text regexpSource property of a filter
74 * @returns {object} An object containing a regular expression string and a bool 75 * @returns {object} An object containing a regular expression string and a bool
75 * indicating if the filter can be safely matched as lower 76 * indicating if the filter can be safely matched as lower
76 * case: {regexp: "...", caseSensitive: true/false} 77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false }
77 */ 78 */
78 function toRegExp(text) 79 function toRegExp(text)
79 { 80 {
80 let result = []; 81 let result = [];
81 let lastIndex = text.length - 1; 82 let lastIndex = text.length - 1;
82 let hostnameStarted = false; 83 let hostnameStart = null;
83 let hostnameFinished = false; 84 let hostnameFinished = false;
84 let caseSensitive = false; 85 let canSafelyMatchAsLowercase = false;
85 86
86 for (let i = 0; i < text.length; i++) 87 for (let i = 0; i < text.length; i++)
87 { 88 {
88 let c = text[i]; 89 let c = text[i];
89 90
91 // If we're currently inside the hostname we have to be careful not to
92 // escape any characters until after we have converted it to punycode.
93 if (hostnameStart != null && !hostnameFinished)
94 {
95 if (c == "*" || c == "^" || c == "?" || c == "/" || i == lastIndex)
Sebastian Noack 2016/02/27 20:30:38 If you turn the logic here the other way around, y
Sebastian Noack 2016/02/27 20:30:38 I'm not entirely sure if the case of last index is
kzar 2016/02/27 21:28:53 Done.
kzar 2016/02/27 21:28:53 Good point but it's even more complicated, what if
96 {
97 hostnameFinished = true;
98 let hostname = text.substring(hostnameStart, i);
99 result.push(escapeRegExp(punycode.toASCII(hostname)));
100 }
101 else
102 continue;
103 }
104
90 switch (c) 105 switch (c)
91 { 106 {
92 case "*": 107 case "*":
93 if (hostnameStarted)
94 hostnameFinished = true;
95 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") 108 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
96 result.push(".*"); 109 result.push(".*");
97 break; 110 break;
98 case "^": 111 case "^":
99 if (hostnameStarted)
100 hostnameFinished = true;
101 if (i < lastIndex) 112 if (i < lastIndex)
102 result.push("."); 113 result.push(".");
103 break; 114 break;
104 case "|": 115 case "|":
105 if (i == 0) 116 if (i == 0)
106 { 117 {
107 result.push("^"); 118 result.push("^");
108 break; 119 break;
109 } 120 }
110 if (i == lastIndex) 121 if (i == lastIndex)
111 { 122 {
112 result.push("$"); 123 result.push("$");
113 break; 124 break;
114 } 125 }
115 if (i == 1 && text[0] == "|") 126 if (i == 1 && text[0] == "|")
116 { 127 {
117 hostnameStarted = caseSensitive = true;
118 result.push("https?://"); 128 result.push("https?://");
Sebastian Noack 2016/02/27 20:30:38 Nit: Mind moving that line to just above the break
kzar 2016/02/27 21:28:53 Done.
129 hostnameStart = i + 1;
130 canSafelyMatchAsLowercase = true;
119 break; 131 break;
120 } 132 }
121 result.push("\\", c); 133 result.push("\\|");
122 break; 134 break;
123 case "?": 135 case "/":
124 if (hostnameStarted) 136 result.push("/");
Sebastian Noack 2016/02/27 20:30:38 Nit: Mind moving that line to just above the break
kzar 2016/02/27 21:28:53 Done.
125 hostnameFinished = true; 137 if (!hostnameFinished &&
Sebastian Noack 2016/02/27 20:30:38 Nit: It doesn't matter, but I personally find that
kzar 2016/02/27 21:28:53 I'd rather leave this one as it is.
126 case ".": case "+": case "$": case "{": case "}": 138 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
139 {
140 hostnameStart = i + 1;
141 canSafelyMatchAsLowercase = true;
142 }
143 break;
144 case ".": case "+": case "$": case "{": case "}": case "?":
Sebastian Noack 2016/02/27 20:30:38 Nit: I think the way this block was originally wra
kzar 2016/02/27 21:28:53 Done.
127 case "(": case ")": case "[": case "]": case "\\": 145 case "(": case ")": case "[": case "]": case "\\":
128 result.push("\\", c); 146 result.push("\\", c);
129 break; 147 break;
130 case "/":
131 if (hostnameStarted)
132 hostnameFinished = true;
133 else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
134 hostnameStarted = caseSensitive = true;
kzar 2016/02/27 14:29:34 (I've switched this around as I decided that sneak
135 default: 148 default:
136 if (hostnameFinished && (c >= "a" && c <= "z" || 149 if (hostnameFinished && (c >= "a" && c <= "z" ||
137 c >= "A" && c <= "Z")) 150 c >= "A" && c <= "Z"))
138 caseSensitive = false; 151 canSafelyMatchAsLowercase = false;
139 result.push(c); 152 result.push(c);
140 } 153 }
141 } 154 }
142 155
143 return {regexp: result.join(""), caseSensitive: caseSensitive}; 156 return {regexp: result.join(""),
157 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};
144 } 158 }
145 159
146 function getRegExpTrigger(filter) 160 function getRegExpTrigger(filter)
147 { 161 {
148 let result = toRegExp(filter.regexpSource.replace( 162 let result = toRegExp(filter.regexpSource);
149 // Safari expects punycode, filter lists use unicode
150 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
151 function (match, prefix, domain)
152 {
153 return prefix + punycode.toASCII(domain);
154 }
155 ));
156 163
157 let trigger = {"url-filter": result.regexp}; 164 let trigger = {"url-filter": result.regexp};
158 165
159 // Limit rules to HTTP(S) URLs 166 // Limit rules to HTTP(S) URLs
160 if (!/^(\^|http)/i.test(trigger["url-filter"])) 167 if (!/^(\^|http)/i.test(trigger["url-filter"]))
161 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; 168 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
162 169
163 // For rules containing only a hostname we know that we're matching against 170 // For rules containing only a hostname we know that we're matching against
164 // a lowercase string unless the matchCase option was passed. 171 // a lowercase string unless the matchCase option was passed.
165 if (result.caseSensitive && !filter.matchCase) 172 if (result.canSafelyMatchAsLowercase && !filter.matchCase)
166 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); 173 trigger["url-filter"] = trigger["url-filter"].toLowerCase();
167 174
168 if (result.caseSensitive || filter.matchCase) 175 if (result.canSafelyMatchAsLowercase || filter.matchCase)
169 trigger["url-filter-is-case-sensitive"] = true; 176 trigger["url-filter-is-case-sensitive"] = true;
170 177
171 return trigger; 178 return trigger;
172 } 179 }
173 180
174 function getResourceTypes(filter) 181 function getResourceTypes(filter)
175 { 182 {
176 let types = []; 183 let types = [];
177 184
178 if (filter.contentType & typeMap.IMAGE) 185 if (filter.contentType & typeMap.IMAGE)
(...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after
427 434
428 for (let filter of this.elemhideExceptions) 435 for (let filter of this.elemhideExceptions)
429 addRule(convertFilter(filter, "ignore-previous-rules", false)); 436 addRule(convertFilter(filter, "ignore-previous-rules", false));
430 for (let filter of this.requestFilters) 437 for (let filter of this.requestFilters)
431 addRule(convertFilter(filter, "block", true)); 438 addRule(convertFilter(filter, "block", true));
432 for (let filter of this.requestExceptions) 439 for (let filter of this.requestExceptions)
433 addRule(convertFilter(filter, "ignore-previous-rules", true)); 440 addRule(convertFilter(filter, "ignore-previous-rules", true));
434 441
435 return rules; 442 return rules;
436 }; 443 };
OLDNEW
« no previous file with comments | « abp2blocklist.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld