Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2016 Eyeo GmbH | 3 * Copyright (C) 2006-2016 Eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
59 let excluded = []; | 59 let excluded = []; |
60 let rules = []; | 60 let rules = []; |
61 | 61 |
62 parseDomains(filter.domains, included, excluded); | 62 parseDomains(filter.domains, included, excluded); |
63 | 63 |
64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
66 } | 66 } |
67 | 67 |
68 /** | 68 /** |
69 * Convert the given filter "regexpSource" string into a regular expression. | 69 * Convert the given filter "regexpSource" string into a regular expression, |
70 * handling the conversion of unicode inside hostnames to punycode. | |
70 * (Also deciding if the regular expression can be safely converted to and | 71 * (Also deciding if the regular expression can be safely converted to and |
71 * matched as lower case or not.) | 72 * matched as lower case or not.) |
72 * | 73 * |
73 * @param {string} text regexpSource property of a filter | 74 * @param {string} text regexpSource property of a filter |
74 * @returns {object} An object containing a regular expression string and a bool | 75 * @returns {object} An object containing a regular expression string and a bool |
75 * indicating if the filter can be safely matched as lower | 76 * indicating if the filter can be safely matched as lower |
76 * case: {regexp: "...", caseSensitive: true/false} | 77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false} |
77 */ | 78 */ |
78 function toRegExp(text) | 79 function toRegExp(text) |
79 { | 80 { |
80 let result = []; | 81 let result = []; |
81 let lastIndex = text.length - 1; | 82 let lastIndex = text.length - 1; |
82 let hostnameStarted = false; | 83 let hostnameStart = null; |
83 let hostnameFinished = false; | 84 let hostnameFinished = false; |
84 let caseSensitive = false; | 85 let canSafelyMatchAsLowercase = false; |
85 | 86 |
86 for (let i = 0; i < text.length; i++) | 87 for (let i = 0; i < text.length; i++) |
87 { | 88 { |
88 let c = text[i]; | 89 let c = text[i]; |
89 | 90 |
91 // If we're currently inside the hostname we have to be careful not to | |
92 // escape any characters until after we have converted it to punycode. | |
93 if (hostnameStart != null && !hostnameFinished) | |
94 { | |
95 let endingChar = (c == "*" || c == "^" || c == "?" || c == "/"); | |
Sebastian Noack
2016/02/27 23:06:16
I know we didn't handle it before, but what if w…
kzar
2016/03/07 17:06:48
So do we want to always consider "|" to end the ho
Sebastian Noack
2016/03/08 09:31:22
I guess, for simplicity we can just assume that an
kzar
2016/03/08 12:36:01
Yea, sounds good to me. Also we already know that
| |
96 if (!endingChar && i != lastIndex) | |
97 continue; | |
98 | |
99 let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); | |
100 hostnameFinished = true; | |
101 result.push(escapeRegExp(punycode.toASCII(hostname))); | |
102 if (!endingChar) | |
103 break; | |
104 } | |
105 | |
90 switch (c) | 106 switch (c) |
91 { | 107 { |
92 case "*": | 108 case "*": |
93 if (hostnameStarted) | |
94 hostnameFinished = true; | |
95 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | 109 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
96 result.push(".*"); | 110 result.push(".*"); |
97 break; | 111 break; |
98 case "^": | 112 case "^": |
99 if (hostnameStarted) | |
100 hostnameFinished = true; | |
101 if (i < lastIndex) | 113 if (i < lastIndex) |
102 result.push("."); | 114 result.push("."); |
103 break; | 115 break; |
104 case "|": | 116 case "|": |
105 if (i == 0) | 117 if (i == 0) |
106 { | 118 { |
107 result.push("^"); | 119 result.push("^"); |
108 break; | 120 break; |
109 } | 121 } |
110 if (i == lastIndex) | 122 if (i == lastIndex) |
111 { | 123 { |
112 result.push("$"); | 124 result.push("$"); |
113 break; | 125 break; |
114 } | 126 } |
115 if (i == 1 && text[0] == "|") | 127 if (i == 1 && text[0] == "|") |
116 { | 128 { |
117 hostnameStarted = caseSensitive = true; | 129 hostnameStart = i + 1; |
130 canSafelyMatchAsLowercase = true; | |
118 result.push("https?://"); | 131 result.push("https?://"); |
119 break; | 132 break; |
120 } | 133 } |
134 result.push("\\|"); | |
135 break; | |
136 case "/": | |
137 if (!hostnameFinished && | |
138 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | |
139 { | |
140 hostnameStart = i + 1; | |
141 canSafelyMatchAsLowercase = true; | |
142 } | |
143 result.push("/"); | |
144 break; | |
145 case ".": case "+": case "$": case "?": | |
146 case "{": case "}": case "(": case ")": | |
147 case "[": case "]": case "\\": | |
121 result.push("\\", c); | 148 result.push("\\", c); |
122 break; | 149 break; |
123 case "?": | |
124 if (hostnameStarted) | |
125 hostnameFinished = true; | |
126 case ".": case "+": case "$": case "{": case "}": | |
127 case "(": case ")": case "[": case "]": case "\\": | |
128 result.push("\\", c); | |
129 break; | |
130 case "/": | |
131 if (hostnameStarted) | |
132 hostnameFinished = true; | |
133 else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | |
134 hostnameStarted = caseSensitive = true; | |
135 default: | 150 default: |
136 if (hostnameFinished && (c >= "a" && c <= "z" || | 151 if (hostnameFinished && (c >= "a" && c <= "z" || |
137 c >= "A" && c <= "Z")) | 152 c >= "A" && c <= "Z")) |
138 caseSensitive = false; | 153 canSafelyMatchAsLowercase = false; |
139 result.push(c); | 154 result.push(c); |
140 } | 155 } |
141 } | 156 } |
142 | 157 |
143 return {regexp: result.join(""), caseSensitive: caseSensitive}; | 158 return {regexp: result.join(""), |
159 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; | |
144 } | 160 } |
145 | 161 |
146 function getRegExpTrigger(filter) | 162 function getRegExpTrigger(filter) |
147 { | 163 { |
148 let result = toRegExp(filter.regexpSource.replace( | 164 let result = toRegExp(filter.regexpSource); |
149 // Safari expects punycode, filter lists use unicode | |
150 /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, | |
151 function (match, prefix, domain) | |
152 { | |
153 return prefix + punycode.toASCII(domain); | |
154 } | |
155 )); | |
156 | 165 |
157 let trigger = {"url-filter": result.regexp}; | 166 let trigger = {"url-filter": result.regexp}; |
158 | 167 |
159 // Limit rules to HTTP(S) URLs | 168 // Limit rules to HTTP(S) URLs |
160 if (!/^(\^|http)/i.test(trigger["url-filter"])) | 169 if (!/^(\^|http)/i.test(trigger["url-filter"])) |
161 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | 170 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
162 | 171 |
163 // For rules containing only a hostname we know that we're matching against | 172 // For rules containing only a hostname we know that we're matching against |
164 // a lowercase string unless the matchCase option was passed. | 173 // a lowercase string unless the matchCase option was passed. |
165 if (result.caseSensitive && !filter.matchCase) | 174 if (result.canSafelyMatchAsLowercase && !filter.matchCase) |
166 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | 175 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
167 | 176 |
168 if (result.caseSensitive || filter.matchCase) | 177 if (result.canSafelyMatchAsLowercase || filter.matchCase) |
169 trigger["url-filter-is-case-sensitive"] = true; | 178 trigger["url-filter-is-case-sensitive"] = true; |
170 | 179 |
171 return trigger; | 180 return trigger; |
172 } | 181 } |
173 | 182 |
174 function getResourceTypes(filter) | 183 function getResourceTypes(filter) |
175 { | 184 { |
176 let types = []; | 185 let types = []; |
177 | 186 |
178 if (filter.contentType & typeMap.IMAGE) | 187 if (filter.contentType & typeMap.IMAGE) |
(...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
427 | 436 |
428 for (let filter of this.elemhideExceptions) | 437 for (let filter of this.elemhideExceptions) |
429 addRule(convertFilter(filter, "ignore-previous-rules", false)); | 438 addRule(convertFilter(filter, "ignore-previous-rules", false)); |
430 for (let filter of this.requestFilters) | 439 for (let filter of this.requestFilters) |
431 addRule(convertFilter(filter, "block", true)); | 440 addRule(convertFilter(filter, "block", true)); |
432 for (let filter of this.requestExceptions) | 441 for (let filter of this.requestExceptions) |
433 addRule(convertFilter(filter, "ignore-previous-rules", true)); | 442 addRule(convertFilter(filter, "ignore-previous-rules", true)); |
434 | 443 |
435 return rules; | 444 return rules; |
436 }; | 445 }; |
OLD | NEW |