Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
114 * case, a hostname string (or undefined) and a bool | 114 * case, a hostname string (or undefined) and a bool |
115 * indicating if the source only contains a hostname or not: | 115 * indicating if the source only contains a hostname or not: |
116 * {regexp: "...", | 116 * {regexp: "...", |
117 * canSafelyMatchAsLowercase: true/false, | 117 * canSafelyMatchAsLowercase: true/false, |
118 * hostname: "...", | 118 * hostname: "...", |
119 * justHostname: true/false} | 119 * justHostname: true/false} |
120 */ | 120 */ |
121 function parseFilterRegexpSource(text) | 121 function parseFilterRegexpSource(text) |
122 { | 122 { |
123 let regexp = []; | 123 let regexp = []; |
124 let lastIndex = text.length - 1; | 124 |
125 // Convert the text into an array of Unicode characters. | |
126 // | |
127 // In the case of surrogate pairs (the smiley emoji, for example), one | |
128 // Unicode code point is represented by two JavaScript characters together. | |
129 // We want to iterate over Unicode code points rather than JavaScript | |
130 // characters. | |
131 let characters = Array.from(text); | |
132 | |
133 let lastIndex = characters.length - 1; | |
125 let hostname; | 134 let hostname; |
126 let hostnameStart = null; | 135 let hostnameStart = null; |
127 let hostnameFinished = false; | 136 let hostnameFinished = false; |
128 let justHostname = false; | 137 let justHostname = false; |
129 let canSafelyMatchAsLowercase = false; | 138 let canSafelyMatchAsLowercase = false; |
130 | 139 |
131 for (let i = 0; i < text.length; i++) | 140 for (let i = 0; i < characters.length; i++) |
132 { | 141 { |
133 let c = text[i]; | 142 let c = characters[i]; |
134 | 143 |
135 if (hostnameFinished) | 144 if (hostnameFinished) |
136 justHostname = false; | 145 justHostname = false; |
137 | 146 |
138 // If we're currently inside the hostname we have to be careful not to | 147 // If we're currently inside the hostname we have to be careful not to |
139 // escape any characters until after we have converted it to punycode. | 148 // escape any characters until after we have converted it to punycode. |
140 if (hostnameStart != null && !hostnameFinished) | 149 if (hostnameStart != null && !hostnameFinished) |
141 { | 150 { |
142 let endingChar = (c == "*" || c == "^" || | 151 let endingChar = (c == "*" || c == "^" || |
143 c == "?" || c == "/" || c == "|"); | 152 c == "?" || c == "/" || c == "|"); |
144 if (!endingChar && i != lastIndex) | 153 if (!endingChar && i != lastIndex) |
145 continue; | 154 continue; |
146 | 155 |
147 hostname = punycode.toASCII( | 156 hostname = punycode.toASCII( |
148 text.substring(hostnameStart, endingChar ? i : i + 1).toLowerCase() | 157 characters.slice(hostnameStart, endingChar ? i : i + 1).join("") |
Manish Jethani
2017/07/12 08:59:56
This was the only conflict during rebase, since te
| |
158 .toLowerCase() | |
149 ); | 159 ); |
150 hostnameFinished = justHostname = true; | 160 hostnameFinished = justHostname = true; |
151 regexp.push(escapeRegExp(hostname)); | 161 regexp.push(escapeRegExp(hostname)); |
152 if (!endingChar) | 162 if (!endingChar) |
153 break; | 163 break; |
154 } | 164 } |
155 | 165 |
156 switch (c) | 166 switch (c) |
157 { | 167 { |
158 case "*": | 168 case "*": |
159 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") | 169 if (regexp.length > 0 && i < lastIndex && characters[i + 1] != "*") |
160 regexp.push(".*"); | 170 regexp.push(".*"); |
161 break; | 171 break; |
162 case "^": | 172 case "^": |
163 if (i < lastIndex) | 173 if (i < lastIndex) |
164 regexp.push("."); | 174 regexp.push("."); |
165 break; | 175 break; |
166 case "|": | 176 case "|": |
167 if (i == 0) | 177 if (i == 0) |
168 { | 178 { |
169 regexp.push("^"); | 179 regexp.push("^"); |
170 break; | 180 break; |
171 } | 181 } |
172 if (i == lastIndex) | 182 if (i == lastIndex) |
173 { | 183 { |
174 regexp.push("$"); | 184 regexp.push("$"); |
175 break; | 185 break; |
176 } | 186 } |
177 if (i == 1 && text[0] == "|") | 187 if (i == 1 && characters[0] == "|") |
178 { | 188 { |
179 hostnameStart = i + 1; | 189 hostnameStart = i + 1; |
180 canSafelyMatchAsLowercase = true; | 190 canSafelyMatchAsLowercase = true; |
181 regexp.push("https?://([^/]+\\.)?"); | 191 regexp.push("https?://([^/]+\\.)?"); |
182 break; | 192 break; |
183 } | 193 } |
184 regexp.push("\\|"); | 194 regexp.push("\\|"); |
185 break; | 195 break; |
186 case "/": | 196 case "/": |
187 if (!hostnameFinished && | 197 if (!hostnameFinished && |
188 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 198 characters[i - 2] == ":" && characters[i - 1] == "/") |
189 { | 199 { |
190 hostnameStart = i + 1; | 200 hostnameStart = i + 1; |
191 canSafelyMatchAsLowercase = true; | 201 canSafelyMatchAsLowercase = true; |
192 } | 202 } |
193 regexp.push("/"); | 203 regexp.push("/"); |
194 break; | 204 break; |
195 case ".": case "+": case "$": case "?": | 205 case ".": case "+": case "$": case "?": |
196 case "{": case "}": case "(": case ")": | 206 case "{": case "}": case "(": case ")": |
197 case "[": case "]": case "\\": | 207 case "[": case "]": case "\\": |
198 regexp.push("\\", c); | 208 regexp.push("\\", c); |
199 break; | 209 break; |
200 default: | 210 default: |
201 if (hostnameFinished && (c >= "a" && c <= "z" || | 211 if (hostnameFinished && (c >= "a" && c <= "z" || |
202 c >= "A" && c <= "Z")) | 212 c >= "A" && c <= "Z")) |
203 canSafelyMatchAsLowercase = false; | 213 canSafelyMatchAsLowercase = false; |
204 regexp.push(c); | 214 regexp.push(c == "%" ? c : encodeURI(c)); |
205 } | 215 } |
206 } | 216 } |
207 | 217 |
208 return { | 218 return { |
209 regexp: regexp.join(""), | 219 regexp: regexp.join(""), |
210 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, | 220 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
211 hostname: hostname, | 221 hostname: hostname, |
212 justHostname: justHostname | 222 justHostname: justHostname |
213 }; | 223 }; |
214 } | 224 } |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
281 let included = []; | 291 let included = []; |
282 let excluded = []; | 292 let excluded = []; |
283 | 293 |
284 parseDomains(filter.domains, included, excluded); | 294 parseDomains(filter.domains, included, excluded); |
285 | 295 |
286 if (exceptionDomains) | 296 if (exceptionDomains) |
287 excluded = excluded.concat(exceptionDomains); | 297 excluded = excluded.concat(exceptionDomains); |
288 | 298 |
289 if (withResourceTypes) | 299 if (withResourceTypes) |
290 { | 300 { |
291 trigger["resource-type"] = getResourceTypes(filter); | 301 let resourceTypes = getResourceTypes(filter); |
292 | 302 |
293 if (trigger["resource-type"].length == 0) | 303 // Content blocker rules can't differentiate between sub-document requests |
304 // (iframes) and top-level document requests. To avoid too many false | |
305 // positives, we prevent rules with no hostname part from blocking document | |
306 // requests. | |
307 // | |
308 // Once Safari 11 becomes our minimum supported version, we could change | |
309 // our approach here to use the new "unless-top-url" property instead. | |
310 if (filter instanceof filterClasses.BlockingFilter && !parsed.hostname) | |
311 resourceTypes = resourceTypes.filter(type => type != "document"); | |
312 | |
313 if (resourceTypes.length == 0) | |
294 return; | 314 return; |
315 | |
316 trigger["resource-type"] = resourceTypes; | |
295 } | 317 } |
296 | 318 |
297 if (filter.thirdParty != null) | 319 if (filter.thirdParty != null) |
298 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 320 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
299 | 321 |
300 if (included.length > 0) | 322 if (included.length > 0) |
301 { | 323 { |
302 trigger["if-domain"] = []; | 324 trigger["if-domain"] = []; |
303 | 325 |
304 for (let name of included) | 326 for (let name of included) |
(...skipping 15 matching lines...) Expand all Loading... | |
320 { | 342 { |
321 trigger["if-domain"].push("*" + name); | 343 trigger["if-domain"].push("*" + name); |
322 } | 344 } |
323 } | 345 } |
324 } | 346 } |
325 else if (excluded.length > 0) | 347 else if (excluded.length > 0) |
326 { | 348 { |
327 trigger["unless-domain"] = excluded.map(name => "*" + name); | 349 trigger["unless-domain"] = excluded.map(name => "*" + name); |
328 } | 350 } |
329 else if (filter instanceof filterClasses.BlockingFilter && | 351 else if (filter instanceof filterClasses.BlockingFilter && |
330 filter.contentType & typeMap.SUBDOCUMENT) | 352 filter.contentType & typeMap.SUBDOCUMENT && parsed.hostname) |
331 { | 353 { |
354 // Rules with a hostname part are still allowed to block document requests, | |
355 // but we add an exception for top-level documents. | |
356 // | |
357 // Note that we can only do this if there's no "unless-domain" property for | |
358 // now. This also only works in Safari 11 onwards, while older versions | |
359 // simply ignore this property. Once Safari 11 becomes our minimum | |
360 // supported version, we can merge "unless-domain" into "unless-top-url". | |
332 trigger["unless-top-url"] = [trigger["url-filter"]]; | 361 trigger["unless-top-url"] = [trigger["url-filter"]]; |
333 if (trigger["url-filter-is-case-sensitive"]) | 362 if (trigger["url-filter-is-case-sensitive"]) |
334 trigger["top-url-filter-is-case-sensitive"] = true; | 363 trigger["top-url-filter-is-case-sensitive"] = true; |
335 } | 364 } |
336 | 365 |
337 rules.push({trigger: trigger, action: {type: action}}); | 366 rules.push({trigger: trigger, action: {type: action}}); |
338 } | |
339 | |
340 function hasNonASCI(obj) | |
341 { | |
342 if (typeof obj == "string") | |
343 { | |
344 if (/[^\x00-\x7F]/.test(obj)) | |
345 return true; | |
346 } | |
347 | |
348 if (typeof obj == "object") | |
349 { | |
350 if (obj instanceof Array) | |
351 for (let item of obj) | |
352 if (hasNonASCI(item)) | |
353 return true; | |
354 | |
355 let names = Object.getOwnPropertyNames(obj); | |
356 for (let name of names) | |
357 if (hasNonASCI(obj[name])) | |
358 return true; | |
359 } | |
360 | |
361 return false; | |
362 } | 367 } |
363 | 368 |
364 function convertIDSelectorsToAttributeSelectors(selector) | 369 function convertIDSelectorsToAttributeSelectors(selector) |
365 { | 370 { |
366 // First we figure out where all the IDs are | 371 // First we figure out where all the IDs are |
367 let sep = ""; | 372 let sep = ""; |
368 let start = null; | 373 let start = null; |
369 let positions = []; | 374 let positions = []; |
370 for (let i = 0; i < selector.length; i++) | 375 for (let i = 0; i < selector.length; i++) |
371 { | 376 { |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
407 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 412 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
408 i = pos.end; | 413 i = pos.end; |
409 } | 414 } |
410 newSelector.push(selector.substring(i)); | 415 newSelector.push(selector.substring(i)); |
411 | 416 |
412 return newSelector.join(""); | 417 return newSelector.join(""); |
413 } | 418 } |
414 | 419 |
415 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) | 420 function addCSSRules(rules, selectors, matchDomain, exceptionDomains) |
416 { | 421 { |
422 let unlessDomain = exceptionDomains.size > 0 ? [] : null; | |
423 | |
424 exceptionDomains.forEach(name => unlessDomain.push("*" + name)); | |
425 | |
417 while (selectors.length) | 426 while (selectors.length) |
418 { | 427 { |
419 let selector = selectors.splice(0, selectorLimit).join(", "); | 428 let selector = selectors.splice(0, selectorLimit).join(", "); |
420 | 429 |
421 // As of Safari 9.0 element IDs are matched as lowercase. We work around | 430 // As of Safari 9.0 element IDs are matched as lowercase. We work around |
422 // this by converting to the attribute format [id="elementID"] | 431 // this by converting to the attribute format [id="elementID"] |
423 selector = convertIDSelectorsToAttributeSelectors(selector); | 432 selector = convertIDSelectorsToAttributeSelectors(selector); |
424 | 433 |
425 let rule = { | 434 let rule = { |
426 trigger: {"url-filter": matchDomain, | 435 trigger: {"url-filter": matchDomain, |
427 "url-filter-is-case-sensitive": true}, | 436 "url-filter-is-case-sensitive": true}, |
428 action: {type: "css-display-none", | 437 action: {type: "css-display-none", |
429 selector: selector} | 438 selector: selector} |
430 }; | 439 }; |
431 | 440 |
432 if (exceptionDomains.size > 0) | 441 if (unlessDomain) |
433 { | 442 rule.trigger["unless-domain"] = unlessDomain; |
434 rule.trigger["unless-domain"] = []; | |
435 exceptionDomains.forEach(name => | |
436 { | |
437 rule.trigger["unless-domain"].push("*" + name); | |
438 }); | |
439 } | |
440 | 443 |
441 rules.push(rule); | 444 rules.push(rule); |
442 } | 445 } |
443 } | 446 } |
444 | 447 |
445 let ContentBlockerList = | 448 let ContentBlockerList = |
446 /** | 449 /** |
447 * Create a new Adblock Plus filter to content blocker list converter | 450 * Create a new Adblock Plus filter to content blocker list converter |
448 * | 451 * |
449 * @constructor | 452 * @constructor |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
546 // Note that as a result of this workaround we end up with a huge rule set in | 549 // Note that as a result of this workaround we end up with a huge rule set in |
547 // terms of the amount of memory used. This can cause Node.js to throw | 550 // terms of the amount of memory used. This can cause Node.js to throw |
548 // "JavaScript heap out of memory". To avoid this, call Node.js with | 551 // "JavaScript heap out of memory". To avoid this, call Node.js with |
549 // --max_old_space_size=4096 | 552 // --max_old_space_size=4096 |
550 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); | 553 let elemhideExceptionDomains = extractFilterDomains(this.elemhideExceptions); |
551 | 554 |
552 let genericSelectorExceptionDomains = | 555 let genericSelectorExceptionDomains = |
553 extractFilterDomains(this.generichideExceptions); | 556 extractFilterDomains(this.generichideExceptions); |
554 elemhideExceptionDomains.forEach(name => | 557 elemhideExceptionDomains.forEach(name => |
555 { | 558 { |
556 genericSelectorExceptionDomains.add(name); | 559 genericSelectorExceptionDomains.add(name); |
kzar
2017/07/10 12:33:08
I wonder if it would be better to pass two Sets of
Manish Jethani
2017/07/11 11:19:18
We'd still have to combine them into one set if we
kzar
2017/07/11 12:20:03
Fair enough.
| |
557 }); | 560 }); |
558 | 561 |
559 addCSSRules(rules, genericSelectors, "^https?://", | 562 addCSSRules(rules, genericSelectors, "^https?://", |
560 genericSelectorExceptionDomains); | 563 genericSelectorExceptionDomains); |
561 | 564 |
562 groupedElemhideFilters.forEach((selectors, matchDomain) => | 565 groupedElemhideFilters.forEach((selectors, matchDomain) => |
563 { | 566 { |
564 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains); | 567 addCSSRules(rules, selectors, matchDomain, elemhideExceptionDomains); |
565 }); | 568 }); |
566 | 569 |
567 let requestFilterExceptionDomains = []; | 570 let requestFilterExceptionDomains = []; |
568 for (let filter of this.genericblockExceptions) | 571 for (let filter of this.genericblockExceptions) |
569 { | 572 { |
570 let parsed = parseFilterRegexpSource(filter.regexpSource); | 573 let parsed = parseFilterRegexpSource(filter.regexpSource); |
571 if (parsed.hostname) | 574 if (parsed.hostname) |
572 requestFilterExceptionDomains.push(parsed.hostname); | 575 requestFilterExceptionDomains.push(parsed.hostname); |
573 } | 576 } |
574 | 577 |
575 for (let filter of this.requestFilters) | 578 for (let filter of this.requestFilters) |
576 { | 579 { |
577 convertFilterAddRules(rules, filter, "block", true, | 580 convertFilterAddRules(rules, filter, "block", true, |
578 requestFilterExceptionDomains); | 581 requestFilterExceptionDomains); |
579 } | 582 } |
580 | 583 |
581 for (let filter of this.requestExceptions) | 584 for (let filter of this.requestExceptions) |
582 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 585 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
583 | 586 |
584 return rules.filter(rule => !hasNonASCI(rule)); | 587 return rules; |
585 }; | 588 }; |
LEFT | RIGHT |