 Issue 29907586:
  Issue 6994 - Use shortcut matching for location only filters  (Closed)
    
  
    Issue 29907586:
  Issue 6994 - Use shortcut matching for location only filters  (Closed) 
  | Left: | ||
| Right: | 
| LEFT | RIGHT | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH | 
| 4 * | 4 * | 
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as | 
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. | 
| 8 * | 8 * | 
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, | 
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. | 
| 13 * | 13 * | 
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License | 
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| 16 */ | 16 */ | 
| 17 | 17 | 
| 18 "use strict"; | 18 "use strict"; | 
| 19 | 19 | 
| 20 /** | 20 /** | 
| 21 * @fileOverview Definition of Filter class and its subclasses. | 21 * @fileOverview Definition of Filter class and its subclasses. | 
| 22 */ | 22 */ | 
| 23 | 23 | 
| 24 const {filterNotifier} = require("./filterNotifier"); | 24 const {filterNotifier} = require("./filterNotifier"); | 
| 25 const {extend} = require("./coreUtils"); | 25 const {extend} = require("./coreUtils"); | 
| 26 const {filterToRegExp} = require("./common"); | 26 const {filterToRegExp} = require("./common"); | 
| 27 | 27 | 
| 28 /** | 28 /** | 
| 29 * Regular expression used to match the <code>||</code> prefix in an otherwise | |
| 30 * literal pattern. | |
| 31 * @type {RegExp} | |
| 32 */ | |
| 33 let doubleAnchorRegExp = new RegExp(filterToRegExp("||") + "$"); | |
| 34 | |
| 35 /** | |
| 29 * All known unique domain sources mapped to their parsed values. | 36 * All known unique domain sources mapped to their parsed values. | 
| 30 * @type {Map.<string,Map.<string,boolean>>} | 37 * @type {Map.<string,Map.<string,boolean>>} | 
| 31 */ | 38 */ | 
| 32 let knownDomainMaps = new Map(); | 39 let knownDomainMaps = new Map(); | 
| 40 | |
| 41 /** | |
| 42 * Checks whether the given pattern is a string of literal characters with no | |
| 43 * wildcards or any other special characters. If the pattern is prefixed with a | |
| 44 * <code>||</code> but otherwise contains no special characters, it is still | |
| 45 * considered to be a literal pattern. | |
| 46 * @param {string} pattern | |
| 47 * @returns {boolean} | |
| 48 */ | |
| 49 function isLiteralPattern(pattern) | |
| 50 { | |
| 51 return !/[*^|]/.test(pattern.replace(/^\|{2}/, "")); | |
| 52 } | |
| 33 | 53 | 
| 34 /** | 54 /** | 
| 35 * Abstract base class for filters | 55 * Abstract base class for filters | 
| 36 * | 56 * | 
| 37 * @param {string} text string representation of the filter | 57 * @param {string} text string representation of the filter | 
| 38 * @constructor | 58 * @constructor | 
| 39 */ | 59 */ | 
| 40 function Filter(text) | 60 function Filter(text) | 
| 41 { | 61 { | 
| 42 this.text = text; | 62 this.text = text; | 
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 136 this._subscriptions = [...this._subscriptions][0]; | 156 this._subscriptions = [...this._subscriptions][0]; | 
| 137 } | 157 } | 
| 138 else if (subscription == this._subscriptions) | 158 else if (subscription == this._subscriptions) | 
| 139 { | 159 { | 
| 140 this._subscriptions = null; | 160 this._subscriptions = null; | 
| 141 } | 161 } | 
| 142 } | 162 } | 
| 143 }, | 163 }, | 
| 144 | 164 | 
| 145 /** | 165 /** | 
| 146 * Serializes the filter to an array of strings for writing out on the disk. | 166 * Serializes the filter for writing out on disk. | 
| 147 * @param {string[]} buffer buffer to push the serialization results into | 167 * @yields {string} | 
| 148 */ | 168 */ | 
| 149 serialize(buffer) | 169 *serialize() | 
| 150 { | 170 { | 
| 151 buffer.push("[Filter]"); | 171 let {text} = this; | 
| 152 buffer.push("text=" + this.text); | 172 | 
| 173 yield "[Filter]"; | |
| 174 yield "text=" + text; | |
| 153 }, | 175 }, | 
| 154 | 176 | 
| 155 toString() | 177 toString() | 
| 156 { | 178 { | 
| 157 return this.text; | 179 return this.text; | 
| 158 } | 180 } | 
| 159 }; | 181 }; | 
| 160 | 182 | 
| 161 /** | 183 /** | 
| 162 * Cache for known filters, maps string representation to filter objects. | 184 * Cache for known filters, maps string representation to filter objects. | 
| (...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 319 /** | 341 /** | 
| 320 * Reason why this filter is invalid | 342 * Reason why this filter is invalid | 
| 321 * @type {string} | 343 * @type {string} | 
| 322 */ | 344 */ | 
| 323 reason: null, | 345 reason: null, | 
| 324 | 346 | 
| 325 /** | 347 /** | 
| 326 * See Filter.serialize() | 348 * See Filter.serialize() | 
| 327 * @inheritdoc | 349 * @inheritdoc | 
| 328 */ | 350 */ | 
| 329 serialize(buffer) {} | 351 *serialize() {} | 
| 330 }); | 352 }); | 
| 331 | 353 | 
| 332 /** | 354 /** | 
| 333 * Class for comments | 355 * Class for comments | 
| 334 * @param {string} text see {@link Filter Filter()} | 356 * @param {string} text see {@link Filter Filter()} | 
| 335 * @constructor | 357 * @constructor | 
| 336 * @augments Filter | 358 * @augments Filter | 
| 337 */ | 359 */ | 
| 338 function CommentFilter(text) | 360 function CommentFilter(text) | 
| 339 { | 361 { | 
| 340 Filter.call(this, text); | 362 Filter.call(this, text); | 
| 341 } | 363 } | 
| 342 exports.CommentFilter = CommentFilter; | 364 exports.CommentFilter = CommentFilter; | 
| 343 | 365 | 
| 344 CommentFilter.prototype = extend(Filter, { | 366 CommentFilter.prototype = extend(Filter, { | 
| 345 type: "comment", | 367 type: "comment", | 
| 346 | 368 | 
| 347 /** | 369 /** | 
| 348 * See Filter.serialize() | 370 * See Filter.serialize() | 
| 349 * @inheritdoc | 371 * @inheritdoc | 
| 350 */ | 372 */ | 
| 351 serialize(buffer) {} | 373 *serialize() {} | 
| 352 }); | 374 }); | 
| 353 | 375 | 
| 354 /** | 376 /** | 
| 355 * Abstract base class for filters that can get hits | 377 * Abstract base class for filters that can get hits | 
| 356 * @param {string} text | 378 * @param {string} text | 
| 357 * see {@link Filter Filter()} | 379 * see {@link Filter Filter()} | 
| 358 * @param {string} [domains] | 380 * @param {string} [domains] | 
| 359 * Domains that the filter is restricted to separated by domainSeparator | 381 * Domains that the filter is restricted to separated by domainSeparator | 
| 360 * e.g. "foo.com|bar.com|~baz.com" | 382 * e.g. "foo.com|bar.com|~baz.com" | 
| 361 * @constructor | 383 * @constructor | 
| (...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 624 { | 646 { | 
| 625 let {sitekeys, domains} = this; | 647 let {sitekeys, domains} = this; | 
| 626 | 648 | 
| 627 return !(sitekeys && sitekeys.length) && (!domains || domains.get("")); | 649 return !(sitekeys && sitekeys.length) && (!domains || domains.get("")); | 
| 628 }, | 650 }, | 
| 629 | 651 | 
| 630 /** | 652 /** | 
| 631 * See Filter.serialize() | 653 * See Filter.serialize() | 
| 632 * @inheritdoc | 654 * @inheritdoc | 
| 633 */ | 655 */ | 
| 634 serialize(buffer) | 656 *serialize() | 
| 635 { | 657 { | 
| 636 if (this._disabled || this._hitCount || this._lastHit) | 658 let {_disabled, _hitCount, _lastHit} = this; | 
| 637 { | 659 | 
| 638 Filter.prototype.serialize.call(this, buffer); | 660 if (_disabled || _hitCount || _lastHit) | 
| 639 if (this._disabled) | 661 { | 
| 640 buffer.push("disabled=true"); | 662 yield* Filter.prototype.serialize.call(this); | 
| 641 if (this._hitCount) | 663 if (_disabled) | 
| 642 buffer.push("hitCount=" + this._hitCount); | 664 yield "disabled=true"; | 
| 643 if (this._lastHit) | 665 if (_hitCount) | 
| 644 buffer.push("lastHit=" + this._lastHit); | 666 yield "hitCount=" + _hitCount; | 
| 667 if (_lastHit) | |
| 668 yield "lastHit=" + _lastHit; | |
| 645 } | 669 } | 
| 646 } | 670 } | 
| 647 }); | 671 }); | 
| 648 | 672 | 
| 649 /** | 673 /** | 
| 650 * Abstract base class for RegExp-based filters | 674 * Abstract base class for RegExp-based filters | 
| 651 * @param {string} text see {@link Filter Filter()} | 675 * @param {string} text see {@link Filter Filter()} | 
| 652 * @param {string} regexpSource | 676 * @param {string} regexpSource | 
| 653 * filter part that the regular expression should be built from | 677 * filter part that the regular expression should be built from | 
| 654 * @param {number} [contentType] | 678 * @param {number} [contentType] | 
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 686 regexpSource[regexpSource.length - 1] == "/") | 710 regexpSource[regexpSource.length - 1] == "/") | 
| 687 { | 711 { | 
| 688 // The filter is a regular expression - convert it immediately to | 712 // The filter is a regular expression - convert it immediately to | 
| 689 // catch syntax errors | 713 // catch syntax errors | 
| 690 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2), | 714 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2), | 
| 691 this.matchCase ? "" : "i"); | 715 this.matchCase ? "" : "i"); | 
| 692 Object.defineProperty(this, "regexp", {value: regexp}); | 716 Object.defineProperty(this, "regexp", {value: regexp}); | 
| 693 } | 717 } | 
| 694 else | 718 else | 
| 695 { | 719 { | 
| 720 if (!this.matchCase && isLiteralPattern(regexpSource)) | |
| 721 regexpSource = regexpSource.toLowerCase(); | |
| 722 | |
| 696 // No need to convert this filter to regular expression yet, do it on demand | 723 // No need to convert this filter to regular expression yet, do it on demand | 
| 697 this.pattern = regexpSource; | 724 this.pattern = regexpSource; | 
| 698 } | 725 } | 
| 699 } | 726 } | 
| 700 exports.RegExpFilter = RegExpFilter; | 727 exports.RegExpFilter = RegExpFilter; | 
| 701 | 728 | 
| 702 RegExpFilter.prototype = extend(ActiveFilter, { | 729 RegExpFilter.prototype = extend(ActiveFilter, { | 
| 703 /** | 730 /** | 
| 704 * Number of filters contained, will always be 1 (required to | 731 * Number of filters contained, will always be 1 (required to | 
| 705 * optimize {@link Matcher}). | 732 * optimize {@link Matcher}). | 
| (...skipping 11 matching lines...) Expand all Loading... | |
| 717 * for delayed creation of the regexp property | 744 * for delayed creation of the regexp property | 
| 718 * @type {?string} | 745 * @type {?string} | 
| 719 */ | 746 */ | 
| 720 pattern: null, | 747 pattern: null, | 
| 721 /** | 748 /** | 
| 722 * Regular expression to be used when testing against this filter | 749 * Regular expression to be used when testing against this filter | 
| 723 * @type {RegExp} | 750 * @type {RegExp} | 
| 724 */ | 751 */ | 
| 725 get regexp() | 752 get regexp() | 
| 726 { | 753 { | 
| 727 let source = filterToRegExp(this.pattern, this.rewrite != null); | 754 let value = null; | 
| 728 let regexp = new RegExp(source, this.matchCase ? "" : "i"); | 755 | 
| 729 Object.defineProperty(this, "regexp", {value: regexp}); | 756 let {pattern, rewrite} = this; | 
| 730 return regexp; | 757 if (rewrite != null || !isLiteralPattern(pattern)) | 
| 758 { | |
| 759 value = new RegExp(filterToRegExp(pattern, rewrite != null), | |
| 760 this.matchCase ? "" : "i"); | |
| 761 } | |
| 762 | |
| 763 Object.defineProperty(this, "regexp", {value}); | |
| 764 return value; | |
| 731 }, | 765 }, | 
| 732 /** | 766 /** | 
| 733 * Content types the filter applies to, combination of values from | 767 * Content types the filter applies to, combination of values from | 
| 734 * RegExpFilter.typeMap | 768 * RegExpFilter.typeMap | 
| 735 * @type {number} | 769 * @type {number} | 
| 736 */ | 770 */ | 
| 737 contentType: 0x7FFFFFFF, | 771 contentType: 0x7FFFFFFF, | 
| 738 /** | 772 /** | 
| 739 * Defines whether the filter should distinguish between lower and | 773 * Defines whether the filter should distinguish between lower and | 
| 740 * upper case letters | 774 * upper case letters | 
| (...skipping 24 matching lines...) Expand all Loading... | |
| 765 { | 799 { | 
| 766 sitekeys = this.sitekeySource.split("|"); | 800 sitekeys = this.sitekeySource.split("|"); | 
| 767 this.sitekeySource = null; | 801 this.sitekeySource = null; | 
| 768 } | 802 } | 
| 769 | 803 | 
| 770 Object.defineProperty( | 804 Object.defineProperty( | 
| 771 this, "sitekeys", {value: sitekeys, enumerable: true} | 805 this, "sitekeys", {value: sitekeys, enumerable: true} | 
| 772 ); | 806 ); | 
| 773 return this.sitekeys; | 807 return this.sitekeys; | 
| 774 }, | 808 }, | 
| 775 /** | 809 | 
| 776 * Tests whether the filter only has location. | 810 /** | 
| 777 */ | 811 * Tests whether the filter only has a location. | 
| 778 get isLocationOnly() | 812 * @return {boolean} | 
| 813 */ | |
| 814 isLocationOnly() | |
| 779 { | 815 { | 
| 780 return this.contentType == RegExpFilter.prototype.contentType && | 816 return this.contentType == RegExpFilter.prototype.contentType && | 
| 
Manish Jethani
2018/10/24 21:35:28
I haven't run this code, but I'm pretty sure that […comment truncated in the collapsed review view]
 
Jon Sonesen
2018/10/24 21:46:51
Ah, I see what you mean there. I didn't look deep […comment truncated in the collapsed review view]
 | |
| 781 this.thirdParty == null && !this.domains && !this.sitekeys; | 817 this.thirdParty == null && !this.domains && !this.sitekeys; | 
| 782 }, | 818 }, | 
| 783 | 819 | 
| 784 /** | 820 /** | 
| 785 * Tests whether the URL matches this filter | 821 * Tests whether the URL matches this filter | 
| 786 * @param {string} location URL to be tested | 822 * @param {string} location URL to be tested | 
| 787 * @param {number} typeMask bitmask of content / request types to match | 823 * @param {number} typeMask bitmask of content / request types to match | 
| 788 * @param {string} [docDomain] domain name of the document that loads the URL | 824 * @param {string} [docDomain] domain name of the document that loads the URL | 
| 789 * @param {boolean} [thirdParty] should be true if the URL is a third-party | 825 * @param {boolean} [thirdParty] should be true if the URL is a third-party | 
| 790 * request | 826 * request | 
| 791 * @param {string} [sitekey] public key provided by the document | 827 * @param {string} [sitekey] public key provided by the document | 
| 792 * @return {boolean} true in case of a match | 828 * @return {boolean} true in case of a match | 
| 793 */ | 829 */ | 
| 794 matches(location, typeMask, docDomain, thirdParty, sitekey) | 830 matches(location, typeMask, docDomain, thirdParty, sitekey) | 
| 795 { | 831 { | 
| 796 return (this.contentType & typeMask) != 0 && | 832 return (this.contentType & typeMask) != 0 && | 
| 797 (this.thirdParty == null || this.thirdParty == thirdParty) && | 833 (this.thirdParty == null || this.thirdParty == thirdParty) && | 
| 798 this.isActiveOnDomain(docDomain, sitekey) && | 834 this.isActiveOnDomain(docDomain, sitekey) && | 
| 799 this.regexp.test(location); | 835 this.matchesLocation(location); | 
| 800 }, | 836 }, | 
| 837 | |
| 838 /** | |
| 839 * Checks whether the given URL matches this filter's pattern. | |
| 840 * @param {string} location The URL to check. | |
| 841 * @returns {boolean} <code>true</code> if the URL matches. | |
| 842 */ | |
| 801 matchesLocation(location) | 843 matchesLocation(location) | 
| 802 { | 844 { | 
| 803 return this.regexp.test(location); | 845 let {regexp} = this; | 
| 846 | |
| 847 if (regexp) | |
| 848 return regexp.test(location); | |
| 849 | |
| 850 if (!this.matchCase) | |
| 851 location = location.toLowerCase(); | |
| 852 | |
| 853 let {pattern} = this; | |
| 854 | |
| 855 if (pattern[0] == "|" && pattern[1] == "|") | |
| 856 { | |
| 857 let index = location.indexOf(pattern.substring(2)); | |
| 858 | |
| 859 // The "||" prefix requires that the text that follows does not start | |
| 860 // with a forward slash. | |
| 861 return index != -1 && location[index] != "/" && | |
| 862 doubleAnchorRegExp.test(location.substring(0, index)); | |
| 863 } | |
| 864 | |
| 865 return location.includes(pattern); | |
| 804 } | 866 } | 
| 805 }); | 867 }); | 
| 806 | 868 | 
| 807 /** | 869 /** | 
| 808 * Yields the filter itself (required to optimize {@link Matcher}). | 870 * Yields the filter itself (required to optimize {@link Matcher}). | 
| 809 * @yields {RegExpFilter} | 871 * @yields {RegExpFilter} | 
| 810 */ | 872 */ | 
| 811 RegExpFilter.prototype[Symbol.iterator] = function*() | 873 RegExpFilter.prototype[Symbol.iterator] = function*() | 
| 812 { | 874 { | 
| 813 yield this; | 875 yield this; | 
| (...skipping 474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1288 | 1350 | 
| 1289 /** | 1351 /** | 
| 1290 * Script that should be executed | 1352 * Script that should be executed | 
| 1291 * @type {string} | 1353 * @type {string} | 
| 1292 */ | 1354 */ | 
| 1293 get script() | 1355 get script() | 
| 1294 { | 1356 { | 
| 1295 return this.body; | 1357 return this.body; | 
| 1296 } | 1358 } | 
| 1297 }); | 1359 }); | 
| LEFT | RIGHT |