Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/filterClasses.js

Issue 29912636: Issue 7052 - Use string-based matching for literal patterns (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Restrict to patterns with double anchor Created Oct. 17, 2018, 2:21 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/filterClasses.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Definition of Filter class and its subclasses. 21 * @fileOverview Definition of Filter class and its subclasses.
22 */ 22 */
23 23
24 const {filterNotifier} = require("./filterNotifier"); 24 const {filterNotifier} = require("./filterNotifier");
25 const {extend} = require("./coreUtils"); 25 const {extend} = require("./coreUtils");
26 const {filterToRegExp} = require("./common"); 26 const {filterToRegExp} = require("./common");
27 27
28 let tripleAnchorRegExp = new RegExp(filterToRegExp("|||"));
29
28 /** 30 /**
29 * All known unique domain sources mapped to their parsed values. 31 * All known unique domain sources mapped to their parsed values.
30 * @type {Map.<string,Map.<string,boolean>>} 32 * @type {Map.<string,Map.<string,boolean>>}
31 */ 33 */
32 let knownDomainMaps = new Map(); 34 let knownDomainMaps = new Map();
33 35
34 /** 36 /**
35 * Abstract base class for filters 37 * Abstract base class for filters
36 * 38 *
37 * @param {string} text string representation of the filter 39 * @param {string} text string representation of the filter
(...skipping 619 matching lines...) Expand 10 before | Expand all | Expand 10 after
657 * @param {boolean} [matchCase] 659 * @param {boolean} [matchCase]
658 * Defines whether the filter should distinguish between lower and upper case 660 * Defines whether the filter should distinguish between lower and upper case
659 * letters 661 * letters
660 * @param {string} [domains] 662 * @param {string} [domains]
661 * Domains that the filter is restricted to, e.g. "foo.com|bar.com|~baz.com" 663 * Domains that the filter is restricted to, e.g. "foo.com|bar.com|~baz.com"
662 * @param {boolean} [thirdParty] 664 * @param {boolean} [thirdParty]
663 * Defines whether the filter should apply to third-party or first-party 665 * Defines whether the filter should apply to third-party or first-party
664 * content only 666 * content only
665 * @param {string} [sitekeys] 667 * @param {string} [sitekeys]
666 * Public keys of websites that this filter should apply to 668 * Public keys of websites that this filter should apply to
667 * @constructor 669 * @param {?string} [rewrite]
670 * The (optional) rule specifying how to rewrite the URL.
668 * @augments ActiveFilter 671 * @augments ActiveFilter
669 */ 672 */
670 function RegExpFilter(text, regexpSource, contentType, matchCase, domains, 673 function RegExpFilter(text, regexpSource, contentType, matchCase, domains,
671 thirdParty, sitekeys) 674 thirdParty, sitekeys, rewrite)
672 { 675 {
673 ActiveFilter.call(this, text, domains); 676 ActiveFilter.call(this, text, domains);
674 677
675 if (contentType != null) 678 if (contentType != null)
676 this.contentType = contentType; 679 this.contentType = contentType;
677 if (matchCase) 680 if (matchCase)
678 this.matchCase = matchCase; 681 this.matchCase = matchCase;
679 if (thirdParty != null) 682 if (thirdParty != null)
680 this.thirdParty = thirdParty; 683 this.thirdParty = thirdParty;
681 if (sitekeys != null) 684 if (sitekeys != null)
682 this.sitekeySource = sitekeys; 685 this.sitekeySource = sitekeys;
683 686
684 if (regexpSource.length >= 2 && 687 if (regexpSource.length >= 2 &&
685 regexpSource[0] == "/" && 688 regexpSource[0] == "/" &&
686 regexpSource[regexpSource.length - 1] == "/") 689 regexpSource[regexpSource.length - 1] == "/")
687 { 690 {
688 // The filter is a regular expression - convert it immediately to 691 // The filter is a regular expression - convert it immediately to
689 // catch syntax errors 692 // catch syntax errors
690 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2), 693 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2),
691 this.matchCase ? "" : "i"); 694 this.matchCase ? "" : "i");
692 Object.defineProperty(this, "regexp", {value: regexp}); 695 Object.defineProperty(this, "regexp", {value: regexp});
693 } 696 }
694 else 697 else
695 { 698 {
696 // No need to convert this filter to regular expression yet, do it on demand 699 // No need to convert this filter to regular expression yet, do it on demand
697 this.pattern = regexpSource; 700 this.pattern = regexpSource;
701
702 if (!this.matchCase && rewrite == null &&
703 !/[*^|]/.test(this.pattern.replace(/^\|{2}/, "")))
704 {
705 this.pattern = this.pattern.toLowerCase();
706 }
698 } 707 }
699 } 708 }
700 exports.RegExpFilter = RegExpFilter; 709 exports.RegExpFilter = RegExpFilter;
701 710
702 RegExpFilter.prototype = extend(ActiveFilter, { 711 RegExpFilter.prototype = extend(ActiveFilter, {
703 /** 712 /**
704 * Number of filters contained, will always be 1 (required to 713 * Number of filters contained, will always be 1 (required to
705 * optimize {@link Matcher}). 714 * optimize {@link Matcher}).
706 * @type {number} 715 * @type {number}
707 */ 716 */
708 size: 1, 717 size: 1,
709 718
710 /** 719 /**
711 * @see ActiveFilter.domainSeparator 720 * @see ActiveFilter.domainSeparator
712 */ 721 */
713 domainSeparator: "|", 722 domainSeparator: "|",
714 723
715 /** 724 /**
716 * Expression from which a regular expression should be generated - 725 * Expression from which a regular expression should be generated -
717 * for delayed creation of the regexp property 726 * for delayed creation of the regexp property
718 * @type {?string} 727 * @type {?string}
719 */ 728 */
720 pattern: null, 729 pattern: null,
721 /** 730 /**
722 * Regular expression to be used when testing against this filter 731 * Regular expression to be used when testing against this filter
723 * @type {RegExp} 732 * @type {RegExp}
724 */ 733 */
725 get regexp() 734 get regexp()
726 { 735 {
727 let source = filterToRegExp(this.pattern, this.rewrite != null); 736 let value = null;
728 let regexp = new RegExp(source, this.matchCase ? "" : "i"); 737
729 Object.defineProperty(this, "regexp", {value: regexp}); 738 let {pattern, rewrite} = this;
730 return regexp; 739 if (rewrite != null || /[*^|]/.test(pattern.replace(/^\|{2}/, "")))
740 {
741 value = new RegExp(filterToRegExp(pattern, rewrite != null),
742 this.matchCase ? "" : "i");
743 }
744
745 Object.defineProperty(this, "regexp", {value});
746 return value;
731 }, 747 },
732 /** 748 /**
733 * Content types the filter applies to, combination of values from 749 * Content types the filter applies to, combination of values from
734 * RegExpFilter.typeMap 750 * RegExpFilter.typeMap
735 * @type {number} 751 * @type {number}
736 */ 752 */
737 contentType: 0x7FFFFFFF, 753 contentType: 0x7FFFFFFF,
738 /** 754 /**
739 * Defines whether the filter should distinguish between lower and 755 * Defines whether the filter should distinguish between lower and
740 * upper case letters 756 * upper case letters
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
781 * @param {boolean} [thirdParty] should be true if the URL is a third-party 797 * @param {boolean} [thirdParty] should be true if the URL is a third-party
782 * request 798 * request
783 * @param {string} [sitekey] public key provided by the document 799 * @param {string} [sitekey] public key provided by the document
784 * @return {boolean} true in case of a match 800 * @return {boolean} true in case of a match
785 */ 801 */
786 matches(location, typeMask, docDomain, thirdParty, sitekey) 802 matches(location, typeMask, docDomain, thirdParty, sitekey)
787 { 803 {
788 return (this.contentType & typeMask) != 0 && 804 return (this.contentType & typeMask) != 0 &&
789 (this.thirdParty == null || this.thirdParty == thirdParty) && 805 (this.thirdParty == null || this.thirdParty == thirdParty) &&
790 this.isActiveOnDomain(docDomain, sitekey) && 806 this.isActiveOnDomain(docDomain, sitekey) &&
791 this.regexp.test(location); 807 this.matchesLocation(location);
808 },
809
810 matchesLocation(location)
811 {
812 let {regexp} = this;
813
814 if (regexp)
815 return regexp.test(location);
816
817 if (!this.matchCase)
818 location = location.toLowerCase();
819
820 let {pattern} = this;
821
822 if (pattern[0] == "|" && pattern[1] == "|")
823 {
824 let index = location.indexOf(pattern.substring(2));
825 return index != -1 && location[index] != "/" &&
826 tripleAnchorRegExp.test(location.substring(0, index));
827 }
828
829 return location.includes(pattern);
792 } 830 }
793 }); 831 });
794 832
795 /** 833 /**
796 * Yields the filter itself (required to optimize {@link Matcher}). 834 * Yields the filter itself (required to optimize {@link Matcher}).
797 * @yields {RegExpFilter} 835 * @yields {RegExpFilter}
798 */ 836 */
799 RegExpFilter.prototype[Symbol.iterator] = function*() 837 RegExpFilter.prototype[Symbol.iterator] = function*()
800 { 838 {
801 yield this; 839 yield this;
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
986 * @param {?string} [rewrite] 1024 * @param {?string} [rewrite]
987 * The (optional) rule specifying how to rewrite the URL. See 1025 * The (optional) rule specifying how to rewrite the URL. See
988 * BlockingFilter.prototype.rewrite. 1026 * BlockingFilter.prototype.rewrite.
989 * @constructor 1027 * @constructor
990 * @augments RegExpFilter 1028 * @augments RegExpFilter
991 */ 1029 */
992 function BlockingFilter(text, regexpSource, contentType, matchCase, domains, 1030 function BlockingFilter(text, regexpSource, contentType, matchCase, domains,
993 thirdParty, sitekeys, collapse, csp, rewrite) 1031 thirdParty, sitekeys, collapse, csp, rewrite)
994 { 1032 {
995 RegExpFilter.call(this, text, regexpSource, contentType, matchCase, domains, 1033 RegExpFilter.call(this, text, regexpSource, contentType, matchCase, domains,
996 thirdParty, sitekeys); 1034 thirdParty, sitekeys, rewrite);
997 1035
998 if (collapse != null) 1036 if (collapse != null)
999 this.collapse = collapse; 1037 this.collapse = collapse;
1000 1038
1001 if (csp != null) 1039 if (csp != null)
1002 this.csp = csp; 1040 this.csp = csp;
1003 1041
1004 if (rewrite != null) 1042 if (rewrite != null)
1005 this.rewrite = rewrite; 1043 this.rewrite = rewrite;
1006 } 1044 }
(...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after
1276 1314
1277 /** 1315 /**
1278 * Script that should be executed 1316 * Script that should be executed
1279 * @type {string} 1317 * @type {string}
1280 */ 1318 */
1281 get script() 1319 get script()
1282 { 1320 {
1283 return this.body; 1321 return this.body;
1284 } 1322 }
1285 }); 1323 });
OLDNEW
« no previous file with comments | « no previous file | test/filterClasses.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld