Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/filterClasses.js

Issue 29912636: Issue 7052 - Use string-based matching for literal patterns (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Left Patch Set: Created Oct. 17, 2018, 1:34 a.m.
Right Patch Set: Move code back to lib/filterClasses.js Created Oct. 21, 2018, 11:46 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | test/filterClasses.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Definition of Filter class and its subclasses. 21 * @fileOverview Definition of Filter class and its subclasses.
22 */ 22 */
23 23
24 const {filterNotifier} = require("./filterNotifier"); 24 const {filterNotifier} = require("./filterNotifier");
25 const {extend} = require("./coreUtils"); 25 const {extend} = require("./coreUtils");
26 const {filterToRegExp} = require("./common"); 26 const {filterToRegExp} = require("./common");
27 27
28 let tripleAnchorRegExp = new RegExp(filterToRegExp("|||")); 28 /**
29 * Regular expression used to match the <code>||</code> prefix in an otherwise
30 * literal pattern.
31 * @type {RegExp}
32 */
33 let doubleAnchorRegExp = new RegExp(filterToRegExp("||") + "$");
29 34
30 /** 35 /**
31 * All known unique domain sources mapped to their parsed values. 36 * All known unique domain sources mapped to their parsed values.
32 * @type {Map.<string,Map.<string,boolean>>} 37 * @type {Map.<string,Map.<string,boolean>>}
33 */ 38 */
34 let knownDomainMaps = new Map(); 39 let knownDomainMaps = new Map();
40
41 /**
42 * Checks whether the given pattern is a string of literal characters with no
43 * wildcards or any other special characters. If the pattern is prefixed with a
44 * <code>||</code> but otherwise contains no special characters, it is still
45 * considered to be a literal pattern.
46 * @param {string} pattern
47 * @returns {boolean}
48 */
49 function isLiteralPattern(pattern)
50 {
51 return !/[*^|]/.test(pattern.replace(/^\|{2}/, ""));
52 }
35 53
36 /** 54 /**
37 * Abstract base class for filters 55 * Abstract base class for filters
38 * 56 *
39 * @param {string} text string representation of the filter 57 * @param {string} text string representation of the filter
40 * @constructor 58 * @constructor
41 */ 59 */
42 function Filter(text) 60 function Filter(text)
43 { 61 {
44 this.text = text; 62 this.text = text;
(...skipping 614 matching lines...) Expand 10 before | Expand all | Expand 10 after
659 * @param {boolean} [matchCase] 677 * @param {boolean} [matchCase]
660 * Defines whether the filter should distinguish between lower and upper case 678 * Defines whether the filter should distinguish between lower and upper case
661 * letters 679 * letters
662 * @param {string} [domains] 680 * @param {string} [domains]
663 * Domains that the filter is restricted to, e.g. "foo.com|bar.com|~baz.com" 681 * Domains that the filter is restricted to, e.g. "foo.com|bar.com|~baz.com"
664 * @param {boolean} [thirdParty] 682 * @param {boolean} [thirdParty]
665 * Defines whether the filter should apply to third-party or first-party 683 * Defines whether the filter should apply to third-party or first-party
666 * content only 684 * content only
667 * @param {string} [sitekeys] 685 * @param {string} [sitekeys]
668 * Public keys of websites that this filter should apply to 686 * Public keys of websites that this filter should apply to
669 * @param {?string} [rewrite] 687 * @constructor
670 * The (optional) rule specifying how to rewrite the URL.
671 * @augments ActiveFilter 688 * @augments ActiveFilter
672 */ 689 */
673 function RegExpFilter(text, regexpSource, contentType, matchCase, domains, 690 function RegExpFilter(text, regexpSource, contentType, matchCase, domains,
674 thirdParty, sitekeys, rewrite) 691 thirdParty, sitekeys)
675 { 692 {
676 ActiveFilter.call(this, text, domains); 693 ActiveFilter.call(this, text, domains);
677 694
678 if (contentType != null) 695 if (contentType != null)
679 this.contentType = contentType; 696 this.contentType = contentType;
680 if (matchCase) 697 if (matchCase)
681 this.matchCase = matchCase; 698 this.matchCase = matchCase;
682 if (thirdParty != null) 699 if (thirdParty != null)
683 this.thirdParty = thirdParty; 700 this.thirdParty = thirdParty;
684 if (sitekeys != null) 701 if (sitekeys != null)
685 this.sitekeySource = sitekeys; 702 this.sitekeySource = sitekeys;
686 703
687 if (regexpSource.length >= 2 && 704 if (regexpSource.length >= 2 &&
688 regexpSource[0] == "/" && 705 regexpSource[0] == "/" &&
689 regexpSource[regexpSource.length - 1] == "/") 706 regexpSource[regexpSource.length - 1] == "/")
690 { 707 {
691 // The filter is a regular expression - convert it immediately to 708 // The filter is a regular expression - convert it immediately to
692 // catch syntax errors 709 // catch syntax errors
693 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2), 710 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2),
694 this.matchCase ? "" : "i"); 711 this.matchCase ? "" : "i");
695 Object.defineProperty(this, "regexp", {value: regexp}); 712 Object.defineProperty(this, "regexp", {value: regexp});
696 } 713 }
697 else 714 else
698 { 715 {
716 if (!this.matchCase && isLiteralPattern(regexpSource))
717 regexpSource = regexpSource.toLowerCase();
718
699 // No need to convert this filter to regular expression yet, do it on demand 719 // No need to convert this filter to regular expression yet, do it on demand
700 this.pattern = regexpSource; 720 this.pattern = regexpSource;
701
702 if (rewrite == null)
703 {
704 this.pattern = this.pattern.replace(/^\**/, "").replace(/\**$/, "");
705
706 if (!this.matchCase)
707 this.pattern = this.pattern.toLowerCase();
708 }
709 } 721 }
710 } 722 }
711 exports.RegExpFilter = RegExpFilter; 723 exports.RegExpFilter = RegExpFilter;
712 724
713 RegExpFilter.prototype = extend(ActiveFilter, { 725 RegExpFilter.prototype = extend(ActiveFilter, {
714 /** 726 /**
715 * Number of filters contained, will always be 1 (required to 727 * Number of filters contained, will always be 1 (required to
716 * optimize {@link Matcher}). 728 * optimize {@link Matcher}).
717 * @type {number} 729 * @type {number}
718 */ 730 */
(...skipping 12 matching lines...) Expand all
731 pattern: null, 743 pattern: null,
732 /** 744 /**
733 * Regular expression to be used when testing against this filter 745 * Regular expression to be used when testing against this filter
734 * @type {RegExp} 746 * @type {RegExp}
735 */ 747 */
736 get regexp() 748 get regexp()
737 { 749 {
738 let value = null; 750 let value = null;
739 751
740 let {pattern, rewrite} = this; 752 let {pattern, rewrite} = this;
741 if (rewrite != null || /[*^|]/.test(pattern.replace(/^\|{1,2}/, ""))) 753 if (rewrite != null || !isLiteralPattern(pattern))
742 { 754 {
743 value = new RegExp(filterToRegExp(pattern, rewrite != null), 755 value = new RegExp(filterToRegExp(pattern, rewrite != null),
744 this.matchCase ? "" : "i"); 756 this.matchCase ? "" : "i");
745 } 757 }
746 758
747 Object.defineProperty(this, "regexp", {value}); 759 Object.defineProperty(this, "regexp", {value});
748 return value; 760 return value;
749 }, 761 },
750 /** 762 /**
751 * Content types the filter applies to, combination of values from 763 * Content types the filter applies to, combination of values from
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
802 * @return {boolean} true in case of a match 814 * @return {boolean} true in case of a match
803 */ 815 */
804 matches(location, typeMask, docDomain, thirdParty, sitekey) 816 matches(location, typeMask, docDomain, thirdParty, sitekey)
805 { 817 {
806 return (this.contentType & typeMask) != 0 && 818 return (this.contentType & typeMask) != 0 &&
807 (this.thirdParty == null || this.thirdParty == thirdParty) && 819 (this.thirdParty == null || this.thirdParty == thirdParty) &&
808 this.isActiveOnDomain(docDomain, sitekey) && 820 this.isActiveOnDomain(docDomain, sitekey) &&
809 this.matchesLocation(location); 821 this.matchesLocation(location);
810 }, 822 },
811 823
824 /**
825 * Checks whether the given URL matches this filter's pattern.
826 * @param {string} location The URL to check.
827 * @returns {boolean} <code>true</code> if the URL matches.
828 */
812 matchesLocation(location) 829 matchesLocation(location)
813 { 830 {
814 let {regexp} = this; 831 let {regexp} = this;
815 832
816 if (regexp) 833 if (regexp)
817 return regexp.test(location); 834 return regexp.test(location);
818 835
819 if (!this.matchCase) 836 if (!this.matchCase)
820 location = location.toLowerCase(); 837 location = location.toLowerCase();
821 838
822 let {pattern} = this; 839 let {pattern} = this;
823 840
824 if (pattern[0] == "|") 841 if (pattern[0] == "|" && pattern[1] == "|")
825 { 842 {
826 if (pattern[1] == "|") 843 let index = location.indexOf(pattern.substring(2));
827 { 844
828 let index = location.indexOf(pattern.substring(2)); 845 // The "||" prefix requires that the text that follows does not start
829 return index != -1 && location[index] != "/" && 846 // with a forward slash.
830 tripleAnchorRegExp.test(location.substring(0, index)); 847 return index != -1 && location[index] != "/" &&
831 } 848 doubleAnchorRegExp.test(location.substring(0, index));
832
833 return location.startsWith(pattern.substring(1));
834 } 849 }
835 850
836 return location.includes(pattern); 851 return location.includes(pattern);
837 } 852 }
838 }); 853 });
839 854
840 /** 855 /**
841 * Yields the filter itself (required to optimize {@link Matcher}). 856 * Yields the filter itself (required to optimize {@link Matcher}).
842 * @yields {RegExpFilter} 857 * @yields {RegExpFilter}
843 */ 858 */
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after
1031 * @param {?string} [rewrite] 1046 * @param {?string} [rewrite]
1032 * The (optional) rule specifying how to rewrite the URL. See 1047 * The (optional) rule specifying how to rewrite the URL. See
1033 * BlockingFilter.prototype.rewrite. 1048 * BlockingFilter.prototype.rewrite.
1034 * @constructor 1049 * @constructor
1035 * @augments RegExpFilter 1050 * @augments RegExpFilter
1036 */ 1051 */
1037 function BlockingFilter(text, regexpSource, contentType, matchCase, domains, 1052 function BlockingFilter(text, regexpSource, contentType, matchCase, domains,
1038 thirdParty, sitekeys, collapse, csp, rewrite) 1053 thirdParty, sitekeys, collapse, csp, rewrite)
1039 { 1054 {
1040 RegExpFilter.call(this, text, regexpSource, contentType, matchCase, domains, 1055 RegExpFilter.call(this, text, regexpSource, contentType, matchCase, domains,
1041 thirdParty, sitekeys, rewrite); 1056 thirdParty, sitekeys);
1042 1057
1043 if (collapse != null) 1058 if (collapse != null)
1044 this.collapse = collapse; 1059 this.collapse = collapse;
1045 1060
1046 if (csp != null) 1061 if (csp != null)
1047 this.csp = csp; 1062 this.csp = csp;
1048 1063
1049 if (rewrite != null) 1064 if (rewrite != null)
1050 this.rewrite = rewrite; 1065 this.rewrite = rewrite;
1051 } 1066 }
(...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after
1321 1336
1322 /** 1337 /**
1323 * Script that should be executed 1338 * Script that should be executed
1324 * @type {string} 1339 * @type {string}
1325 */ 1340 */
1326 get script() 1341 get script()
1327 { 1342 {
1328 return this.body; 1343 return this.body;
1329 } 1344 }
1330 }); 1345 });
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld