Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/filterClasses.js

Issue 29907586: Issue 6994 - Use shortcut matching for location only filters (Closed)
Left Patch Set: Address PS1 Comments Created Oct. 20, 2018, 11:07 p.m.
Right Patch Set: Address PS4, and rebase Created Oct. 24, 2018, 8:40 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | lib/matcher.js » ('j') | lib/matcher.js » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Definition of Filter class and its subclasses. 21 * @fileOverview Definition of Filter class and its subclasses.
22 */ 22 */
23 23
24 const {filterNotifier} = require("./filterNotifier"); 24 const {filterNotifier} = require("./filterNotifier");
25 const {extend} = require("./coreUtils"); 25 const {extend} = require("./coreUtils");
26 const {filterToRegExp} = require("./common"); 26 const {filterToRegExp} = require("./common");
27 27
28 /** 28 /**
29 * Regular expression used to match the <code>||</code> prefix in an otherwise
30 * literal pattern.
31 * @type {RegExp}
32 */
33 let doubleAnchorRegExp = new RegExp(filterToRegExp("||") + "$");
34
35 /**
29 * All known unique domain sources mapped to their parsed values. 36 * All known unique domain sources mapped to their parsed values.
30 * @type {Map.<string,Map.<string,boolean>>} 37 * @type {Map.<string,Map.<string,boolean>>}
31 */ 38 */
32 let knownDomainMaps = new Map(); 39 let knownDomainMaps = new Map();
40
41 /**
42 * Checks whether the given pattern is a string of literal characters with no
43 * wildcards or any other special characters. If the pattern is prefixed with a
44 * <code>||</code> but otherwise contains no special characters, it is still
45 * considered to be a literal pattern.
46 * @param {string} pattern
47 * @returns {boolean}
48 */
49 function isLiteralPattern(pattern)
50 {
51 return !/[*^|]/.test(pattern.replace(/^\|{2}/, ""));
52 }
33 53
34 /** 54 /**
35 * Abstract base class for filters 55 * Abstract base class for filters
36 * 56 *
37 * @param {string} text string representation of the filter 57 * @param {string} text string representation of the filter
38 * @constructor 58 * @constructor
39 */ 59 */
40 function Filter(text) 60 function Filter(text)
41 { 61 {
42 this.text = text; 62 this.text = text;
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
136 this._subscriptions = [...this._subscriptions][0]; 156 this._subscriptions = [...this._subscriptions][0];
137 } 157 }
138 else if (subscription == this._subscriptions) 158 else if (subscription == this._subscriptions)
139 { 159 {
140 this._subscriptions = null; 160 this._subscriptions = null;
141 } 161 }
142 } 162 }
143 }, 163 },
144 164
145 /** 165 /**
146 * Serializes the filter to an array of strings for writing out on the disk. 166 * Serializes the filter for writing out on disk.
147 * @param {string[]} buffer buffer to push the serialization results into 167 * @yields {string}
148 */ 168 */
149 serialize(buffer) 169 *serialize()
150 { 170 {
151 buffer.push("[Filter]"); 171 let {text} = this;
152 buffer.push("text=" + this.text); 172
173 yield "[Filter]";
174 yield "text=" + text;
153 }, 175 },
154 176
155 toString() 177 toString()
156 { 178 {
157 return this.text; 179 return this.text;
158 } 180 }
159 }; 181 };
160 182
161 /** 183 /**
162 * Cache for known filters, maps string representation to filter objects. 184 * Cache for known filters, maps string representation to filter objects.
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
319 /** 341 /**
320 * Reason why this filter is invalid 342 * Reason why this filter is invalid
321 * @type {string} 343 * @type {string}
322 */ 344 */
323 reason: null, 345 reason: null,
324 346
325 /** 347 /**
326 * See Filter.serialize() 348 * See Filter.serialize()
327 * @inheritdoc 349 * @inheritdoc
328 */ 350 */
329 serialize(buffer) {} 351 *serialize() {}
330 }); 352 });
331 353
332 /** 354 /**
333 * Class for comments 355 * Class for comments
334 * @param {string} text see {@link Filter Filter()} 356 * @param {string} text see {@link Filter Filter()}
335 * @constructor 357 * @constructor
336 * @augments Filter 358 * @augments Filter
337 */ 359 */
338 function CommentFilter(text) 360 function CommentFilter(text)
339 { 361 {
340 Filter.call(this, text); 362 Filter.call(this, text);
341 } 363 }
342 exports.CommentFilter = CommentFilter; 364 exports.CommentFilter = CommentFilter;
343 365
344 CommentFilter.prototype = extend(Filter, { 366 CommentFilter.prototype = extend(Filter, {
345 type: "comment", 367 type: "comment",
346 368
347 /** 369 /**
348 * See Filter.serialize() 370 * See Filter.serialize()
349 * @inheritdoc 371 * @inheritdoc
350 */ 372 */
351 serialize(buffer) {} 373 *serialize() {}
352 }); 374 });
353 375
354 /** 376 /**
355 * Abstract base class for filters that can get hits 377 * Abstract base class for filters that can get hits
356 * @param {string} text 378 * @param {string} text
357 * see {@link Filter Filter()} 379 * see {@link Filter Filter()}
358 * @param {string} [domains] 380 * @param {string} [domains]
359 * Domains that the filter is restricted to separated by domainSeparator 381 * Domains that the filter is restricted to separated by domainSeparator
360 * e.g. "foo.com|bar.com|~baz.com" 382 * e.g. "foo.com|bar.com|~baz.com"
361 * @constructor 383 * @constructor
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after
624 { 646 {
625 let {sitekeys, domains} = this; 647 let {sitekeys, domains} = this;
626 648
627 return !(sitekeys && sitekeys.length) && (!domains || domains.get("")); 649 return !(sitekeys && sitekeys.length) && (!domains || domains.get(""));
628 }, 650 },
629 651
630 /** 652 /**
631 * See Filter.serialize() 653 * See Filter.serialize()
632 * @inheritdoc 654 * @inheritdoc
633 */ 655 */
634 serialize(buffer) 656 *serialize()
635 { 657 {
636 if (this._disabled || this._hitCount || this._lastHit) 658 let {_disabled, _hitCount, _lastHit} = this;
637 { 659
638 Filter.prototype.serialize.call(this, buffer); 660 if (_disabled || _hitCount || _lastHit)
639 if (this._disabled) 661 {
640 buffer.push("disabled=true"); 662 yield* Filter.prototype.serialize.call(this);
641 if (this._hitCount) 663 if (_disabled)
642 buffer.push("hitCount=" + this._hitCount); 664 yield "disabled=true";
643 if (this._lastHit) 665 if (_hitCount)
644 buffer.push("lastHit=" + this._lastHit); 666 yield "hitCount=" + _hitCount;
667 if (_lastHit)
668 yield "lastHit=" + _lastHit;
645 } 669 }
646 } 670 }
647 }); 671 });
648 672
649 /** 673 /**
650 * Abstract base class for RegExp-based filters 674 * Abstract base class for RegExp-based filters
651 * @param {string} text see {@link Filter Filter()} 675 * @param {string} text see {@link Filter Filter()}
652 * @param {string} regexpSource 676 * @param {string} regexpSource
653 * filter part that the regular expression should be built from 677 * filter part that the regular expression should be built from
654 * @param {number} [contentType] 678 * @param {number} [contentType]
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
686 regexpSource[regexpSource.length - 1] == "/") 710 regexpSource[regexpSource.length - 1] == "/")
687 { 711 {
688 // The filter is a regular expression - convert it immediately to 712 // The filter is a regular expression - convert it immediately to
689 // catch syntax errors 713 // catch syntax errors
690 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2), 714 let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2),
691 this.matchCase ? "" : "i"); 715 this.matchCase ? "" : "i");
692 Object.defineProperty(this, "regexp", {value: regexp}); 716 Object.defineProperty(this, "regexp", {value: regexp});
693 } 717 }
694 else 718 else
695 { 719 {
720 if (!this.matchCase && isLiteralPattern(regexpSource))
721 regexpSource = regexpSource.toLowerCase();
722
696 // No need to convert this filter to regular expression yet, do it on demand 723 // No need to convert this filter to regular expression yet, do it on demand
697 this.pattern = regexpSource; 724 this.pattern = regexpSource;
698 } 725 }
699 } 726 }
700 exports.RegExpFilter = RegExpFilter; 727 exports.RegExpFilter = RegExpFilter;
701 728
702 RegExpFilter.prototype = extend(ActiveFilter, { 729 RegExpFilter.prototype = extend(ActiveFilter, {
703 /** 730 /**
704 * Number of filters contained, will always be 1 (required to 731 * Number of filters contained, will always be 1 (required to
705 * optimize {@link Matcher}). 732 * optimize {@link Matcher}).
(...skipping 11 matching lines...) Expand all
717 * for delayed creation of the regexp property 744 * for delayed creation of the regexp property
718 * @type {?string} 745 * @type {?string}
719 */ 746 */
720 pattern: null, 747 pattern: null,
721 /** 748 /**
722 * Regular expression to be used when testing against this filter 749 * Regular expression to be used when testing against this filter
723 * @type {RegExp} 750 * @type {RegExp}
724 */ 751 */
725 get regexp() 752 get regexp()
726 { 753 {
727 let source = filterToRegExp(this.pattern, this.rewrite != null); 754 let value = null;
728 let regexp = new RegExp(source, this.matchCase ? "" : "i"); 755
729 Object.defineProperty(this, "regexp", {value: regexp}); 756 let {pattern, rewrite} = this;
730 return regexp; 757 if (rewrite != null || !isLiteralPattern(pattern))
758 {
759 value = new RegExp(filterToRegExp(pattern, rewrite != null),
760 this.matchCase ? "" : "i");
761 }
762
763 Object.defineProperty(this, "regexp", {value});
764 return value;
731 }, 765 },
732 /** 766 /**
733 * Content types the filter applies to, combination of values from 767 * Content types the filter applies to, combination of values from
734 * RegExpFilter.typeMap 768 * RegExpFilter.typeMap
735 * @type {number} 769 * @type {number}
736 */ 770 */
737 contentType: 0x7FFFFFFF, 771 contentType: 0x7FFFFFFF,
738 /** 772 /**
739 * Defines whether the filter should distinguish between lower and 773 * Defines whether the filter should distinguish between lower and
740 * upper case letters 774 * upper case letters
(...skipping 24 matching lines...) Expand all
765 { 799 {
766 sitekeys = this.sitekeySource.split("|"); 800 sitekeys = this.sitekeySource.split("|");
767 this.sitekeySource = null; 801 this.sitekeySource = null;
768 } 802 }
769 803
770 Object.defineProperty( 804 Object.defineProperty(
771 this, "sitekeys", {value: sitekeys, enumerable: true} 805 this, "sitekeys", {value: sitekeys, enumerable: true}
772 ); 806 );
773 return this.sitekeys; 807 return this.sitekeys;
774 }, 808 },
775 /** 809
776 * Tests whether the filter only has location. 810 /**
777 */ 811 * Tests whether the filter only has a location.
778 get isLocationOnly() 812 * @return {boolean}
813 */
814 isLocationOnly()
779 { 815 {
780 return this.contentType == RegExpFilter.prototype.contentType && 816 return this.contentType == RegExpFilter.prototype.contentType &&
Manish Jethani 2018/10/24 21:35:28 I haven't run this code, but I'm pretty sure that
Jon Sonesen 2018/10/24 21:46:51 Ah, I see what you mean there. I didn't look deep
781 this.thirdParty == null && !this.domains && !this.sitekeys; 817 this.thirdParty == null && !this.domains && !this.sitekeys;
782 }, 818 },
783 819
784 /** 820 /**
785 * Tests whether the URL matches this filter 821 * Tests whether the URL matches this filter
786 * @param {string} location URL to be tested 822 * @param {string} location URL to be tested
787 * @param {number} typeMask bitmask of content / request types to match 823 * @param {number} typeMask bitmask of content / request types to match
788 * @param {string} [docDomain] domain name of the document that loads the URL 824 * @param {string} [docDomain] domain name of the document that loads the URL
789 * @param {boolean} [thirdParty] should be true if the URL is a third-party 825 * @param {boolean} [thirdParty] should be true if the URL is a third-party
790 * request 826 * request
791 * @param {string} [sitekey] public key provided by the document 827 * @param {string} [sitekey] public key provided by the document
792 * @return {boolean} true in case of a match 828 * @return {boolean} true in case of a match
793 */ 829 */
794 matches(location, typeMask, docDomain, thirdParty, sitekey) 830 matches(location, typeMask, docDomain, thirdParty, sitekey)
795 { 831 {
796 return (this.contentType & typeMask) != 0 && 832 return (this.contentType & typeMask) != 0 &&
797 (this.thirdParty == null || this.thirdParty == thirdParty) && 833 (this.thirdParty == null || this.thirdParty == thirdParty) &&
798 this.isActiveOnDomain(docDomain, sitekey) && 834 this.isActiveOnDomain(docDomain, sitekey) &&
799 this.regexp.test(location); 835 this.matchesLocation(location);
800 }, 836 },
837
838 /**
839 * Checks whether the given URL matches this filter's pattern.
840 * @param {string} location The URL to check.
841 * @returns {boolean} <code>true</code> if the URL matches.
842 */
801 matchesLocation(location) 843 matchesLocation(location)
802 { 844 {
803 return RegExp(location).test(this.regexp); 845 let {regexp} = this;
Jon Sonesen 2018/10/20 23:09:48 My bad, thought I added doc strings here.
846
847 if (regexp)
848 return regexp.test(location);
849
850 if (!this.matchCase)
851 location = location.toLowerCase();
852
853 let {pattern} = this;
854
855 if (pattern[0] == "|" && pattern[1] == "|")
856 {
857 let index = location.indexOf(pattern.substring(2));
858
859 // The "||" prefix requires that the text that follows does not start
860 // with a forward slash.
861 return index != -1 && location[index] != "/" &&
862 doubleAnchorRegExp.test(location.substring(0, index));
863 }
864
865 return location.includes(pattern);
804 } 866 }
805 }); 867 });
806 868
807 /** 869 /**
808 * Yields the filter itself (required to optimize {@link Matcher}). 870 * Yields the filter itself (required to optimize {@link Matcher}).
809 * @yields {RegExpFilter} 871 * @yields {RegExpFilter}
810 */ 872 */
811 RegExpFilter.prototype[Symbol.iterator] = function*() 873 RegExpFilter.prototype[Symbol.iterator] = function*()
812 { 874 {
813 yield this; 875 yield this;
(...skipping 474 matching lines...) Expand 10 before | Expand all | Expand 10 after
1288 1350
1289 /** 1351 /**
1290 * Script that should be executed 1352 * Script that should be executed
1291 * @type {string} 1353 * @type {string}
1292 */ 1354 */
1293 get script() 1355 get script()
1294 { 1356 {
1295 return this.body; 1357 return this.body;
1296 } 1358 }
1297 }); 1359 });
LEFT | RIGHT
« no previous file | lib/matcher.js » ('j') | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld