Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/matcher.js

Issue 29998564: Issue 7260 - Internalize third-party request check in matcher (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Created Feb. 5, 2019, 4:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26 const {isThirdParty} = require("./domain");
26 27
27 /** 28 /**
28 * Regular expression for matching a keyword in a filter. 29 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp} 30 * @type {RegExp}
30 */ 31 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; 32 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32 33
33 /** 34 /**
34 * Regular expression for matching all keywords in a filter. 35 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp} 36 * @type {RegExp}
(...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after
369 collection.push(filter); 370 collection.push(filter);
370 } 371 }
371 } 372 }
372 } 373 }
373 374
374 return null; 375 return null;
375 } 376 }
376 377
377 /** 378 /**
378 * Tests whether the URL matches any of the known filters 379 * Tests whether the URL matches any of the known filters
379 * @param {string} location 380 * @param {URL|string} location
380 * URL to be tested 381 * URL to be tested
381 * @param {number} typeMask 382 * @param {number} typeMask
382 * bitmask of content / request types to match 383 * bitmask of content / request types to match
383 * @param {string} [docDomain] 384 * @param {string} [docDomain]
384 * domain name of the document that loads the URL 385 * domain name of the document that loads the URL
385 * @param {boolean} [thirdParty]
386 * should be true if the URL is a third-party request
387 * @param {string} [sitekey] 386 * @param {string} [sitekey]
388 * public key provided by the document 387 * public key provided by the document
389 * @param {boolean} [specificOnly] 388 * @param {boolean} [specificOnly]
390 * should be <code>true</code> if generic matches should be ignored 389 * should be <code>true</code> if generic matches should be ignored
391 * @returns {?RegExpFilter} 390 * @returns {?RegExpFilter}
392 * matching filter or <code>null</code> 391 * matching filter or <code>null</code>
393 */ 392 */
394 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 393 matchesAny(location, typeMask, docDomain, sitekey, specificOnly)
395 { 394 {
395 let thirdParty = docDomain && isThirdParty(location, docDomain);
Sebastian Noack 2019/02/05 04:32:53 As discussed on IRC, how about only calling isThir
Manish Jethani 2019/02/05 05:07:28 I tried this but it actually seemed to be more exp
Sebastian Noack 2019/02/05 05:21:16 Sure, if you just call isThirdParty() as we perfor
Manish Jethani 2019/02/05 05:42:23 Yes, I know what you mean. I put the isThirdParty
Sebastian Noack 2019/02/05 05:54:45 Fair enough, for not further optimizing this here.
396
397 if (typeof location != "string")
398 location = location + "";
399
396 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 400 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
397 if (candidates === null) 401 if (candidates === null)
398 candidates = []; 402 candidates = [];
399 candidates.push(""); 403 candidates.push("");
404
400 for (let i = 0, l = candidates.length; i < l; i++) 405 for (let i = 0, l = candidates.length; i < l; i++)
401 { 406 {
402 let result = this.checkEntryMatch(candidates[i], location, typeMask, 407 let result = this.checkEntryMatch(candidates[i], location, typeMask,
403 docDomain, thirdParty, sitekey, 408 docDomain, thirdParty, sitekey,
404 specificOnly); 409 specificOnly);
405 if (result) 410 if (result)
406 return result; 411 return result;
407 } 412 }
408 413
409 return null; 414 return null;
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
500 } 505 }
501 506
502 /** 507 /**
503 * Optimized filter matching testing both whitelist and blacklist matchers 508 * Optimized filter matching testing both whitelist and blacklist matchers
504 * simultaneously. For parameters see 509 * simultaneously. For parameters see
505 {@link Matcher#matchesAny Matcher.matchesAny()}. 510 {@link Matcher#matchesAny Matcher.matchesAny()}.
506 * @see Matcher#matchesAny 511 * @see Matcher#matchesAny
507 * @inheritdoc 512 * @inheritdoc
508 * @private 513 * @private
509 */ 514 */
510 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 515 _matchesAnyInternal(location, typeMask, docDomain, sitekey, specificOnly)
511 specificOnly)
512 { 516 {
517 let thirdParty = docDomain && isThirdParty(location, docDomain);
518
519 if (typeof location != "string")
520 location = location + "";
521
513 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 522 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
514 if (candidates === null) 523 if (candidates === null)
515 candidates = []; 524 candidates = [];
516 525
517 // The first keyword in a URL is the protocol (usually "https" or "http"). 526 // The first keyword in a URL is the protocol (usually "https" or "http").
518 // This is an outlier: it has hundreds of filters typically, yet it rarely 527 // This is an outlier: it has hundreds of filters typically, yet it rarely
519 // ever has a match. We cut down the amount of processing for blocked URLs 528 // ever has a match. We cut down the amount of processing for blocked URLs
520 // significantly by moving it to the end of the list. 529 // significantly by moving it to the end of the list.
521 if (candidates.length > 1) 530 if (candidates.length > 1)
522 candidates.push(candidates.shift()); 531 candidates.push(candidates.shift());
(...skipping 24 matching lines...) Expand all
547 { 556 {
548 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location, 557 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location,
549 typeMask, docDomain, 558 typeMask, docDomain,
550 thirdParty, sitekey); 559 thirdParty, sitekey);
551 } 560 }
552 } 561 }
553 562
554 return whitelistHit || blacklistHit; 563 return whitelistHit || blacklistHit;
555 } 564 }
556 565
557 _searchInternal(location, typeMask, docDomain, thirdParty, sitekey, 566 _searchInternal(location, typeMask, docDomain, sitekey, specificOnly,
558 specificOnly, filterType) 567 filterType)
559 { 568 {
560 let hits = {}; 569 let hits = {};
561 570
562 let searchBlocking = filterType == "blocking" || filterType == "all"; 571 let searchBlocking = filterType == "blocking" || filterType == "all";
563 let searchWhitelist = filterType == "whitelist" || filterType == "all"; 572 let searchWhitelist = filterType == "whitelist" || filterType == "all";
564 573
565 if (searchBlocking) 574 if (searchBlocking)
566 hits.blocking = []; 575 hits.blocking = [];
567 576
568 if (searchWhitelist) 577 if (searchWhitelist)
569 hits.whitelist = []; 578 hits.whitelist = [];
570 579
571 // If the type mask includes no types other than whitelist-only types, we 580 // If the type mask includes no types other than whitelist-only types, we
572 // can skip the blacklist. 581 // can skip the blacklist.
573 if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) 582 if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0)
574 searchBlocking = false; 583 searchBlocking = false;
575 584
585 let thirdParty = docDomain && isThirdParty(location, docDomain);
586
587 if (typeof location != "string")
588 location = location + "";
589
576 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 590 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
577 if (candidates === null) 591 if (candidates === null)
578 candidates = []; 592 candidates = [];
579 candidates.push(""); 593 candidates.push("");
580 594
581 for (let i = 0, l = candidates.length; i < l; i++) 595 for (let i = 0, l = candidates.length; i < l; i++)
582 { 596 {
583 if (searchBlocking) 597 if (searchBlocking)
584 { 598 {
585 this._blacklist.checkEntryMatch(candidates[i], location, typeMask, 599 this._blacklist.checkEntryMatch(candidates[i], location, typeMask,
586 docDomain, thirdParty, sitekey, 600 docDomain, thirdParty, sitekey,
587 specificOnly, hits.blocking); 601 specificOnly, hits.blocking);
588 } 602 }
589 603
590 if (searchWhitelist) 604 if (searchWhitelist)
591 { 605 {
592 this._whitelist.checkEntryMatch(candidates[i], location, typeMask, 606 this._whitelist.checkEntryMatch(candidates[i], location, typeMask,
593 docDomain, thirdParty, sitekey, 607 docDomain, thirdParty, sitekey,
594 false, hits.whitelist); 608 false, hits.whitelist);
595 } 609 }
596 } 610 }
597 611
598 return hits; 612 return hits;
599 } 613 }
600 614
601 /** 615 /**
602 * @see Matcher#matchesAny 616 * @see Matcher#matchesAny
603 * @inheritdoc 617 * @inheritdoc
604 */ 618 */
605 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 619 matchesAny(location, typeMask, docDomain, sitekey, specificOnly)
606 { 620 {
607 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 621 let key = location + " " + typeMask + " " + docDomain + " " + sitekey +
608 " " + sitekey + " " + specificOnly; 622 " " + specificOnly;
609 623
610 let result = this._resultCache.get(key); 624 let result = this._resultCache.get(key);
611 if (typeof result != "undefined") 625 if (typeof result != "undefined")
612 return result; 626 return result;
613 627
614 result = this._matchesAnyInternal(location, typeMask, docDomain, 628 result = this._matchesAnyInternal(location, typeMask, docDomain,
615 thirdParty, sitekey, specificOnly); 629 sitekey, specificOnly);
616 630
617 if (this._resultCache.size >= this.maxCacheEntries) 631 if (this._resultCache.size >= this.maxCacheEntries)
618 this._resultCache.clear(); 632 this._resultCache.clear();
619 633
620 this._resultCache.set(key, result); 634 this._resultCache.set(key, result);
621 635
622 return result; 636 return result;
623 } 637 }
624 638
625 /** 639 /**
626 * @typedef {object} MatcherSearchResults 640 * @typedef {object} MatcherSearchResults
627 * @property {Array.<BlockingFilter>} [blocking] List of blocking filters 641 * @property {Array.<BlockingFilter>} [blocking] List of blocking filters
628 * found. 642 * found.
629 * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters 643 * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters
630 * found. 644 * found.
631 */ 645 */
632 646
633 /** 647 /**
634 * Searches all blocking and whitelist filters and returns results matching 648 * Searches all blocking and whitelist filters and returns results matching
635 * the given parameters. 649 * the given parameters.
636 * 650 *
637 * @param {string} location 651 * @param {URL|string} location
638 * @param {number} typeMask 652 * @param {number} typeMask
639 * @param {string} [docDomain] 653 * @param {string} [docDomain]
640 * @param {boolean} [thirdParty]
641 * @param {string} [sitekey] 654 * @param {string} [sitekey]
642 * @param {boolean} [specificOnly] 655 * @param {boolean} [specificOnly]
643 * @param {string} [filterType] The types of filters to look for. This can be 656 * @param {string} [filterType] The types of filters to look for. This can be
644 * <code>"blocking"</code>, <code>"whitelist"</code>, or 657 * <code>"blocking"</code>, <code>"whitelist"</code>, or
645 * <code>"all"</code> (default). 658 * <code>"all"</code> (default).
646 * 659 *
647 * @returns {MatcherSearchResults} 660 * @returns {MatcherSearchResults}
648 */ 661 */
649 search(location, typeMask, docDomain, thirdParty, sitekey, specificOnly, 662 search(location, typeMask, docDomain, sitekey, specificOnly,
650 filterType = "all") 663 filterType = "all")
651 { 664 {
652 let key = "* " + location + " " + typeMask + " " + docDomain + " " + 665 let key = "* " + location + " " + typeMask + " " + docDomain + " " +
653 thirdParty + " " + sitekey + " " + specificOnly + " " + 666 sitekey + " " + specificOnly + " " + filterType;
654 filterType;
655 667
656 let result = this._resultCache.get(key); 668 let result = this._resultCache.get(key);
657 if (typeof result != "undefined") 669 if (typeof result != "undefined")
658 return result; 670 return result;
659 671
660 result = this._searchInternal(location, typeMask, docDomain, thirdParty, 672 result = this._searchInternal(location, typeMask, docDomain, sitekey,
661 sitekey, specificOnly, filterType); 673 specificOnly, filterType);
662 674
663 if (this._resultCache.size >= this.maxCacheEntries) 675 if (this._resultCache.size >= this.maxCacheEntries)
664 this._resultCache.clear(); 676 this._resultCache.clear();
665 677
666 this._resultCache.set(key, result); 678 this._resultCache.set(key, result);
667 679
668 return result; 680 return result;
669 } 681 }
670 682
671 /** 683 /**
672 * Tests whether the URL is whitelisted 684 * Tests whether the URL is whitelisted
673 * @see Matcher#matchesAny 685 * @see Matcher#matchesAny
674 * @inheritdoc 686 * @inheritdoc
675 * @returns {boolean} 687 * @returns {boolean}
676 */ 688 */
677 isWhitelisted(location, typeMask, docDomain, thirdParty, sitekey, 689 isWhitelisted(location, typeMask, docDomain, sitekey, specificOnly)
678 specificOnly)
679 { 690 {
680 return !!this._whitelist.matchesAny(location, typeMask, docDomain, 691 return !!this._whitelist.matchesAny(location, typeMask, docDomain, sitekey,
681 thirdParty, sitekey, specificOnly); 692 specificOnly);
682 } 693 }
683 } 694 }
684 695
685 exports.CombinedMatcher = CombinedMatcher; 696 exports.CombinedMatcher = CombinedMatcher;
686 697
687 /** 698 /**
688 * Shared {@link CombinedMatcher} instance that should usually be used. 699 * Shared {@link CombinedMatcher} instance that should usually be used.
689 * @type {CombinedMatcher} 700 * @type {CombinedMatcher}
690 */ 701 */
691 let defaultMatcher = new CombinedMatcher(); 702 let defaultMatcher = new CombinedMatcher();
692 703
693 exports.defaultMatcher = defaultMatcher; 704 exports.defaultMatcher = defaultMatcher;
OLDNEW
« lib/domain.js ('K') | « lib/domain.js ('k') | test/matcher.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld