Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 "use strict"; | 18 "use strict"; |
19 | 19 |
20 /** | 20 /** |
21 * @fileOverview Matcher class implementing matching addresses against | 21 * @fileOverview Matcher class implementing matching addresses against |
22 * a list of filters. | 22 * a list of filters. |
23 */ | 23 */ |
24 | 24 |
25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); | 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); |
26 const {isThirdParty} = require("./domain"); | |
26 | 27 |
27 /** | 28 /** |
28 * Regular expression for matching a keyword in a filter. | 29 * Regular expression for matching a keyword in a filter. |
29 * @type {RegExp} | 30 * @type {RegExp} |
30 */ | 31 */ |
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; | 32 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
32 | 33 |
33 /** | 34 /** |
34 * Regular expression for matching all keywords in a filter. | 35 * Regular expression for matching all keywords in a filter. |
35 * @type {RegExp} | 36 * @type {RegExp} |
(...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
369 collection.push(filter); | 370 collection.push(filter); |
370 } | 371 } |
371 } | 372 } |
372 } | 373 } |
373 | 374 |
374 return null; | 375 return null; |
375 } | 376 } |
376 | 377 |
377 /** | 378 /** |
378 * Tests whether the URL matches any of the known filters | 379 * Tests whether the URL matches any of the known filters |
379 * @param {string} location | 380 * @param {URL|string} location |
380 * URL to be tested | 381 * URL to be tested |
381 * @param {number} typeMask | 382 * @param {number} typeMask |
382 * bitmask of content / request types to match | 383 * bitmask of content / request types to match |
383 * @param {string} [docDomain] | 384 * @param {string} [docDomain] |
384 * domain name of the document that loads the URL | 385 * domain name of the document that loads the URL |
385 * @param {boolean} [thirdParty] | |
386 * should be true if the URL is a third-party request | |
387 * @param {string} [sitekey] | 386 * @param {string} [sitekey] |
388 * public key provided by the document | 387 * public key provided by the document |
389 * @param {boolean} [specificOnly] | 388 * @param {boolean} [specificOnly] |
390 * should be <code>true</code> if generic matches should be ignored | 389 * should be <code>true</code> if generic matches should be ignored |
391 * @returns {?RegExpFilter} | 390 * @returns {?RegExpFilter} |
392 * matching filter or <code>null</code> | 391 * matching filter or <code>null</code> |
393 */ | 392 */ |
394 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | 393 matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
395 { | 394 { |
395 let thirdParty = docDomain && isThirdParty(location, docDomain); | |
Sebastian Noack
2019/02/05 04:32:53
As discussed on IRC, how about only calling isThir
Manish Jethani
2019/02/05 05:07:28
I tried this but it actually seemed to be more exp
Sebastian Noack
2019/02/05 05:21:16
Sure, if you just call isThirdParty() as we perfor
Manish Jethani
2019/02/05 05:42:23
Yes, I know what you mean. I put the isThirdParty
Sebastian Noack
2019/02/05 05:54:45
Fair enough, for not further optimizing this here.
| |
396 | |
397 if (typeof location != "string") | |
398 location = location + ""; | |
399 | |
396 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 400 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
397 if (candidates === null) | 401 if (candidates === null) |
398 candidates = []; | 402 candidates = []; |
399 candidates.push(""); | 403 candidates.push(""); |
404 | |
400 for (let i = 0, l = candidates.length; i < l; i++) | 405 for (let i = 0, l = candidates.length; i < l; i++) |
401 { | 406 { |
402 let result = this.checkEntryMatch(candidates[i], location, typeMask, | 407 let result = this.checkEntryMatch(candidates[i], location, typeMask, |
403 docDomain, thirdParty, sitekey, | 408 docDomain, thirdParty, sitekey, |
404 specificOnly); | 409 specificOnly); |
405 if (result) | 410 if (result) |
406 return result; | 411 return result; |
407 } | 412 } |
408 | 413 |
409 return null; | 414 return null; |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
500 } | 505 } |
501 | 506 |
502 /** | 507 /** |
503 * Optimized filter matching testing both whitelist and blacklist matchers | 508 * Optimized filter matching testing both whitelist and blacklist matchers |
504 * simultaneously. For parameters see | 509 * simultaneously. For parameters see |
505 {@link Matcher#matchesAny Matcher.matchesAny()}. | 510 {@link Matcher#matchesAny Matcher.matchesAny()}. |
506 * @see Matcher#matchesAny | 511 * @see Matcher#matchesAny |
507 * @inheritdoc | 512 * @inheritdoc |
508 * @private | 513 * @private |
509 */ | 514 */ |
510 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, | 515 _matchesAnyInternal(location, typeMask, docDomain, sitekey, specificOnly) |
511 specificOnly) | |
512 { | 516 { |
517 let thirdParty = docDomain && isThirdParty(location, docDomain); | |
518 | |
519 if (typeof location != "string") | |
520 location = location + ""; | |
521 | |
513 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 522 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
514 if (candidates === null) | 523 if (candidates === null) |
515 candidates = []; | 524 candidates = []; |
516 | 525 |
517 // The first keyword in a URL is the protocol (usually "https" or "http"). | 526 // The first keyword in a URL is the protocol (usually "https" or "http"). |
518 // This is an outlier: it has hundreds of filters typically, yet it rarely | 527 // This is an outlier: it has hundreds of filters typically, yet it rarely |
519 // ever has a match. We cut down the amount of processing for blocked URLs | 528 // ever has a match. We cut down the amount of processing for blocked URLs |
520 // significantly by moving it to the end of the list. | 529 // significantly by moving it to the end of the list. |
521 if (candidates.length > 1) | 530 if (candidates.length > 1) |
522 candidates.push(candidates.shift()); | 531 candidates.push(candidates.shift()); |
(...skipping 24 matching lines...) Expand all Loading... | |
547 { | 556 { |
548 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location, | 557 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location, |
549 typeMask, docDomain, | 558 typeMask, docDomain, |
550 thirdParty, sitekey); | 559 thirdParty, sitekey); |
551 } | 560 } |
552 } | 561 } |
553 | 562 |
554 return whitelistHit || blacklistHit; | 563 return whitelistHit || blacklistHit; |
555 } | 564 } |
556 | 565 |
557 _searchInternal(location, typeMask, docDomain, thirdParty, sitekey, | 566 _searchInternal(location, typeMask, docDomain, sitekey, specificOnly, |
558 specificOnly, filterType) | 567 filterType) |
559 { | 568 { |
560 let hits = {}; | 569 let hits = {}; |
561 | 570 |
562 let searchBlocking = filterType == "blocking" || filterType == "all"; | 571 let searchBlocking = filterType == "blocking" || filterType == "all"; |
563 let searchWhitelist = filterType == "whitelist" || filterType == "all"; | 572 let searchWhitelist = filterType == "whitelist" || filterType == "all"; |
564 | 573 |
565 if (searchBlocking) | 574 if (searchBlocking) |
566 hits.blocking = []; | 575 hits.blocking = []; |
567 | 576 |
568 if (searchWhitelist) | 577 if (searchWhitelist) |
569 hits.whitelist = []; | 578 hits.whitelist = []; |
570 | 579 |
571 // If the type mask includes no types other than whitelist-only types, we | 580 // If the type mask includes no types other than whitelist-only types, we |
572 // can skip the blacklist. | 581 // can skip the blacklist. |
573 if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) | 582 if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) |
574 searchBlocking = false; | 583 searchBlocking = false; |
575 | 584 |
585 let thirdParty = docDomain && isThirdParty(location, docDomain); | |
586 | |
587 if (typeof location != "string") | |
588 location = location + ""; | |
589 | |
576 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 590 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
577 if (candidates === null) | 591 if (candidates === null) |
578 candidates = []; | 592 candidates = []; |
579 candidates.push(""); | 593 candidates.push(""); |
580 | 594 |
581 for (let i = 0, l = candidates.length; i < l; i++) | 595 for (let i = 0, l = candidates.length; i < l; i++) |
582 { | 596 { |
583 if (searchBlocking) | 597 if (searchBlocking) |
584 { | 598 { |
585 this._blacklist.checkEntryMatch(candidates[i], location, typeMask, | 599 this._blacklist.checkEntryMatch(candidates[i], location, typeMask, |
586 docDomain, thirdParty, sitekey, | 600 docDomain, thirdParty, sitekey, |
587 specificOnly, hits.blocking); | 601 specificOnly, hits.blocking); |
588 } | 602 } |
589 | 603 |
590 if (searchWhitelist) | 604 if (searchWhitelist) |
591 { | 605 { |
592 this._whitelist.checkEntryMatch(candidates[i], location, typeMask, | 606 this._whitelist.checkEntryMatch(candidates[i], location, typeMask, |
593 docDomain, thirdParty, sitekey, | 607 docDomain, thirdParty, sitekey, |
594 false, hits.whitelist); | 608 false, hits.whitelist); |
595 } | 609 } |
596 } | 610 } |
597 | 611 |
598 return hits; | 612 return hits; |
599 } | 613 } |
600 | 614 |
601 /** | 615 /** |
602 * @see Matcher#matchesAny | 616 * @see Matcher#matchesAny |
603 * @inheritdoc | 617 * @inheritdoc |
604 */ | 618 */ |
605 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | 619 matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
606 { | 620 { |
607 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + | 621 let key = location + " " + typeMask + " " + docDomain + " " + sitekey + |
608 " " + sitekey + " " + specificOnly; | 622 " " + specificOnly; |
609 | 623 |
610 let result = this._resultCache.get(key); | 624 let result = this._resultCache.get(key); |
611 if (typeof result != "undefined") | 625 if (typeof result != "undefined") |
612 return result; | 626 return result; |
613 | 627 |
614 result = this._matchesAnyInternal(location, typeMask, docDomain, | 628 result = this._matchesAnyInternal(location, typeMask, docDomain, |
615 thirdParty, sitekey, specificOnly); | 629 sitekey, specificOnly); |
616 | 630 |
617 if (this._resultCache.size >= this.maxCacheEntries) | 631 if (this._resultCache.size >= this.maxCacheEntries) |
618 this._resultCache.clear(); | 632 this._resultCache.clear(); |
619 | 633 |
620 this._resultCache.set(key, result); | 634 this._resultCache.set(key, result); |
621 | 635 |
622 return result; | 636 return result; |
623 } | 637 } |
624 | 638 |
625 /** | 639 /** |
626 * @typedef {object} MatcherSearchResults | 640 * @typedef {object} MatcherSearchResults |
627 * @property {Array.<BlockingFilter>} [blocking] List of blocking filters | 641 * @property {Array.<BlockingFilter>} [blocking] List of blocking filters |
628 * found. | 642 * found. |
629 * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters | 643 * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters |
630 * found. | 644 * found. |
631 */ | 645 */ |
632 | 646 |
633 /** | 647 /** |
634 * Searches all blocking and whitelist filters and returns results matching | 648 * Searches all blocking and whitelist filters and returns results matching |
635 * the given parameters. | 649 * the given parameters. |
636 * | 650 * |
637 * @param {string} location | 651 * @param {URL|string} location |
638 * @param {number} typeMask | 652 * @param {number} typeMask |
639 * @param {string} [docDomain] | 653 * @param {string} [docDomain] |
640 * @param {boolean} [thirdParty] | |
641 * @param {string} [sitekey] | 654 * @param {string} [sitekey] |
642 * @param {boolean} [specificOnly] | 655 * @param {boolean} [specificOnly] |
643 * @param {string} [filterType] The types of filters to look for. This can be | 656 * @param {string} [filterType] The types of filters to look for. This can be |
644 * <code>"blocking"</code>, <code>"whitelist"</code>, or | 657 * <code>"blocking"</code>, <code>"whitelist"</code>, or |
645 * <code>"all"</code> (default). | 658 * <code>"all"</code> (default). |
646 * | 659 * |
647 * @returns {MatcherSearchResults} | 660 * @returns {MatcherSearchResults} |
648 */ | 661 */ |
649 search(location, typeMask, docDomain, thirdParty, sitekey, specificOnly, | 662 search(location, typeMask, docDomain, sitekey, specificOnly, |
650 filterType = "all") | 663 filterType = "all") |
651 { | 664 { |
652 let key = "* " + location + " " + typeMask + " " + docDomain + " " + | 665 let key = "* " + location + " " + typeMask + " " + docDomain + " " + |
653 thirdParty + " " + sitekey + " " + specificOnly + " " + | 666 sitekey + " " + specificOnly + " " + filterType; |
654 filterType; | |
655 | 667 |
656 let result = this._resultCache.get(key); | 668 let result = this._resultCache.get(key); |
657 if (typeof result != "undefined") | 669 if (typeof result != "undefined") |
658 return result; | 670 return result; |
659 | 671 |
660 result = this._searchInternal(location, typeMask, docDomain, thirdParty, | 672 result = this._searchInternal(location, typeMask, docDomain, sitekey, |
661 sitekey, specificOnly, filterType); | 673 specificOnly, filterType); |
662 | 674 |
663 if (this._resultCache.size >= this.maxCacheEntries) | 675 if (this._resultCache.size >= this.maxCacheEntries) |
664 this._resultCache.clear(); | 676 this._resultCache.clear(); |
665 | 677 |
666 this._resultCache.set(key, result); | 678 this._resultCache.set(key, result); |
667 | 679 |
668 return result; | 680 return result; |
669 } | 681 } |
670 | 682 |
671 /** | 683 /** |
672 * Tests whether the URL is whitelisted | 684 * Tests whether the URL is whitelisted |
673 * @see Matcher#matchesAny | 685 * @see Matcher#matchesAny |
674 * @inheritdoc | 686 * @inheritdoc |
675 * @returns {boolean} | 687 * @returns {boolean} |
676 */ | 688 */ |
677 isWhitelisted(location, typeMask, docDomain, thirdParty, sitekey, | 689 isWhitelisted(location, typeMask, docDomain, sitekey, specificOnly) |
678 specificOnly) | |
679 { | 690 { |
680 return !!this._whitelist.matchesAny(location, typeMask, docDomain, | 691 return !!this._whitelist.matchesAny(location, typeMask, docDomain, sitekey, |
681 thirdParty, sitekey, specificOnly); | 692 specificOnly); |
682 } | 693 } |
683 } | 694 } |
684 | 695 |
685 exports.CombinedMatcher = CombinedMatcher; | 696 exports.CombinedMatcher = CombinedMatcher; |
686 | 697 |
687 /** | 698 /** |
688 * Shared {@link CombinedMatcher} instance that should usually be used. | 699 * Shared {@link CombinedMatcher} instance that should usually be used. |
689 * @type {CombinedMatcher} | 700 * @type {CombinedMatcher} |
690 */ | 701 */ |
691 let defaultMatcher = new CombinedMatcher(); | 702 let defaultMatcher = new CombinedMatcher(); |
692 | 703 |
693 exports.defaultMatcher = defaultMatcher; | 704 exports.defaultMatcher = defaultMatcher; |
OLD | NEW |