| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 "use strict"; | 18 "use strict"; |
| 19 | 19 |
| 20 /** | 20 /** |
| 21 * @fileOverview Matcher class implementing matching addresses against | 21 * @fileOverview Matcher class implementing matching addresses against |
| 22 * a list of filters. | 22 * a list of filters. |
| 23 */ | 23 */ |
| 24 | 24 |
| 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); | 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); |
| 26 const {isThirdParty} = require("./domain"); | |
| 26 | 27 |
| 27 /** | 28 /** |
| 28 * Regular expression for matching a keyword in a filter. | 29 * Regular expression for matching a keyword in a filter. |
| 29 * @type {RegExp} | 30 * @type {RegExp} |
| 30 */ | 31 */ |
| 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; | 32 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
| 32 | 33 |
| 33 /** | 34 /** |
| 34 * Regular expression for matching all keywords in a filter. | 35 * Regular expression for matching all keywords in a filter. |
| 35 * @type {RegExp} | 36 * @type {RegExp} |
| (...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 369 collection.push(filter); | 370 collection.push(filter); |
| 370 } | 371 } |
| 371 } | 372 } |
| 372 } | 373 } |
| 373 | 374 |
| 374 return null; | 375 return null; |
| 375 } | 376 } |
| 376 | 377 |
| 377 /** | 378 /** |
| 378 * Tests whether the URL matches any of the known filters | 379 * Tests whether the URL matches any of the known filters |
| 379 * @param {string} location | 380 * @param {URL|string} location |
| 380 * URL to be tested | 381 * URL to be tested |
| 381 * @param {number} typeMask | 382 * @param {number} typeMask |
| 382 * bitmask of content / request types to match | 383 * bitmask of content / request types to match |
| 383 * @param {string} [docDomain] | 384 * @param {string} [docDomain] |
| 384 * domain name of the document that loads the URL | 385 * domain name of the document that loads the URL |
| 385 * @param {boolean} [thirdParty] | |
| 386 * should be true if the URL is a third-party request | |
| 387 * @param {string} [sitekey] | 386 * @param {string} [sitekey] |
| 388 * public key provided by the document | 387 * public key provided by the document |
| 389 * @param {boolean} [specificOnly] | 388 * @param {boolean} [specificOnly] |
| 390 * should be <code>true</code> if generic matches should be ignored | 389 * should be <code>true</code> if generic matches should be ignored |
| 391 * @returns {?RegExpFilter} | 390 * @returns {?RegExpFilter} |
| 392 * matching filter or <code>null</code> | 391 * matching filter or <code>null</code> |
| 393 */ | 392 */ |
| 394 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | 393 matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
| 395 { | 394 { |
| 395 let thirdParty = docDomain && isThirdParty(location, docDomain); | |
|
Sebastian Noack
2019/02/05 04:32:53
As discussed on IRC, how about only calling isThir
Manish Jethani
2019/02/05 05:07:28
I tried this but it actually seemed to be more exp
Sebastian Noack
2019/02/05 05:21:16
Sure, if you just call isThirdParty() as we perfor
Manish Jethani
2019/02/05 05:42:23
Yes, I know what you mean. I put the isThirdParty
Sebastian Noack
2019/02/05 05:54:45
Fair enough, for not further optimizing this here.
| |
| 396 | |
| 397 if (typeof location != "string") | |
| 398 location = location + ""; | |
| 399 | |
| 396 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 400 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| 397 if (candidates === null) | 401 if (candidates === null) |
| 398 candidates = []; | 402 candidates = []; |
| 399 candidates.push(""); | 403 candidates.push(""); |
| 404 | |
| 400 for (let i = 0, l = candidates.length; i < l; i++) | 405 for (let i = 0, l = candidates.length; i < l; i++) |
| 401 { | 406 { |
| 402 let result = this.checkEntryMatch(candidates[i], location, typeMask, | 407 let result = this.checkEntryMatch(candidates[i], location, typeMask, |
| 403 docDomain, thirdParty, sitekey, | 408 docDomain, thirdParty, sitekey, |
| 404 specificOnly); | 409 specificOnly); |
| 405 if (result) | 410 if (result) |
| 406 return result; | 411 return result; |
| 407 } | 412 } |
| 408 | 413 |
| 409 return null; | 414 return null; |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 500 } | 505 } |
| 501 | 506 |
| 502 /** | 507 /** |
| 503 * Optimized filter matching testing both whitelist and blacklist matchers | 508 * Optimized filter matching testing both whitelist and blacklist matchers |
| 504 * simultaneously. For parameters see | 509 * simultaneously. For parameters see |
| 505 {@link Matcher#matchesAny Matcher.matchesAny()}. | 510 {@link Matcher#matchesAny Matcher.matchesAny()}. |
| 506 * @see Matcher#matchesAny | 511 * @see Matcher#matchesAny |
| 507 * @inheritdoc | 512 * @inheritdoc |
| 508 * @private | 513 * @private |
| 509 */ | 514 */ |
| 510 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, | 515 _matchesAnyInternal(location, typeMask, docDomain, sitekey, specificOnly) |
| 511 specificOnly) | |
| 512 { | 516 { |
| 517 let thirdParty = docDomain && isThirdParty(location, docDomain); | |
| 518 | |
| 519 if (typeof location != "string") | |
| 520 location = location + ""; | |
| 521 | |
| 513 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 522 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| 514 if (candidates === null) | 523 if (candidates === null) |
| 515 candidates = []; | 524 candidates = []; |
| 516 | 525 |
| 517 // The first keyword in a URL is the protocol (usually "https" or "http"). | 526 // The first keyword in a URL is the protocol (usually "https" or "http"). |
| 518 // This is an outlier: it has hundreds of filters typically, yet it rarely | 527 // This is an outlier: it has hundreds of filters typically, yet it rarely |
| 519 // ever has a match. We cut down the amount of processing for blocked URLs | 528 // ever has a match. We cut down the amount of processing for blocked URLs |
| 520 // significantly by moving it to the end of the list. | 529 // significantly by moving it to the end of the list. |
| 521 if (candidates.length > 1) | 530 if (candidates.length > 1) |
| 522 candidates.push(candidates.shift()); | 531 candidates.push(candidates.shift()); |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 547 { | 556 { |
| 548 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location, | 557 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location, |
| 549 typeMask, docDomain, | 558 typeMask, docDomain, |
| 550 thirdParty, sitekey); | 559 thirdParty, sitekey); |
| 551 } | 560 } |
| 552 } | 561 } |
| 553 | 562 |
| 554 return whitelistHit || blacklistHit; | 563 return whitelistHit || blacklistHit; |
| 555 } | 564 } |
| 556 | 565 |
| 557 _searchInternal(location, typeMask, docDomain, thirdParty, sitekey, | 566 _searchInternal(location, typeMask, docDomain, sitekey, specificOnly, |
| 558 specificOnly, filterType) | 567 filterType) |
| 559 { | 568 { |
| 560 let hits = {}; | 569 let hits = {}; |
| 561 | 570 |
| 562 let searchBlocking = filterType == "blocking" || filterType == "all"; | 571 let searchBlocking = filterType == "blocking" || filterType == "all"; |
| 563 let searchWhitelist = filterType == "whitelist" || filterType == "all"; | 572 let searchWhitelist = filterType == "whitelist" || filterType == "all"; |
| 564 | 573 |
| 565 if (searchBlocking) | 574 if (searchBlocking) |
| 566 hits.blocking = []; | 575 hits.blocking = []; |
| 567 | 576 |
| 568 if (searchWhitelist) | 577 if (searchWhitelist) |
| 569 hits.whitelist = []; | 578 hits.whitelist = []; |
| 570 | 579 |
| 571 // If the type mask includes no types other than whitelist-only types, we | 580 // If the type mask includes no types other than whitelist-only types, we |
| 572 // can skip the blacklist. | 581 // can skip the blacklist. |
| 573 if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) | 582 if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) |
| 574 searchBlocking = false; | 583 searchBlocking = false; |
| 575 | 584 |
| 585 let thirdParty = docDomain && isThirdParty(location, docDomain); | |
| 586 | |
| 587 if (typeof location != "string") | |
| 588 location = location + ""; | |
| 589 | |
| 576 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 590 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| 577 if (candidates === null) | 591 if (candidates === null) |
| 578 candidates = []; | 592 candidates = []; |
| 579 candidates.push(""); | 593 candidates.push(""); |
| 580 | 594 |
| 581 for (let i = 0, l = candidates.length; i < l; i++) | 595 for (let i = 0, l = candidates.length; i < l; i++) |
| 582 { | 596 { |
| 583 if (searchBlocking) | 597 if (searchBlocking) |
| 584 { | 598 { |
| 585 this._blacklist.checkEntryMatch(candidates[i], location, typeMask, | 599 this._blacklist.checkEntryMatch(candidates[i], location, typeMask, |
| 586 docDomain, thirdParty, sitekey, | 600 docDomain, thirdParty, sitekey, |
| 587 specificOnly, hits.blocking); | 601 specificOnly, hits.blocking); |
| 588 } | 602 } |
| 589 | 603 |
| 590 if (searchWhitelist) | 604 if (searchWhitelist) |
| 591 { | 605 { |
| 592 this._whitelist.checkEntryMatch(candidates[i], location, typeMask, | 606 this._whitelist.checkEntryMatch(candidates[i], location, typeMask, |
| 593 docDomain, thirdParty, sitekey, | 607 docDomain, thirdParty, sitekey, |
| 594 false, hits.whitelist); | 608 false, hits.whitelist); |
| 595 } | 609 } |
| 596 } | 610 } |
| 597 | 611 |
| 598 return hits; | 612 return hits; |
| 599 } | 613 } |
| 600 | 614 |
| 601 /** | 615 /** |
| 602 * @see Matcher#matchesAny | 616 * @see Matcher#matchesAny |
| 603 * @inheritdoc | 617 * @inheritdoc |
| 604 */ | 618 */ |
| 605 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | 619 matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
| 606 { | 620 { |
| 607 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + | 621 let key = location + " " + typeMask + " " + docDomain + " " + sitekey + |
| 608 " " + sitekey + " " + specificOnly; | 622 " " + specificOnly; |
| 609 | 623 |
| 610 let result = this._resultCache.get(key); | 624 let result = this._resultCache.get(key); |
| 611 if (typeof result != "undefined") | 625 if (typeof result != "undefined") |
| 612 return result; | 626 return result; |
| 613 | 627 |
| 614 result = this._matchesAnyInternal(location, typeMask, docDomain, | 628 result = this._matchesAnyInternal(location, typeMask, docDomain, |
| 615 thirdParty, sitekey, specificOnly); | 629 sitekey, specificOnly); |
| 616 | 630 |
| 617 if (this._resultCache.size >= this.maxCacheEntries) | 631 if (this._resultCache.size >= this.maxCacheEntries) |
| 618 this._resultCache.clear(); | 632 this._resultCache.clear(); |
| 619 | 633 |
| 620 this._resultCache.set(key, result); | 634 this._resultCache.set(key, result); |
| 621 | 635 |
| 622 return result; | 636 return result; |
| 623 } | 637 } |
| 624 | 638 |
| 625 /** | 639 /** |
| 626 * @typedef {object} MatcherSearchResults | 640 * @typedef {object} MatcherSearchResults |
| 627 * @property {Array.<BlockingFilter>} [blocking] List of blocking filters | 641 * @property {Array.<BlockingFilter>} [blocking] List of blocking filters |
| 628 * found. | 642 * found. |
| 629 * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters | 643 * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters |
| 630 * found. | 644 * found. |
| 631 */ | 645 */ |
| 632 | 646 |
| 633 /** | 647 /** |
| 634 * Searches all blocking and whitelist filters and returns results matching | 648 * Searches all blocking and whitelist filters and returns results matching |
| 635 * the given parameters. | 649 * the given parameters. |
| 636 * | 650 * |
| 637 * @param {string} location | 651 * @param {URL|string} location |
| 638 * @param {number} typeMask | 652 * @param {number} typeMask |
| 639 * @param {string} [docDomain] | 653 * @param {string} [docDomain] |
| 640 * @param {boolean} [thirdParty] | |
| 641 * @param {string} [sitekey] | 654 * @param {string} [sitekey] |
| 642 * @param {boolean} [specificOnly] | 655 * @param {boolean} [specificOnly] |
| 643 * @param {string} [filterType] The types of filters to look for. This can be | 656 * @param {string} [filterType] The types of filters to look for. This can be |
| 644 * <code>"blocking"</code>, <code>"whitelist"</code>, or | 657 * <code>"blocking"</code>, <code>"whitelist"</code>, or |
| 645 * <code>"all"</code> (default). | 658 * <code>"all"</code> (default). |
| 646 * | 659 * |
| 647 * @returns {MatcherSearchResults} | 660 * @returns {MatcherSearchResults} |
| 648 */ | 661 */ |
| 649 search(location, typeMask, docDomain, thirdParty, sitekey, specificOnly, | 662 search(location, typeMask, docDomain, sitekey, specificOnly, |
| 650 filterType = "all") | 663 filterType = "all") |
| 651 { | 664 { |
| 652 let key = "* " + location + " " + typeMask + " " + docDomain + " " + | 665 let key = "* " + location + " " + typeMask + " " + docDomain + " " + |
| 653 thirdParty + " " + sitekey + " " + specificOnly + " " + | 666 sitekey + " " + specificOnly + " " + filterType; |
| 654 filterType; | |
| 655 | 667 |
| 656 let result = this._resultCache.get(key); | 668 let result = this._resultCache.get(key); |
| 657 if (typeof result != "undefined") | 669 if (typeof result != "undefined") |
| 658 return result; | 670 return result; |
| 659 | 671 |
| 660 result = this._searchInternal(location, typeMask, docDomain, thirdParty, | 672 result = this._searchInternal(location, typeMask, docDomain, sitekey, |
| 661 sitekey, specificOnly, filterType); | 673 specificOnly, filterType); |
| 662 | 674 |
| 663 if (this._resultCache.size >= this.maxCacheEntries) | 675 if (this._resultCache.size >= this.maxCacheEntries) |
| 664 this._resultCache.clear(); | 676 this._resultCache.clear(); |
| 665 | 677 |
| 666 this._resultCache.set(key, result); | 678 this._resultCache.set(key, result); |
| 667 | 679 |
| 668 return result; | 680 return result; |
| 669 } | 681 } |
| 670 | 682 |
| 671 /** | 683 /** |
| 672 * Tests whether the URL is whitelisted | 684 * Tests whether the URL is whitelisted |
| 673 * @see Matcher#matchesAny | 685 * @see Matcher#matchesAny |
| 674 * @inheritdoc | 686 * @inheritdoc |
| 675 * @returns {boolean} | 687 * @returns {boolean} |
| 676 */ | 688 */ |
| 677 isWhitelisted(location, typeMask, docDomain, thirdParty, sitekey, | 689 isWhitelisted(location, typeMask, docDomain, sitekey, specificOnly) |
| 678 specificOnly) | |
| 679 { | 690 { |
| 680 return !!this._whitelist.matchesAny(location, typeMask, docDomain, | 691 return !!this._whitelist.matchesAny(location, typeMask, docDomain, sitekey, |
| 681 thirdParty, sitekey, specificOnly); | 692 specificOnly); |
| 682 } | 693 } |
| 683 } | 694 } |
| 684 | 695 |
| 685 exports.CombinedMatcher = CombinedMatcher; | 696 exports.CombinedMatcher = CombinedMatcher; |
| 686 | 697 |
| 687 /** | 698 /** |
| 688 * Shared {@link CombinedMatcher} instance that should usually be used. | 699 * Shared {@link CombinedMatcher} instance that should usually be used. |
| 689 * @type {CombinedMatcher} | 700 * @type {CombinedMatcher} |
| 690 */ | 701 */ |
| 691 let defaultMatcher = new CombinedMatcher(); | 702 let defaultMatcher = new CombinedMatcher(); |
| 692 | 703 |
| 693 exports.defaultMatcher = defaultMatcher; | 704 exports.defaultMatcher = defaultMatcher; |
| OLD | NEW |