| Index: lib/matcher.js |
| =================================================================== |
| --- a/lib/matcher.js |
| +++ b/lib/matcher.js |
| @@ -18,16 +18,17 @@ |
| "use strict"; |
| /** |
| * @fileOverview Matcher class implementing matching addresses against |
| * a list of filters. |
| */ |
| const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); |
| +const {isThirdParty} = require("./domain"); |
| /** |
| * Regular expression for matching a keyword in a filter. |
| * @type {RegExp} |
| */ |
| const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
| /** |
| @@ -371,37 +372,41 @@ |
| } |
| } |
| return null; |
| } |
| /** |
| * Tests whether the URL matches any of the known filters |
| - * @param {string} location |
| + * @param {URL|string} location |
| * URL to be tested |
| * @param {number} typeMask |
| * bitmask of content / request types to match |
| * @param {string} [docDomain] |
| * domain name of the document that loads the URL |
| - * @param {boolean} [thirdParty] |
| - * should be true if the URL is a third-party request |
| * @param {string} [sitekey] |
| * public key provided by the document |
| * @param {boolean} [specificOnly] |
| * should be <code>true</code> if generic matches should be ignored |
| * @returns {?RegExpFilter} |
| * matching filter or <code>null</code> |
| */ |
| - matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) |
| + matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
| { |
| + let thirdParty = docDomain && isThirdParty(location, docDomain); |
|
Sebastian Noack
2019/02/05 04:32:53
As discussed on IRC, how about only calling isThir…
Manish Jethani
2019/02/05 05:07:28
I tried this but it actually seemed to be more exp…
Sebastian Noack
2019/02/05 05:21:16
Sure, if you just call isThirdParty() as we perfor…
Manish Jethani
2019/02/05 05:42:23
Yes, I know what you mean. I put the isThirdParty…
Sebastian Noack
2019/02/05 05:54:45
Fair enough, for not further optimizing this here.
|
| + |
| + if (typeof location != "string") |
| + location = location + ""; |
| + |
| let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| if (candidates === null) |
| candidates = []; |
| candidates.push(""); |
| + |
| for (let i = 0, l = candidates.length; i < l; i++) |
| { |
| let result = this.checkEntryMatch(candidates[i], location, typeMask, |
| docDomain, thirdParty, sitekey, |
| specificOnly); |
| if (result) |
| return result; |
| } |
| @@ -502,19 +507,23 @@ |
| /** |
| * Optimized filter matching testing both whitelist and blacklist matchers |
| * simultaneously. For parameters see |
| {@link Matcher#matchesAny Matcher.matchesAny()}. |
| * @see Matcher#matchesAny |
| * @inheritdoc |
| * @private |
| */ |
| - _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, |
| - specificOnly) |
| + _matchesAnyInternal(location, typeMask, docDomain, sitekey, specificOnly) |
| { |
| + let thirdParty = docDomain && isThirdParty(location, docDomain); |
| + |
| + if (typeof location != "string") |
| + location = location + ""; |
| + |
| let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| if (candidates === null) |
| candidates = []; |
| // The first keyword in a URL is the protocol (usually "https" or "http"). |
| // This is an outlier: it has hundreds of filters typically, yet it rarely |
| // ever has a match. We cut down the amount of processing for blocked URLs |
| // significantly by moving it to the end of the list. |
| @@ -549,18 +558,18 @@ |
| typeMask, docDomain, |
| thirdParty, sitekey); |
| } |
| } |
| return whitelistHit || blacklistHit; |
| } |
| - _searchInternal(location, typeMask, docDomain, thirdParty, sitekey, |
| - specificOnly, filterType) |
| + _searchInternal(location, typeMask, docDomain, sitekey, specificOnly, |
| + filterType) |
| { |
| let hits = {}; |
| let searchBlocking = filterType == "blocking" || filterType == "all"; |
| let searchWhitelist = filterType == "whitelist" || filterType == "all"; |
| if (searchBlocking) |
| hits.blocking = []; |
| @@ -568,16 +577,21 @@ |
| if (searchWhitelist) |
| hits.whitelist = []; |
| // If the type mask includes no types other than whitelist-only types, we |
| // can skip the blacklist. |
| if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) |
| searchBlocking = false; |
| + let thirdParty = docDomain && isThirdParty(location, docDomain); |
| + |
| + if (typeof location != "string") |
| + location = location + ""; |
| + |
| let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| if (candidates === null) |
| candidates = []; |
| candidates.push(""); |
| for (let i = 0, l = candidates.length; i < l; i++) |
| { |
| if (searchBlocking) |
| @@ -597,27 +611,27 @@ |
| return hits; |
| } |
| /** |
| * @see Matcher#matchesAny |
| * @inheritdoc |
| */ |
| - matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) |
| + matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
| { |
| - let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + |
| - " " + sitekey + " " + specificOnly; |
| + let key = location + " " + typeMask + " " + docDomain + " " + sitekey + |
| + " " + specificOnly; |
| let result = this._resultCache.get(key); |
| if (typeof result != "undefined") |
| return result; |
| result = this._matchesAnyInternal(location, typeMask, docDomain, |
| - thirdParty, sitekey, specificOnly); |
| + sitekey, specificOnly); |
| if (this._resultCache.size >= this.maxCacheEntries) |
| this._resultCache.clear(); |
| this._resultCache.set(key, result); |
| return result; |
| } |
| @@ -629,61 +643,58 @@ |
| * @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters |
| * found. |
| */ |
| /** |
| * Searches all blocking and whitelist filters and returns results matching |
| * the given parameters. |
| * |
| - * @param {string} location |
| + * @param {URL|string} location |
| * @param {number} typeMask |
| * @param {string} [docDomain] |
| - * @param {boolean} [thirdParty] |
| * @param {string} [sitekey] |
| * @param {boolean} [specificOnly] |
| * @param {string} [filterType] The types of filters to look for. This can be |
| * <code>"blocking"</code>, <code>"whitelist"</code>, or |
| * <code>"all"</code> (default). |
| * |
| * @returns {MatcherSearchResults} |
| */ |
| - search(location, typeMask, docDomain, thirdParty, sitekey, specificOnly, |
| + search(location, typeMask, docDomain, sitekey, specificOnly, |
| filterType = "all") |
| { |
| let key = "* " + location + " " + typeMask + " " + docDomain + " " + |
| - thirdParty + " " + sitekey + " " + specificOnly + " " + |
| - filterType; |
| + sitekey + " " + specificOnly + " " + filterType; |
| let result = this._resultCache.get(key); |
| if (typeof result != "undefined") |
| return result; |
| - result = this._searchInternal(location, typeMask, docDomain, thirdParty, |
| - sitekey, specificOnly, filterType); |
| + result = this._searchInternal(location, typeMask, docDomain, sitekey, |
| + specificOnly, filterType); |
| if (this._resultCache.size >= this.maxCacheEntries) |
| this._resultCache.clear(); |
| this._resultCache.set(key, result); |
| return result; |
| } |
| /** |
| * Tests whether the URL is whitelisted |
| * @see Matcher#matchesAny |
| * @inheritdoc |
| * @returns {boolean} |
| */ |
| - isWhitelisted(location, typeMask, docDomain, thirdParty, sitekey, |
| - specificOnly) |
| + isWhitelisted(location, typeMask, docDomain, sitekey, specificOnly) |
| { |
| - return !!this._whitelist.matchesAny(location, typeMask, docDomain, |
| - thirdParty, sitekey, specificOnly); |
| + return !!this._whitelist.matchesAny(location, typeMask, docDomain, sitekey, |
| + specificOnly); |
| } |
| } |
| exports.CombinedMatcher = CombinedMatcher; |
| /** |
| * Shared {@link CombinedMatcher} instance that should usually be used. |
| * @type {CombinedMatcher} |