Index: lib/matcher.js |
=================================================================== |
--- a/lib/matcher.js |
+++ b/lib/matcher.js |
@@ -18,16 +18,17 @@ |
"use strict"; |
/** |
* @fileOverview Matcher class implementing matching addresses against |
* a list of filters. |
*/ |
const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); |
+const {isThirdParty} = require("./domain"); |
/** |
* Regular expression for matching a keyword in a filter. |
* @type {RegExp} |
*/ |
const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
/** |
@@ -371,37 +372,41 @@ |
} |
} |
return null; |
} |
/** |
* Tests whether the URL matches any of the known filters |
- * @param {string} location |
+ * @param {URL|string} location |
* URL to be tested |
* @param {number} typeMask |
* bitmask of content / request types to match |
* @param {string} [docDomain] |
* domain name of the document that loads the URL |
- * @param {boolean} [thirdParty] |
- * should be true if the URL is a third-party request |
* @param {string} [sitekey] |
* public key provided by the document |
* @param {boolean} [specificOnly] |
* should be <code>true</code> if generic matches should be ignored |
* @returns {?RegExpFilter} |
* matching filter or <code>null</code> |
*/ |
- matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) |
+ matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
{ |
+ let thirdParty = docDomain && isThirdParty(location, docDomain); |
Sebastian Noack
2019/02/05 04:32:53
As discussed on IRC, how about only calling isThir
Manish Jethani
2019/02/05 05:07:28
I tried this but it actually seemed to be more exp
Sebastian Noack
2019/02/05 05:21:16
Sure, if you just call isThridParty() as we perfor
Manish Jethani
2019/02/05 05:42:23
Yes, I know what you mean. I put the isThirdParty
Sebastian Noack
2019/02/05 05:54:45
Fair enough, for not further optimizing this here.
|
+ |
+ if (typeof location != "string") |
+ location = location + ""; |
+ |
let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
if (candidates === null) |
candidates = []; |
candidates.push(""); |
+ |
for (let i = 0, l = candidates.length; i < l; i++) |
{ |
let result = this.checkEntryMatch(candidates[i], location, typeMask, |
docDomain, thirdParty, sitekey, |
specificOnly); |
if (result) |
return result; |
} |
@@ -502,19 +507,23 @@ |
/** |
* Optimized filter matching testing both whitelist and blacklist matchers |
* simultaneously. For parameters see |
{@link Matcher#matchesAny Matcher.matchesAny()}. |
* @see Matcher#matchesAny |
* @inheritdoc |
* @private |
*/ |
- _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, |
- specificOnly) |
+ _matchesAnyInternal(location, typeMask, docDomain, sitekey, specificOnly) |
{ |
+ let thirdParty = docDomain && isThirdParty(location, docDomain); |
+ |
+ if (typeof location != "string") |
+ location = location + ""; |
+ |
let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
if (candidates === null) |
candidates = []; |
// The first keyword in a URL is the protocol (usually "https" or "http"). |
// This is an outlier: it has hundreds of filters typically, yet it rarely |
// ever has a match. We cut down the amount of processing for blocked URLs |
// significantly by moving it to the end of the list. |
@@ -549,18 +558,18 @@ |
typeMask, docDomain, |
thirdParty, sitekey); |
} |
} |
return whitelistHit || blacklistHit; |
} |
- _searchInternal(location, typeMask, docDomain, thirdParty, sitekey, |
- specificOnly, filterType) |
+ _searchInternal(location, typeMask, docDomain, sitekey, specificOnly, |
+ filterType) |
{ |
let hits = {}; |
let searchBlocking = filterType == "blocking" || filterType == "all"; |
let searchWhitelist = filterType == "whitelist" || filterType == "all"; |
if (searchBlocking) |
hits.blocking = []; |
@@ -568,16 +577,21 @@ |
if (searchWhitelist) |
hits.whitelist = []; |
// If the type mask includes no types other than whitelist-only types, we |
// can skip the blacklist. |
if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0) |
searchBlocking = false; |
+ let thirdParty = docDomain && isThirdParty(location, docDomain); |
+ |
+ if (typeof location != "string") |
+ location = location + ""; |
+ |
let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
if (candidates === null) |
candidates = []; |
candidates.push(""); |
for (let i = 0, l = candidates.length; i < l; i++) |
{ |
if (searchBlocking) |
@@ -597,27 +611,27 @@ |
return hits; |
} |
/** |
* @see Matcher#matchesAny |
* @inheritdoc |
*/ |
- matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) |
+ matchesAny(location, typeMask, docDomain, sitekey, specificOnly) |
{ |
- let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + |
- " " + sitekey + " " + specificOnly; |
+ let key = location + " " + typeMask + " " + docDomain + " " + sitekey + |
+ " " + specificOnly; |
let result = this._resultCache.get(key); |
if (typeof result != "undefined") |
return result; |
result = this._matchesAnyInternal(location, typeMask, docDomain, |
- thirdParty, sitekey, specificOnly); |
+ sitekey, specificOnly); |
if (this._resultCache.size >= this.maxCacheEntries) |
this._resultCache.clear(); |
this._resultCache.set(key, result); |
return result; |
} |
@@ -629,61 +643,58 @@ |
* @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters |
* found. |
*/ |
/** |
* Searches all blocking and whitelist filters and returns results matching |
* the given parameters. |
* |
- * @param {string} location |
+ * @param {URL|string} location |
* @param {number} typeMask |
* @param {string} [docDomain] |
- * @param {boolean} [thirdParty] |
* @param {string} [sitekey] |
* @param {boolean} [specificOnly] |
* @param {string} [filterType] The types of filters to look for. This can be |
* <code>"blocking"</code>, <code>"whitelist"</code>, or |
* <code>"all"</code> (default). |
* |
* @returns {MatcherSearchResults} |
*/ |
- search(location, typeMask, docDomain, thirdParty, sitekey, specificOnly, |
+ search(location, typeMask, docDomain, sitekey, specificOnly, |
filterType = "all") |
{ |
let key = "* " + location + " " + typeMask + " " + docDomain + " " + |
- thirdParty + " " + sitekey + " " + specificOnly + " " + |
- filterType; |
+ sitekey + " " + specificOnly + " " + filterType; |
let result = this._resultCache.get(key); |
if (typeof result != "undefined") |
return result; |
- result = this._searchInternal(location, typeMask, docDomain, thirdParty, |
- sitekey, specificOnly, filterType); |
+ result = this._searchInternal(location, typeMask, docDomain, sitekey, |
+ specificOnly, filterType); |
if (this._resultCache.size >= this.maxCacheEntries) |
this._resultCache.clear(); |
this._resultCache.set(key, result); |
return result; |
} |
/** |
* Tests whether the URL is whitelisted |
* @see Matcher#matchesAny |
* @inheritdoc |
* @returns {boolean} |
*/ |
- isWhitelisted(location, typeMask, docDomain, thirdParty, sitekey, |
- specificOnly) |
+ isWhitelisted(location, typeMask, docDomain, sitekey, specificOnly) |
{ |
- return !!this._whitelist.matchesAny(location, typeMask, docDomain, |
- thirdParty, sitekey, specificOnly); |
+ return !!this._whitelist.matchesAny(location, typeMask, docDomain, sitekey, |
+ specificOnly); |
} |
} |
exports.CombinedMatcher = CombinedMatcher; |
/** |
* Shared {@link CombinedMatcher} instance that should usually be used. |
* @type {CombinedMatcher} |