Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/matcher.js

Issue 29998564: Issue 7260 - Internalize third-party request check in matcher (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Created Feb. 5, 2019, 4:04 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« lib/domain.js ('K') | « lib/domain.js ('k') | test/matcher.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/matcher.js
===================================================================
--- a/lib/matcher.js
+++ b/lib/matcher.js
@@ -18,16 +18,17 @@
"use strict";
/**
* @fileOverview Matcher class implementing matching addresses against
* a list of filters.
*/
const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
+const {isThirdParty} = require("./domain");
/**
* Regular expression for matching a keyword in a filter.
* @type {RegExp}
*/
const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
/**
@@ -371,37 +372,41 @@
}
}
return null;
}
/**
* Tests whether the URL matches any of the known filters
- * @param {string} location
+ * @param {URL|string} location
* URL to be tested
* @param {number} typeMask
* bitmask of content / request types to match
* @param {string} [docDomain]
* domain name of the document that loads the URL
- * @param {boolean} [thirdParty]
- * should be true if the URL is a third-party request
* @param {string} [sitekey]
* public key provided by the document
* @param {boolean} [specificOnly]
* should be <code>true</code> if generic matches should be ignored
* @returns {?RegExpFilter}
* matching filter or <code>null</code>
*/
- matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
+ matchesAny(location, typeMask, docDomain, sitekey, specificOnly)
{
+ let thirdParty = docDomain && isThirdParty(location, docDomain);
Sebastian Noack 2019/02/05 04:32:53 As discussed on IRC, how about only calling isThir
Manish Jethani 2019/02/05 05:07:28 I tried this but it actually seemed to be more exp
Sebastian Noack 2019/02/05 05:21:16 Sure, if you just call isThridParty() as we perfor
Manish Jethani 2019/02/05 05:42:23 Yes, I know what you mean. I put the isThirdParty
Sebastian Noack 2019/02/05 05:54:45 Fair enough, for not further optimizing this here.
+
+ if (typeof location != "string")
+ location = location + "";
+
let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
if (candidates === null)
candidates = [];
candidates.push("");
+
for (let i = 0, l = candidates.length; i < l; i++)
{
let result = this.checkEntryMatch(candidates[i], location, typeMask,
docDomain, thirdParty, sitekey,
specificOnly);
if (result)
return result;
}
@@ -502,19 +507,23 @@
/**
* Optimized filter matching testing both whitelist and blacklist matchers
* simultaneously. For parameters see
{@link Matcher#matchesAny Matcher.matchesAny()}.
* @see Matcher#matchesAny
* @inheritdoc
* @private
*/
- _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
- specificOnly)
+ _matchesAnyInternal(location, typeMask, docDomain, sitekey, specificOnly)
{
+ let thirdParty = docDomain && isThirdParty(location, docDomain);
+
+ if (typeof location != "string")
+ location = location + "";
+
let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
if (candidates === null)
candidates = [];
// The first keyword in a URL is the protocol (usually "https" or "http").
// This is an outlier: it has hundreds of filters typically, yet it rarely
// ever has a match. We cut down the amount of processing for blocked URLs
// significantly by moving it to the end of the list.
@@ -549,18 +558,18 @@
typeMask, docDomain,
thirdParty, sitekey);
}
}
return whitelistHit || blacklistHit;
}
- _searchInternal(location, typeMask, docDomain, thirdParty, sitekey,
- specificOnly, filterType)
+ _searchInternal(location, typeMask, docDomain, sitekey, specificOnly,
+ filterType)
{
let hits = {};
let searchBlocking = filterType == "blocking" || filterType == "all";
let searchWhitelist = filterType == "whitelist" || filterType == "all";
if (searchBlocking)
hits.blocking = [];
@@ -568,16 +577,21 @@
if (searchWhitelist)
hits.whitelist = [];
// If the type mask includes no types other than whitelist-only types, we
// can skip the blacklist.
if ((typeMask & ~WHITELIST_ONLY_TYPES) == 0)
searchBlocking = false;
+ let thirdParty = docDomain && isThirdParty(location, docDomain);
+
+ if (typeof location != "string")
+ location = location + "";
+
let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
if (candidates === null)
candidates = [];
candidates.push("");
for (let i = 0, l = candidates.length; i < l; i++)
{
if (searchBlocking)
@@ -597,27 +611,27 @@
return hits;
}
/**
* @see Matcher#matchesAny
* @inheritdoc
*/
- matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
+ matchesAny(location, typeMask, docDomain, sitekey, specificOnly)
{
- let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
- " " + sitekey + " " + specificOnly;
+ let key = location + " " + typeMask + " " + docDomain + " " + sitekey +
+ " " + specificOnly;
let result = this._resultCache.get(key);
if (typeof result != "undefined")
return result;
result = this._matchesAnyInternal(location, typeMask, docDomain,
- thirdParty, sitekey, specificOnly);
+ sitekey, specificOnly);
if (this._resultCache.size >= this.maxCacheEntries)
this._resultCache.clear();
this._resultCache.set(key, result);
return result;
}
@@ -629,61 +643,58 @@
* @property {Array.<WhitelistFilter>} [whitelist] List of whitelist filters
* found.
*/
/**
* Searches all blocking and whitelist filters and returns results matching
* the given parameters.
*
- * @param {string} location
+ * @param {URL|string} location
* @param {number} typeMask
* @param {string} [docDomain]
- * @param {boolean} [thirdParty]
* @param {string} [sitekey]
* @param {boolean} [specificOnly]
* @param {string} [filterType] The types of filters to look for. This can be
* <code>"blocking"</code>, <code>"whitelist"</code>, or
* <code>"all"</code> (default).
*
* @returns {MatcherSearchResults}
*/
- search(location, typeMask, docDomain, thirdParty, sitekey, specificOnly,
+ search(location, typeMask, docDomain, sitekey, specificOnly,
filterType = "all")
{
let key = "* " + location + " " + typeMask + " " + docDomain + " " +
- thirdParty + " " + sitekey + " " + specificOnly + " " +
- filterType;
+ sitekey + " " + specificOnly + " " + filterType;
let result = this._resultCache.get(key);
if (typeof result != "undefined")
return result;
- result = this._searchInternal(location, typeMask, docDomain, thirdParty,
- sitekey, specificOnly, filterType);
+ result = this._searchInternal(location, typeMask, docDomain, sitekey,
+ specificOnly, filterType);
if (this._resultCache.size >= this.maxCacheEntries)
this._resultCache.clear();
this._resultCache.set(key, result);
return result;
}
/**
* Tests whether the URL is whitelisted
* @see Matcher#matchesAny
* @inheritdoc
* @returns {boolean}
*/
- isWhitelisted(location, typeMask, docDomain, thirdParty, sitekey,
- specificOnly)
+ isWhitelisted(location, typeMask, docDomain, sitekey, specificOnly)
{
- return !!this._whitelist.matchesAny(location, typeMask, docDomain,
- thirdParty, sitekey, specificOnly);
+ return !!this._whitelist.matchesAny(location, typeMask, docDomain, sitekey,
+ specificOnly);
}
}
exports.CombinedMatcher = CombinedMatcher;
/**
* Shared {@link CombinedMatcher} instance that should usually be used.
* @type {CombinedMatcher}
« lib/domain.js ('K') | « lib/domain.js ('k') | test/matcher.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld