Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/matcher.js

Issue 29892596: Issue 6992 - Remove keyword-by-filter map (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Implement faster version of isSlowFilter Created Sept. 28, 2018, 9:22 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterListener.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/matcher.js
===================================================================
--- a/lib/matcher.js
+++ b/lib/matcher.js
@@ -20,172 +20,165 @@
/**
* @fileOverview Matcher class implementing matching addresses against
* a list of filters.
*/
const {WhitelistFilter} = require("./filterClasses");
/**
+ * Regular expression for matching a keyword in a filter.
+ * @type {RegExp}
+ */
+const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
+
+/**
+ * Regular expression for matching all keywords in a filter.
+ * @type {RegExp}
+ */
+const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
+
+/**
+ * Checks whether a filter is slow, i.e. has no pattern or no keyword in it.
+ * @param {RegExpFilter} filter
+ * @returns {boolean}
+ */
+function isSlowFilter(filter)
+{
+ return !filter.pattern || !keywordRegExp.test(filter.pattern);
+}
+
+exports.isSlowFilter = isSlowFilter;
+
+/**
* Blacklist/whitelist filter matching
*/
class Matcher
{
constructor()
{
/**
* Lookup table for filters by their associated keyword
- * @type {Map.<string,(Filter|Filter[])>}
+ * @type {Map.<string,(Filter|Set.<Filter>)>}
*/
this.filterByKeyword = new Map();
-
- /**
- * Lookup table for keywords by the filter
- * @type {Map.<Filter,string>}
- */
- this.keywordByFilter = new Map();
}
/**
* Removes all known filters
*/
clear()
{
this.filterByKeyword.clear();
- this.keywordByFilter.clear();
}
/**
* Adds a filter to the matcher
* @param {RegExpFilter} filter
*/
add(filter)
{
- if (this.keywordByFilter.has(filter))
- return;
-
// Look for a suitable keyword
let keyword = this.findKeyword(filter);
- let oldEntry = this.filterByKeyword.get(keyword);
- if (typeof oldEntry == "undefined")
+ let set = this.filterByKeyword.get(keyword);
+ if (typeof set == "undefined")
+ {
this.filterByKeyword.set(keyword, filter);
- else if (oldEntry.length == 1)
- this.filterByKeyword.set(keyword, [oldEntry, filter]);
+ }
+ else if (set.size == 1)
+ {
+ if (filter != set)
+ this.filterByKeyword.set(keyword, new Set([set, filter]));
+ }
else
- oldEntry.push(filter);
- this.keywordByFilter.set(filter, keyword);
+ {
+ set.add(filter);
+ }
}
/**
* Removes a filter from the matcher
* @param {RegExpFilter} filter
*/
remove(filter)
{
- let keyword = this.keywordByFilter.get(filter);
- if (typeof keyword == "undefined")
+ let keyword = this.findKeyword(filter);
+ let set = this.filterByKeyword.get(keyword);
+ if (typeof set == "undefined")
return;
- let list = this.filterByKeyword.get(keyword);
- if (list.length <= 1)
- this.filterByKeyword.delete(keyword);
+ if (set.size == 1)
+ {
+ if (filter == set)
+ this.filterByKeyword.delete(keyword);
+ }
else
{
- let index = list.indexOf(filter);
- if (index >= 0)
- {
- list.splice(index, 1);
- if (list.length == 1)
- this.filterByKeyword.set(keyword, list[0]);
- }
+ set.delete(filter);
+
+ if (set.size == 1)
+ this.filterByKeyword.set(keyword, [...set][0]);
}
-
- this.keywordByFilter.delete(filter);
}
/**
* Chooses a keyword to be associated with the filter
* @param {Filter} filter
* @returns {string} keyword or an empty string if no keyword could be found
*/
findKeyword(filter)
{
let result = "";
let {pattern} = filter;
if (pattern == null)
return result;
- let candidates = pattern.toLowerCase().match(
- /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
- );
+ let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
if (!candidates)
return result;
let hash = this.filterByKeyword;
let resultCount = 0xFFFFFF;
let resultLength = 0;
for (let i = 0, l = candidates.length; i < l; i++)
{
let candidate = candidates[i].substr(1);
let filters = hash.get(candidate);
- let count = typeof filters != "undefined" ? filters.length : 0;
+ let count = typeof filters != "undefined" ? filters.size : 0;
if (count < resultCount ||
(count == resultCount && candidate.length > resultLength))
{
result = candidate;
resultCount = count;
resultLength = candidate.length;
}
}
return result;
}
/**
- * Checks whether a particular filter is being matched against.
- * @param {RegExpFilter} filter
- * @returns {boolean}
- */
- hasFilter(filter)
- {
- return this.keywordByFilter.has(filter);
- }
-
- /**
- * Returns the keyword used for a filter, <code>null</code>
- * for unknown filters.
- * @param {RegExpFilter} filter
- * @returns {?string}
- */
- getKeywordForFilter(filter)
- {
- let keyword = this.keywordByFilter.get(filter);
- return typeof keyword != "undefined" ? keyword : null;
- }
-
- /**
* Checks whether the entries for a particular keyword match a URL
* @param {string} keyword
* @param {string} location
* @param {number} typeMask
* @param {string} [docDomain]
* @param {boolean} [thirdParty]
* @param {string} [sitekey]
* @param {boolean} [specificOnly]
* @returns {?Filter}
*/
_checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
specificOnly)
{
- let list = this.filterByKeyword.get(keyword);
- if (typeof list == "undefined")
+ let set = this.filterByKeyword.get(keyword);
+ if (typeof set == "undefined")
return null;
- for (let i = 0; i < list.length; i++)
+
+ for (let filter of set)
{
- let filter = list[i];
-
if (specificOnly && filter.isGeneric() &&
!(filter instanceof WhitelistFilter))
continue;
if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
return filter;
}
return null;
@@ -308,56 +301,16 @@
findKeyword(filter)
{
if (filter instanceof WhitelistFilter)
return this.whitelist.findKeyword(filter);
return this.blacklist.findKeyword(filter);
}
/**
- * @see Matcher#hasFilter
- * @param {Filter} filter
- * @returns {boolean}
- */
- hasFilter(filter)
- {
- if (filter instanceof WhitelistFilter)
- return this.whitelist.hasFilter(filter);
- return this.blacklist.hasFilter(filter);
- }
-
- /**
- * @see Matcher#getKeywordForFilter
- * @param {Filter} filter
- * @returns {string} keyword
- */
- getKeywordForFilter(filter)
- {
- if (filter instanceof WhitelistFilter)
- return this.whitelist.getKeywordForFilter(filter);
- return this.blacklist.getKeywordForFilter(filter);
- }
-
- /**
- * Checks whether a particular filter is slow
- * @param {RegExpFilter} filter
- * @returns {boolean}
- */
- isSlowFilter(filter)
- {
- let matcher = (
- filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
- );
- let keyword = matcher.getKeywordForFilter(filter);
- if (keyword != null)
- return !keyword;
- return !matcher.findKeyword(filter);
- }
-
- /**
* Optimized filter matching testing both whitelist and blacklist matchers
* simultaneously. For parameters see
{@link Matcher#matchesAny Matcher.matchesAny()}.
* @see Matcher#matchesAny
* @inheritdoc
*/
matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
specificOnly)
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterListener.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld