Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/filterClasses.js

Issue 29912636: Issue 7052 - Use string-based matching for literal patterns (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Add comments Created Oct. 17, 2018, 8:58 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/filterClasses.js » ('j') | test/filterClasses.js » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/filterClasses.js
===================================================================
--- a/lib/filterClasses.js
+++ b/lib/filterClasses.js
@@ -21,22 +21,42 @@
* @fileOverview Definition of Filter class and its subclasses.
*/
const {filterNotifier} = require("./filterNotifier");
const {extend} = require("./coreUtils");
const {filterToRegExp} = require("./common");
/**
+ * Regular expression used to match the <code>||</code> prefix in an otherwise
+ * literal pattern.
+ * @type {RegExp}
+ */
+let tripleAnchorRegExp = new RegExp(filterToRegExp("|||"));
+
+/**
* All known unique domain sources mapped to their parsed values.
* @type {Map.<string,Map.<string,boolean>>}
*/
let knownDomainMaps = new Map();
/**
+ * Checks whether the given pattern is a string of literal characters with no
+ * wildcards or any other special characters. If the pattern is prefixed with a
+ * <code>||</code> but otherwise contains no special characters, it is still
+ * considered to be a literal pattern.
+ * @param {string} pattern
+ * @returns {boolean}
+ */
+function isLiteralPattern(pattern)
+{
+ return !/[*^|]/.test(pattern.replace(/^\|{2}/, ""));
+}
+
+/**
* Abstract base class for filters
*
* @param {string} text string representation of the filter
* @constructor
*/
function Filter(text)
{
this.text = text;
@@ -688,16 +708,19 @@
// The filter is a regular expression - convert it immediately to
// catch syntax errors
let regexp = new RegExp(regexpSource.substr(1, regexpSource.length - 2),
this.matchCase ? "" : "i");
Object.defineProperty(this, "regexp", {value: regexp});
}
else
{
+ if (!this.matchCase && isLiteralPattern(regexpSource))
Manish Jethani 2018/10/17 09:05:04 Lowercase the pattern once here if it is literal a
+ regexpSource = regexpSource.toLowerCase();
+
// No need to convert this filter to regular expression yet, do it on demand
this.pattern = regexpSource;
}
}
exports.RegExpFilter = RegExpFilter;
RegExpFilter.prototype = extend(ActiveFilter, {
/**
@@ -719,20 +742,27 @@
*/
pattern: null,
/**
* Regular expression to be used when testing against this filter
* @type {RegExp}
*/
get regexp()
{
- let source = filterToRegExp(this.pattern, this.rewrite != null);
- let regexp = new RegExp(source, this.matchCase ? "" : "i");
- Object.defineProperty(this, "regexp", {value: regexp});
- return regexp;
+ let value = null;
+
+ let {pattern, rewrite} = this;
+ if (rewrite != null || !isLiteralPattern(pattern))
+ {
+ value = new RegExp(filterToRegExp(pattern, rewrite != null),
+ this.matchCase ? "" : "i");
+ }
+
+ Object.defineProperty(this, "regexp", {value});
+ return value;
},
/**
* Content types the filter applies to, combination of values from
* RegExpFilter.typeMap
* @type {number}
*/
contentType: 0x7FFFFFFF,
/**
@@ -783,17 +813,49 @@
* @param {string} [sitekey] public key provided by the document
* @return {boolean} true in case of a match
*/
matches(location, typeMask, docDomain, thirdParty, sitekey)
{
return (this.contentType & typeMask) != 0 &&
(this.thirdParty == null || this.thirdParty == thirdParty) &&
this.isActiveOnDomain(docDomain, sitekey) &&
- this.regexp.test(location);
+ this.matchesLocation(location);
+ },
+
+ /**
+ * Checks whether the given URL matches this filter's pattern.
+ * @param {string} location The URL to check.
+ * @returns {boolean} <code>true</code> if the URL matches.
+ */
+ matchesLocation(location)
+ {
+ let {regexp} = this;
+
+ if (regexp)
+ return regexp.test(location);
+
+ if (!this.matchCase)
+ location = location.toLowerCase();
+
+ let {pattern} = this;
+
+ if (pattern[0] == "|" && pattern[1] == "|")
+ {
+ let index = location.indexOf(pattern.substring(2));
+
+ // The "||" prefix requires that the text that follows does not start
+ // with a forward slash. Normally this is part of the generated regular
+ // expression, but since we're faking it here with string-based matching
+ // we need to do the check ourselves.
+ return index != -1 && location[index] != "/" &&
Manish Jethani 2018/10/17 09:05:04 If the index is not -1 then it's highly unlikely t
+ tripleAnchorRegExp.test(location.substring(0, index));
+ }
+
+ return location.includes(pattern);
}
});
/**
* Yields the filter itself (required to optimize {@link Matcher}).
* @yields {RegExpFilter}
*/
RegExpFilter.prototype[Symbol.iterator] = function*()
« no previous file with comments | « no previous file | test/filterClasses.js » ('j') | test/filterClasses.js » ('J')

Powered by Google App Engine
This is Rietveld