Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/contentBlockerList.js

Issue 29336787: Issue 3670 - Make rules case sensitive where possible (Closed)
Patch Set: Improved documentation of toRegexp function and switched to JSDoc syntax Created Feb. 25, 2016, 3:21 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/contentBlockerList.js
diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js
index 792b54e624c219634ef6cc21d77c3aa4690507fa..caf80f7d8c8d8dee646ad54893ff1282d2c4ece1 100644
--- a/lib/contentBlockerList.js
+++ b/lib/contentBlockerList.js
@@ -50,7 +50,7 @@ function escapeRegExp(s)
function matchDomain(domain)
{
- return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]";
+ return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
}
function convertElemHideFilter(filter, elemhideSelectorExceptions)
@@ -65,10 +65,23 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
return {matchDomains: included.map(matchDomain), selector: filter.selector};
}
+/**
+ * Convert the given filter "regexpSource" string into a regular expression.
+ * (Also deciding if the regular expression can be safely converted to and
+ * matched as lower case or not.)
+ *
+ * @param {string} text regexpSource property of a filter
+ * @returns {object} An object containing a regular expression string and a bool
+ * indicating if the filter can be safely matched as lower
+ * case: {regexp: "...", caseSenstive: true/false}
+ */
function toRegExp(text)
{
let result = [];
let lastIndex = text.length - 1;
+ let hostnameStarted = false;
+ let hostnameFinished = false;
+ let caseSensitive = false;
for (let i = 0; i < text.length; i++)
{
@@ -77,10 +90,14 @@ function toRegExp(text)
switch (c)
{
case "*":
+ if (hostnameStarted)
+ hostnameFinished = true;
if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
result.push(".*");
break;
case "^":
+ if (hostnameStarted)
+ hostnameFinished = true;
if (i < lastIndex)
result.push(".");
break;
@@ -97,25 +114,38 @@ function toRegExp(text)
}
if (i == 1 && text[0] == "|")
{
+ hostnameStarted = caseSensitive = true;
result.push("https?://");
break;
}
- case ".": case "+": case "?": case "$":
- case "{": case "}": case "(": case ")":
- case "[": case "]": case "\\":
result.push("\\", c);
break;
+ case "?":
+ if (hostnameStarted)
+ hostnameFinished = true;
+ case ".": case "+": case "$": case "{": case "}":
+ case "(": case ")": case "[": case "]": case "\\":
+ result.push("\\", c);
+ break;
+ case "/":
+ if (hostnameStarted)
+ hostnameFinished = true;
+ else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
+ hostnameStarted = caseSensitive = true;
default:
+ if (hostnameFinished && (c >= "a" && c <= "z" ||
+ c >= "A" && c <= "Z"))
+ caseSensitive = false;
result.push(c);
}
}
- return result.join("");
+ return {regexp: result.join(""), caseSensitive: caseSensitive};
}
-function getRegExpSource(filter)
+function getRegExpTrigger(filter)
{
- let source = toRegExp(filter.regexpSource.replace(
+ let result = toRegExp(filter.regexpSource.replace(
// Safari expects punycode, filter lists use unicode
/^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
function (match, prefix, domain)
@@ -124,11 +154,21 @@ function getRegExpSource(filter)
}
));
+ let trigger = {"url-filter": result.regexp};
+
// Limit rules to to HTTP(S) URLs
- if (!/^(\^|http)/i.test(source))
- source = "^https?://.*" + source;
+ if (!/^(\^|http)/i.test(trigger["url-filter"]))
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
+
+ // For rules containing only a hostname we know that we're matching against
+ // a lowercase string unless the matchCase option was passed.
+ if (result.caseSensitive && !filter.matchCase)
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();
+
+ if (result.caseSensitive || filter.matchCase)
+ trigger["url-filter-is-case-sensitive"] = true;
- return source;
+ return trigger;
}
function getResourceTypes(filter)
@@ -175,7 +215,7 @@ function addDomainPrefix(domains)
function convertFilter(filter, action, withResourceTypes)
{
- let trigger = {"url-filter": getRegExpSource(filter)};
+ let trigger = getRegExpTrigger(filter);
let included = [];
let excluded = [];
@@ -377,7 +417,8 @@ ContentBlockerList.prototype.generateRules = function(filter)
selector = convertIDSelectorsToAttributeSelectors(selector);
addRule({
- trigger: {"url-filter": matchDomain},
+ trigger: {"url-filter": matchDomain,
+ "url-filter-is-case-sensitive": true},
action: {type: "css-display-none",
selector: selector}
});
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld