Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/contentBlockerList.js

Issue 29336787: Issue 3670 - Make rules case sensitive where possible (Closed)
Patch Set: Handle "/" separately and the fall through Created Feb. 24, 2016, 9:18 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/contentBlockerList.js
diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js
index 792b54e624c219634ef6cc21d77c3aa4690507fa..337cf3cfd4f1ed8e1d63a04efd7e679592e9cb41 100644
--- a/lib/contentBlockerList.js
+++ b/lib/contentBlockerList.js
@@ -50,7 +50,7 @@ function escapeRegExp(s)
function matchDomain(domain)
{
- return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]";
+ return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
}
function convertElemHideFilter(filter, elemhideSelectorExceptions)
@@ -65,10 +65,16 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
return {matchDomains: included.map(matchDomain), selector: filter.selector};
}
+// Convert the "regexpSource" part of a filter's text to a regular expression,
+// also deciding if the expression can safely be converted to and matched as
+// lowercase.
function toRegExp(text)
{
let result = [];
let lastIndex = text.length - 1;
+ let hostnameStarted = false;
+ let hostnameFinished = false;
+ let caseSensitive = false;
for (let i = 0; i < text.length; i++)
{
@@ -98,24 +104,44 @@ function toRegExp(text)
if (i == 1 && text[0] == "|")
{
result.push("https?://");
+ hostnameStarted = caseSensitive = true;
break;
}
- case ".": case "+": case "?": case "$":
- case "{": case "}": case "(": case ")":
- case "[": case "]": case "\\":
+ result.push("\\", c);
+ break;
+ case "/":
+ if (!hostnameStarted && i >= 2 && text[i-2] == ":" && text[i-1] == "/")
Sebastian Noack 2016/02/24 21:47:06 Note that when you use charAt() as in my initial s
kzar 2016/02/24 22:19:45 Done.
+ {
+ hostnameStarted = caseSensitive = true;
+ result.push("/");
+ break;
Sebastian Noack 2016/02/24 21:47:07 If I get the logic here straight, there is an inco
kzar 2016/02/24 22:19:46 No, for all other occurrences of slashes we fall t
+ }
+ case "?": case "*": case "^":
Sebastian Noack 2016/02/24 21:47:06 "*" and "^" are already handled and bail out above
kzar 2016/02/24 22:19:46 Done.
+ if (hostnameStarted)
+ hostnameFinished = true;
+ if (c != "?")
+ {
+ result.push(c);
+ break;
+ }
+ case ".": case "+": case "$": case "{":
+ case "}": case "(": case ")": case "[":
+ case "]": case "\\":
result.push("\\", c);
break;
default:
+ if (hostnameFinished && (c >= "a" && c <= "z" || c >= "A" && c <= "Z"))
Sebastian Noack 2016/02/24 21:47:06 Note that this check can be simlified with a regex
kzar 2016/02/24 22:19:45 Done.
+ caseSensitive = false;
result.push(c);
}
}
- return result.join("");
+ return {regexp: result.join(""), caseSensitive: caseSensitive};
}
-function getRegExpSource(filter)
+function getRegExpTrigger(filter)
{
- let source = toRegExp(filter.regexpSource.replace(
+ let result = toRegExp(filter.regexpSource.replace(
// Safari expects punycode, filter lists use unicode
/^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
function (match, prefix, domain)
@@ -124,11 +150,21 @@ function getRegExpSource(filter)
}
));
+ let trigger = {"url-filter": result.regexp};
+
// Limit rules to to HTTP(S) URLs
- if (!/^(\^|http)/i.test(source))
- source = "^https?://.*" + source;
+ if (!/^(\^|http)/i.test(trigger["url-filter"]))
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
- return source;
+ // For rules containing only a hostname we know that we're matching against
+ // a lowercase string and can therefore enable case sensitive matching.
+ if (result.caseSensitive)
+ {
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();
+ trigger["url-filter-is-case-sensitive"] = true;
+ }
+
+ return trigger;
}
function getResourceTypes(filter)
@@ -175,11 +211,14 @@ function addDomainPrefix(domains)
function convertFilter(filter, action, withResourceTypes)
{
- let trigger = {"url-filter": getRegExpSource(filter)};
+ let trigger = getRegExpTrigger(filter);
let included = [];
let excluded = [];
- parseDomains(filter.domains, included, excluded);
+ if (filter.matchCase)
+ trigger["url-filter-is-case-sensitive"] = true;
+
+ parseDomains(filter.domains, included, excluded);
if (withResourceTypes)
trigger["resource-type"] = getResourceTypes(filter);
@@ -377,7 +416,8 @@ ContentBlockerList.prototype.generateRules = function(filter)
selector = convertIDSelectorsToAttributeSelectors(selector);
addRule({
- trigger: {"url-filter": matchDomain},
+ trigger: {"url-filter": matchDomain,
+ "url-filter-is-case-sensitive": true},
action: {type: "css-display-none",
selector: selector}
});
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld