Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/contentBlockerList.js

Issue 29336787: Issue 3670 - Make rules case sensitive where possible (Closed)
Patch Set: Fixed mistake with logic for * and ^, addressed other feedback Created Feb. 24, 2016, 10:18 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/contentBlockerList.js
diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js
index 792b54e624c219634ef6cc21d77c3aa4690507fa..b4045de9a663a840553cf0cd214127d0238c31a2 100644
--- a/lib/contentBlockerList.js
+++ b/lib/contentBlockerList.js
@@ -50,7 +50,7 @@ function escapeRegExp(s)
function matchDomain(domain)
{
- return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]";
+ return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]";
}
function convertElemHideFilter(filter, elemhideSelectorExceptions)
@@ -65,10 +65,16 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
return {matchDomains: included.map(matchDomain), selector: filter.selector};
}
+// Convert the "regexpSource" part of a filter's text to a regular expression,
+// also deciding if the expression can safely be converted to and matched as
+// lowercase.
function toRegExp(text)
{
let result = [];
let lastIndex = text.length - 1;
+ let hostnameStarted = false;
+ let hostnameFinished = false;
+ let caseSensitive = false;
for (let i = 0; i < text.length; i++)
{
@@ -77,10 +83,14 @@ function toRegExp(text)
switch (c)
{
case "*":
+ if (hostnameStarted)
+ hostnameFinished = true;
if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
result.push(".*");
break;
case "^":
+ if (hostnameStarted)
+ hostnameFinished = true;
if (i < lastIndex)
result.push(".");
break;
@@ -98,24 +108,40 @@ function toRegExp(text)
if (i == 1 && text[0] == "|")
{
result.push("https?://");
+ hostnameStarted = caseSensitive = true;
Sebastian Noack 2016/02/24 22:53:49 Nit: Sometimes you have the regular expression con
kzar 2016/02/24 23:15:20 Done.
break;
}
- case ".": case "+": case "?": case "$":
- case "{": case "}": case "(": case ")":
- case "[": case "]": case "\\":
+ result.push("\\", c);
+ break;
+ case "/":
+ if (hostnameStarted)
+ hostnameFinished = true;
+ else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
+ hostnameStarted = caseSensitive = true;
+ result.push("/");
Sebastian Noack 2016/02/24 22:53:49 If we move this case just above the default case,
kzar 2016/02/24 23:15:20 Done.
+ break;
+ case "?":
+ if (hostnameStarted)
+ hostnameFinished = true;
+ case ".": case "+": case "$": case "{":
Sebastian Noack 2016/02/24 22:53:48 Nit: It doesn't really matter, but you wrap after
kzar 2016/02/24 23:15:20 Done.
+ case "}": case "(": case ")": case "[":
+ case "]": case "\\":
result.push("\\", c);
break;
default:
+ if (hostnameFinished && (c >= "a" && c <= "z" ||
+ c >= "A" && c <= "Z"))
+ caseSensitive = false;
result.push(c);
}
}
- return result.join("");
+ return {regexp: result.join(""), caseSensitive: caseSensitive};
}
-function getRegExpSource(filter)
+function getRegExpTrigger(filter)
{
- let source = toRegExp(filter.regexpSource.replace(
+ let result = toRegExp(filter.regexpSource.replace(
// Safari expects punycode, filter lists use unicode
/^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
function (match, prefix, domain)
@@ -124,11 +150,21 @@ function getRegExpSource(filter)
}
));
+ let trigger = {"url-filter": result.regexp};
+
// Limit rules to to HTTP(S) URLs
- if (!/^(\^|http)/i.test(source))
- source = "^https?://.*" + source;
+ if (!/^(\^|http)/i.test(trigger["url-filter"]))
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
- return source;
+ // For rules containing only a hostname we know that we're matching against
+ // a lowercase string and can therefore enable case sensitive matching.
+ if (result.caseSensitive)
Sebastian Noack 2016/02/24 22:53:49 I think the logic here would be a little more stra
Sebastian Noack 2016/02/24 23:07:37 Ah wait, the logic is incorrect anyway, with eithe
kzar 2016/02/24 23:15:20 Done.
+ {
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();
+ trigger["url-filter-is-case-sensitive"] = true;
+ }
+
+ return trigger;
}
function getResourceTypes(filter)
@@ -175,11 +211,14 @@ function addDomainPrefix(domains)
function convertFilter(filter, action, withResourceTypes)
{
- let trigger = {"url-filter": getRegExpSource(filter)};
+ let trigger = getRegExpTrigger(filter);
let included = [];
let excluded = [];
- parseDomains(filter.domains, included, excluded);
+ if (filter.matchCase)
+ trigger["url-filter-is-case-sensitive"] = true;
+
+ parseDomains(filter.domains, included, excluded);
if (withResourceTypes)
trigger["resource-type"] = getResourceTypes(filter);
@@ -377,7 +416,8 @@ ContentBlockerList.prototype.generateRules = function(filter)
selector = convertIDSelectorsToAttributeSelectors(selector);
addRule({
- trigger: {"url-filter": matchDomain},
+ trigger: {"url-filter": matchDomain,
+ "url-filter-is-case-sensitive": true},
action: {type: "css-display-none",
selector: selector}
});
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld