Index: lib/contentBlockerList.js |
diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js |
index 792b54e624c219634ef6cc21d77c3aa4690507fa..b4045de9a663a840553cf0cd214127d0238c31a2 100644 |
--- a/lib/contentBlockerList.js |
+++ b/lib/contentBlockerList.js |
@@ -50,7 +50,7 @@ function escapeRegExp(s) |
function matchDomain(domain) |
{ |
- return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; |
+ return "^https?://([^/:]*\\.)?" + escapeRegExp(domain).toLowerCase() + "[/:]"; |
} |
function convertElemHideFilter(filter, elemhideSelectorExceptions) |
@@ -65,10 +65,16 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions) |
return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
} |
+// Convert the "regexpSource" part of a filter's text to a regular expression, |
+// also deciding if the expression can safely be converted to and matched as |
+// lowercase. |
function toRegExp(text) |
{ |
let result = []; |
let lastIndex = text.length - 1; |
+ let hostnameStarted = false; |
+ let hostnameFinished = false; |
+ let caseSensitive = false; |
for (let i = 0; i < text.length; i++) |
{ |
@@ -77,10 +83,14 @@ function toRegExp(text) |
switch (c) |
{ |
case "*": |
+ if (hostnameStarted) |
+ hostnameFinished = true; |
if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
result.push(".*"); |
break; |
case "^": |
+ if (hostnameStarted) |
+ hostnameFinished = true; |
if (i < lastIndex) |
result.push("."); |
break; |
@@ -98,24 +108,40 @@ function toRegExp(text) |
if (i == 1 && text[0] == "|") |
{ |
result.push("https?://"); |
+ hostnameStarted = caseSensitive = true; |
Sebastian Noack
2016/02/24 22:53:49
Nit: Sometimes you have the regular expression con
kzar
2016/02/24 23:15:20
Done.
|
break; |
} |
- case ".": case "+": case "?": case "$": |
- case "{": case "}": case "(": case ")": |
- case "[": case "]": case "\\": |
+ result.push("\\", c); |
+ break; |
+ case "/": |
+ if (hostnameStarted) |
+ hostnameFinished = true; |
+ else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
+ hostnameStarted = caseSensitive = true; |
+ result.push("/"); |
Sebastian Noack
2016/02/24 22:53:49
If we move this case just above the default case,
kzar
2016/02/24 23:15:20
Done.
|
+ break; |
+ case "?": |
+ if (hostnameStarted) |
+ hostnameFinished = true; |
+ case ".": case "+": case "$": case "{": |
Sebastian Noack
2016/02/24 22:53:48
Nit: It doesn't really matter, but you wrap after
kzar
2016/02/24 23:15:20
Done.
|
+ case "}": case "(": case ")": case "[": |
+ case "]": case "\\": |
result.push("\\", c); |
break; |
default: |
+ if (hostnameFinished && (c >= "a" && c <= "z" || |
+ c >= "A" && c <= "Z")) |
+ caseSensitive = false; |
result.push(c); |
} |
} |
- return result.join(""); |
+ return {regexp: result.join(""), caseSensitive: caseSensitive}; |
} |
-function getRegExpSource(filter) |
+function getRegExpTrigger(filter) |
{ |
- let source = toRegExp(filter.regexpSource.replace( |
+ let result = toRegExp(filter.regexpSource.replace( |
// Safari expects punycode, filter lists use unicode |
/^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, |
function (match, prefix, domain) |
@@ -124,11 +150,21 @@ function getRegExpSource(filter) |
} |
)); |
+ let trigger = {"url-filter": result.regexp}; |
+ |
// Limit rules to to HTTP(S) URLs |
- if (!/^(\^|http)/i.test(source)) |
- source = "^https?://.*" + source; |
+ if (!/^(\^|http)/i.test(trigger["url-filter"])) |
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
- return source; |
+ // For rules containing only a hostname we know that we're matching against |
+ // a lowercase string and can therefore enable case sensitive matching. |
+ if (result.caseSensitive) |
Sebastian Noack
2016/02/24 22:53:49
I think the logic here would be a little more stra
Sebastian Noack
2016/02/24 23:07:37
Ah wait, the logic is incorrect anyway, with eithe
kzar
2016/02/24 23:15:20
Done.
|
+ { |
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
+ trigger["url-filter-is-case-sensitive"] = true; |
+ } |
+ |
+ return trigger; |
} |
function getResourceTypes(filter) |
@@ -175,11 +211,14 @@ function addDomainPrefix(domains) |
function convertFilter(filter, action, withResourceTypes) |
{ |
- let trigger = {"url-filter": getRegExpSource(filter)}; |
+ let trigger = getRegExpTrigger(filter); |
let included = []; |
let excluded = []; |
- parseDomains(filter.domains, included, excluded); |
+ if (filter.matchCase) |
+ trigger["url-filter-is-case-sensitive"] = true; |
+ |
+ parseDomains(filter.domains, included, excluded); |
if (withResourceTypes) |
trigger["resource-type"] = getResourceTypes(filter); |
@@ -377,7 +416,8 @@ ContentBlockerList.prototype.generateRules = function(filter) |
selector = convertIDSelectorsToAttributeSelectors(selector); |
addRule({ |
- trigger: {"url-filter": matchDomain}, |
+ trigger: {"url-filter": matchDomain, |
+ "url-filter-is-case-sensitive": true}, |
action: {type: "css-display-none", |
selector: selector} |
}); |