Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29337803: Issue 3710 - Unify hostname logic (Closed)
Patch Set: Created Feb. 27, 2016, 2:23 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « abp2blocklist.js ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/abp2blocklist.js
diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js
index 1c1ff6cf57607e7344359dc9109a616c793df721..069d5153cea8d02a37076bd0b8509c9136f20aee 100644
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -66,38 +66,49 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
}
/**
- * Convert the given filter "regexpSource" string into a regular expression.
+ * Convert the given filter "regexpSource" string into a regular expression,
+ * handling the conversion of unicode inside hostnames to punycode.
* (Also deciding if the regular expression can be safely converted to and
* matched as lower case or not.)
*
* @param {string} text regexpSource property of a filter
* @returns {object} An object containing a regular expression string and a bool
* indicating if the filter can be safely matched as lower
- * case: {regexp: "...", caseSenstive: true/false}
+ * case: {regexp: "...", canSafelyMatchAsLowercase: true/false}
*/
function toRegExp(text)
{
let result = [];
let lastIndex = text.length - 1;
- let hostnameStarted = false;
+ let hostnameStart = null;
let hostnameFinished = false;
- let caseSensitive = false;
+ let canSafelyMatchAsLowercase = false;
for (let i = 0; i < text.length; i++)
{
let c = text[i];
+ // If we're currently inside the hostname we have to be careful not to
+ // escape any characters until after we have converted it to punycode.
+ if (hostnameStart != null && !hostnameFinished)
+ {
+ if (c == "*" || c == "^" || c == "?" || c == "/" || i == lastIndex)
Sebastian Noack 2016/02/27 20:30:38 If you turn the logic here the other way around, y
Sebastian Noack 2016/02/27 20:30:38 I'm not entirely sure if the case of last index is
kzar 2016/02/27 21:28:53 Done.
kzar 2016/02/27 21:28:53 Good point but it's even more complicated, what if
+ {
+ hostnameFinished = true;
+ let hostname = text.substring(hostnameStart, i);
+ result.push(escapeRegExp(punycode.toASCII(hostname)));
+ }
+ else
+ continue;
+ }
+
switch (c)
{
case "*":
- if (hostnameStarted)
- hostnameFinished = true;
if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
result.push(".*");
break;
case "^":
- if (hostnameStarted)
- hostnameFinished = true;
if (i < lastIndex)
result.push(".");
break;
@@ -114,45 +125,41 @@ function toRegExp(text)
}
if (i == 1 && text[0] == "|")
{
- hostnameStarted = caseSensitive = true;
result.push("https?://");
Sebastian Noack 2016/02/27 20:30:38 Nit: Mind moving that line to just above the break
kzar 2016/02/27 21:28:53 Done.
+ hostnameStart = i + 1;
+ canSafelyMatchAsLowercase = true;
break;
}
- result.push("\\", c);
+ result.push("\\|");
break;
- case "?":
- if (hostnameStarted)
- hostnameFinished = true;
- case ".": case "+": case "$": case "{": case "}":
+ case "/":
+ result.push("/");
Sebastian Noack 2016/02/27 20:30:38 Nit: Mind moving that line to just above the break
kzar 2016/02/27 21:28:53 Done.
+ if (!hostnameFinished &&
Sebastian Noack 2016/02/27 20:30:38 Nit: It doesn't matter, but I personally find that
kzar 2016/02/27 21:28:53 I'd rather leave this one as it is.
+ text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
+ {
+ hostnameStart = i + 1;
+ canSafelyMatchAsLowercase = true;
+ }
+ break;
+ case ".": case "+": case "$": case "{": case "}": case "?":
Sebastian Noack 2016/02/27 20:30:38 Nit: I think the way this block was originally wra
kzar 2016/02/27 21:28:53 Done.
case "(": case ")": case "[": case "]": case "\\":
result.push("\\", c);
break;
- case "/":
- if (hostnameStarted)
- hostnameFinished = true;
- else if (text.charAt(i-2) == ":" && text.charAt(i-1) == "/")
- hostnameStarted = caseSensitive = true;
kzar 2016/02/27 14:29:34 (I've switched this around as I decided that sneak
default:
if (hostnameFinished && (c >= "a" && c <= "z" ||
c >= "A" && c <= "Z"))
- caseSensitive = false;
+ canSafelyMatchAsLowercase = false;
result.push(c);
}
}
- return {regexp: result.join(""), caseSensitive: caseSensitive};
+ return {regexp: result.join(""),
+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};
}
function getRegExpTrigger(filter)
{
- let result = toRegExp(filter.regexpSource.replace(
- // Safari expects punycode, filter lists use unicode
- /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,
- function (match, prefix, domain)
- {
- return prefix + punycode.toASCII(domain);
- }
- ));
+ let result = toRegExp(filter.regexpSource);
let trigger = {"url-filter": result.regexp};
@@ -162,10 +169,10 @@ function getRegExpTrigger(filter)
// For rules containing only a hostname we know that we're matching against
// a lowercase string unless the matchCase option was passed.
- if (result.caseSensitive && !filter.matchCase)
+ if (result.canSafelyMatchAsLowercase && !filter.matchCase)
trigger["url-filter"] = trigger["url-filter"].toLowerCase();
- if (result.caseSensitive || filter.matchCase)
+ if (result.canSafelyMatchAsLowercase || filter.matchCase)
trigger["url-filter-is-case-sensitive"] = true;
return trigger;
« no previous file with comments | « abp2blocklist.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld