Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)
Patch Set: Created April 20, 2016, 5:09 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: lib/abp2blocklist.js
diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js
index 1bece259e455539c7aebdbf220479425f7eab0e3..0a1ec26e3e7907c5cd76357ba7887c5770c974a5 100644
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -66,28 +66,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
}
/**
- * Convert the given filter "regexpSource" string into a regular expression,
- * handling the conversion of unicode inside hostnames to punycode.
- * (Also deciding if the regular expression can be safely converted to and
- * matched as lower case or not.)
+ * Parse the given filter "regexpSource" string, producing a regular expression,
+ * extracting the hostname (if any), deciding if the regular expression is safe
+ * to be converted and matched as lower case, and noting if the source contains
+ * anything after the hostname.
*
* @param {string} text regexpSource property of a filter
- * @returns {object} An object containing a regular expression string and a bool
+ * @returns {object} An object containing a regular expression string, a bool
* indicating if the filter can be safely matched as lower
- * case: {regexp: "...", canSafelyMatchAsLowercase: true/false}
+ * case, a hostname string (or undefined) and a bool
+ * indicating if the source only contains a hostname or not:
+ * {regexp: "...",
+ * canSafelyMatchAsLowercase: true/false,
+ * hostname: "...",
+ * justHostname: true/false}
*/
-function toRegExp(text)
+function parseFilterRegexpSource(text)
{
- let result = [];
+ let regexp = [];
let lastIndex = text.length - 1;
+ let hostname;
let hostnameStart = null;
let hostnameFinished = false;
+ let justHostname = false;
let canSafelyMatchAsLowercase = false;
for (let i = 0; i < text.length; i++)
{
let c = text[i];
+ if (hostnameFinished)
+ justHostname = false;
+
// If we're currently inside the hostname we have to be careful not to
// escape any characters until after we have converted it to punycode.
if (hostnameStart != null && !hostnameFinished)
@@ -97,9 +107,11 @@ function toRegExp(text)
if (!endingChar && i != lastIndex)
continue;
- let hostname = text.substring(hostnameStart, endingChar ? i : i + 1);
- hostnameFinished = true;
- result.push(escapeRegExp(punycode.toASCII(hostname)));
+ hostname = punycode.toASCII(
+ text.substring(hostnameStart, endingChar ? i : i + 1)
+ );
+ hostnameFinished = justHostname = true;
+ regexp.push(escapeRegExp(hostname));
if (!endingChar)
break;
}
@@ -107,32 +119,32 @@ function toRegExp(text)
switch (c)
{
case "*":
- if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
- result.push(".*");
+ if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")
+ regexp.push(".*");
break;
case "^":
if (i < lastIndex)
- result.push(".");
+ regexp.push(".");
break;
case "|":
if (i == 0)
{
- result.push("^");
+ regexp.push("^");
break;
}
if (i == lastIndex)
{
- result.push("$");
+ regexp.push("$");
break;
}
if (i == 1 && text[0] == "|")
{
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
- result.push("https?://");
+ regexp.push("https?://");
break;
}
- result.push("\\|");
+ regexp.push("\\|");
break;
case "/":
if (!hostnameFinished &&
@@ -141,44 +153,27 @@ function toRegExp(text)
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
}
- result.push("/");
+ regexp.push("/");
break;
case ".": case "+": case "$": case "?":
case "{": case "}": case "(": case ")":
case "[": case "]": case "\\":
- result.push("\\", c);
+ regexp.push("\\", c);
break;
default:
if (hostnameFinished && (c >= "a" && c <= "z" ||
c >= "A" && c <= "Z"))
canSafelyMatchAsLowercase = false;
- result.push(c);
+ regexp.push(c);
}
}
- return {regexp: result.join(""),
- canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};
-}
-
-function getRegExpTrigger(filter)
-{
- let result = toRegExp(filter.regexpSource);
-
- let trigger = {"url-filter": result.regexp};
-
- // Limit rules to to HTTP(S) URLs
- if (!/^(\^|http)/i.test(trigger["url-filter"]))
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
-
- // For rules containing only a hostname we know that we're matching against
- // a lowercase string unless the matchCase option was passed.
- if (result.canSafelyMatchAsLowercase && !filter.matchCase)
- trigger["url-filter"] = trigger["url-filter"].toLowerCase();
-
- if (result.canSafelyMatchAsLowercase || filter.matchCase)
- trigger["url-filter-is-case-sensitive"] = true;
-
- return trigger;
+ return {
+ regexp: regexp.join(""),
+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
+ hostname: hostname,
+ justHostname: justHostname
+ };
}
function getResourceTypes(filter)
@@ -225,7 +220,29 @@ function addDomainPrefix(domains)
function convertFilter(filter, action, withResourceTypes)
{
- let trigger = getRegExpTrigger(filter);
+ let parsed = parseFilterRegexpSource(filter.regexpSource);
+
+ // For the special case of $document whitelisting filters with just a domain
+ // we can generate an equivalent blocking rule exception using if-domain.
+ if (filter.contentType == typeMap.DOCUMENT && parsed.justHostname)
Sebastian Noack 2016/05/12 12:12:25 For filters like example.com$document,image we wou
kzar 2016/05/16 16:22:36 Done.
+ return {trigger: {"url-filter": ".*",
Sebastian Noack 2016/05/12 12:12:26 Nit: Mind wrapping the nested object for better re
Sebastian Noack 2016/05/12 12:12:26 Wouldn't an empty string be sufficient as url-filt
kzar 2016/05/16 16:22:36 Done.
kzar 2016/05/16 16:22:36 Unfortunately this causes a "Extension compilation
+ "if-domain": addDomainPrefix([parsed.hostname])},
+ action: {type: "ignore-previous-rules"}};
+
+ let trigger = {"url-filter": parsed.regexp};
+
+ // Limit rules to to HTTP(S) URLs
Sebastian Noack 2016/05/12 12:12:26 Typo: to to
kzar 2016/05/16 16:22:36 Done.
+ if (!/^(\^|http)/i.test(trigger["url-filter"]))
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
+
+ // For rules containing only a hostname we know that we're matching against
+ // a lowercase string unless the matchCase option was passed.
+ if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();
+
+ if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
+ trigger["url-filter-is-case-sensitive"] = true;
+
let included = [];
let excluded = [];
@@ -352,7 +369,8 @@ ContentBlockerList.prototype.addFilter = function(filter)
if (filter instanceof filterClasses.WhitelistFilter)
{
- if (filter.contentType & (typeMap.IMAGE
+ if (filter.contentType & (typeMap.DOCUMENT
+ | typeMap.IMAGE
| typeMap.STYLESHEET
| typeMap.SCRIPT
| typeMap.FONT
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld