Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)
Patch Set: Addressed feedback Created May 16, 2016, 4:21 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/abp2blocklist.js
diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js
index 1bece259e455539c7aebdbf220479425f7eab0e3..5cd57e764253c7a26a4b20fde0d5033edc093717 100644
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -66,28 +66,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
}
/**
- * Convert the given filter "regexpSource" string into a regular expression,
- * handling the conversion of unicode inside hostnames to punycode.
- * (Also deciding if the regular expression can be safely converted to and
- * matched as lower case or not.)
+ * Parse the given filter "regexpSource" string, producing a regular expression,
+ * extracting the hostname (if any), deciding if the regular expression is safe
+ * to be converted to and matched as lower case, and noting if the source
+ * contains anything after the hostname.
*
* @param {string} text regexpSource property of a filter
- * @returns {object} An object containing a regular expression string and a bool
+ * @returns {object} An object containing a regular expression string, a bool
* indicating if the filter can be safely matched as lower
- * case: {regexp: "...", canSafelyMatchAsLowercase: true/false}
+ * case, a hostname string (or undefined) and a bool
+ * indicating if the source only contains a hostname or not:
+ * {regexp: "...",
+ * canSafelyMatchAsLowercase: true/false,
+ * hostname: "...",
+ * justHostname: true/false}
*/
-function toRegExp(text)
+function parseFilterRegexpSource(text)
{
- let result = [];
+ let regexp = [];
let lastIndex = text.length - 1;
+ let hostname;
let hostnameStart = null;
let hostnameFinished = false;
+ let justHostname = false;
let canSafelyMatchAsLowercase = false;
for (let i = 0; i < text.length; i++)
{
let c = text[i];
+ if (hostnameFinished)
+ justHostname = false;
+
// If we're currently inside the hostname we have to be careful not to
// escape any characters until after we have converted it to punycode.
if (hostnameStart != null && !hostnameFinished)
@@ -97,9 +107,11 @@ function toRegExp(text)
if (!endingChar && i != lastIndex)
continue;
- let hostname = text.substring(hostnameStart, endingChar ? i : i + 1);
- hostnameFinished = true;
- result.push(escapeRegExp(punycode.toASCII(hostname)));
+ hostname = punycode.toASCII(
+ text.substring(hostnameStart, endingChar ? i : i + 1)
+ );
+ hostnameFinished = justHostname = true;
+ regexp.push(escapeRegExp(hostname));
if (!endingChar)
break;
}
@@ -107,32 +119,32 @@ function toRegExp(text)
switch (c)
{
case "*":
- if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
- result.push(".*");
+ if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")
+ regexp.push(".*");
break;
case "^":
if (i < lastIndex)
- result.push(".");
+ regexp.push(".");
break;
case "|":
if (i == 0)
{
- result.push("^");
+ regexp.push("^");
break;
}
if (i == lastIndex)
{
- result.push("$");
+ regexp.push("$");
break;
}
if (i == 1 && text[0] == "|")
{
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
- result.push("https?://");
+ regexp.push("https?://");
break;
}
- result.push("\\|");
+ regexp.push("\\|");
break;
case "/":
if (!hostnameFinished &&
@@ -141,44 +153,27 @@ function toRegExp(text)
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
}
- result.push("/");
+ regexp.push("/");
break;
case ".": case "+": case "$": case "?":
case "{": case "}": case "(": case ")":
case "[": case "]": case "\\":
- result.push("\\", c);
+ regexp.push("\\", c);
break;
default:
if (hostnameFinished && (c >= "a" && c <= "z" ||
c >= "A" && c <= "Z"))
canSafelyMatchAsLowercase = false;
- result.push(c);
+ regexp.push(c);
}
}
- return {regexp: result.join(""),
- canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};
-}
-
-function getRegExpTrigger(filter)
-{
- let result = toRegExp(filter.regexpSource);
-
- let trigger = {"url-filter": result.regexp};
-
- // Limit rules to to HTTP(S) URLs
- if (!/^(\^|http)/i.test(trigger["url-filter"]))
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
-
- // For rules containing only a hostname we know that we're matching against
- // a lowercase string unless the matchCase option was passed.
- if (result.canSafelyMatchAsLowercase && !filter.matchCase)
- trigger["url-filter"] = trigger["url-filter"].toLowerCase();
-
- if (result.canSafelyMatchAsLowercase || filter.matchCase)
- trigger["url-filter-is-case-sensitive"] = true;
-
- return trigger;
+ return {
+ regexp: regexp.join(""),
+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
+ hostname: hostname,
+ justHostname: justHostname
+ };
}
function getResourceTypes(filter)
@@ -225,7 +220,42 @@ function addDomainPrefix(domains)
function convertFilter(filter, action, withResourceTypes)
{
- let trigger = getRegExpTrigger(filter);
+ let rules = [];
+ let parsed = parseFilterRegexpSource(filter.regexpSource);
+
+ // For the special case of $document whitelisting filters with just a domain
+ // we can generate an equivalent blocking rule exception using if-domain.
+ if (filter instanceof filterClasses.WhitelistFilter &&
+ filter.contentType & typeMap.DOCUMENT &&
+ parsed.justHostname)
+ {
+ rules.push({
+ trigger: {
+ "url-filter": ".*",
+ "if-domain": addDomainPrefix([parsed.hostname])
+ },
+ action: {type: "ignore-previous-rules"}
+ });
+ // If the filter contains multiple options we'll need to generate further
+ // rules for it, but if not we can simply return now.
+ if (filter.contentType == typeMap.DOCUMENT)
+ return rules;
+ }
+
+ let trigger = {"url-filter": parsed.regexp};
+
+ // Limit rules to HTTP(S) URLs
+ if (!/^(\^|http)/i.test(trigger["url-filter"]))
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
+
+ // For rules containing only a hostname we know that we're matching against
+ // a lowercase string unless the matchCase option was passed.
+ if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();
+
+ if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
+ trigger["url-filter-is-case-sensitive"] = true;
+
let included = [];
let excluded = [];
@@ -241,7 +271,9 @@ function convertFilter(filter, action, withResourceTypes)
else if (excluded.length > 0)
trigger["unless-domain"] = addDomainPrefix(excluded);
- return {trigger: trigger, action: {type: action}};
+ rules.push({trigger: trigger, action: {type: action}});
+
+ return rules;
}
function hasNonASCI(obj)
@@ -352,7 +384,8 @@ ContentBlockerList.prototype.addFilter = function(filter)
if (filter instanceof filterClasses.WhitelistFilter)
{
- if (filter.contentType & (typeMap.IMAGE
+ if (filter.contentType & (typeMap.DOCUMENT
+ | typeMap.IMAGE
| typeMap.STYLESHEET
| typeMap.SCRIPT
| typeMap.FONT
@@ -392,8 +425,9 @@ ContentBlockerList.prototype.generateRules = function(filter)
{
let rules = [];
- function addRule(rule)
+ function addRules(newRules)
{
+ for (let rule of newRules)
if (!hasNonASCI(rule))
rules.push(rule);
}
@@ -426,21 +460,21 @@ ContentBlockerList.prototype.generateRules = function(filter)
// this by converting to the attribute format [id="elementID"]
selector = convertIDSelectorsToAttributeSelectors(selector);
- addRule({
+ addRules([{
Sebastian Noack 2016/05/17 10:17:54 We are creating quite a few temporary arrays now.
kzar 2016/05/17 10:38:02 Done.
trigger: {"url-filter": matchDomain,
"url-filter-is-case-sensitive": true},
action: {type: "css-display-none",
selector: selector}
- });
+ }]);
}
});
for (let filter of this.elemhideExceptions)
- addRule(convertFilter(filter, "ignore-previous-rules", false));
+ addRules(convertFilter(filter, "ignore-previous-rules", false));
for (let filter of this.requestFilters)
- addRule(convertFilter(filter, "block", true));
+ addRules(convertFilter(filter, "block", true));
for (let filter of this.requestExceptions)
- addRule(convertFilter(filter, "ignore-previous-rules", true));
+ addRules(convertFilter(filter, "ignore-previous-rules", true));
return rules;
};
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld