Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)
Patch Set: Fix whitelisting request type logic Created May 17, 2016, 11:22 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/abp2blocklist.js
diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js
index 1bece259e455539c7aebdbf220479425f7eab0e3..fb90da33463b453806cae4fa5497cb31cef9223c 100644
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -25,6 +25,18 @@ let punycode = require("punycode");
const selectorLimit = 5000;
const typeMap = filterClasses.RegExpFilter.typeMap;
+const whitelistableRequestTypes = (typeMap.IMAGE
+ | typeMap.STYLESHEET
+ | typeMap.SCRIPT
+ | typeMap.FONT
+ | typeMap.MEDIA
+ | typeMap.POPUP
+ | typeMap.OBJECT
+ | typeMap.OBJECT_SUBREQUEST
+ | typeMap.XMLHTTPREQUEST
+ | typeMap.PING
+ | typeMap.SUBDOCUMENT
+ | typeMap.OTHER);
function parseDomains(domains, included, excluded)
{
@@ -66,28 +78,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)
}
/**
- * Convert the given filter "regexpSource" string into a regular expression,
- * handling the conversion of unicode inside hostnames to punycode.
- * (Also deciding if the regular expression can be safely converted to and
- * matched as lower case or not.)
+ * Parse the given filter "regexpSource" string. Producing a regular expression,
+ * extracting the hostname (if any), deciding if the regular expression is safe
+ * to be converted + matched as lower case and noting if the source contains
+ * anything after the hostname.)
*
* @param {string} text regexpSource property of a filter
- * @returns {object} An object containing a regular expression string and a bool
+ * @returns {object} An object containing a regular expression string, a bool
* indicating if the filter can be safely matched as lower
- * case: {regexp: "...", canSafelyMatchAsLowercase: true/false}
+ * case, a hostname string (or undefined) and a bool
+ * indicating if the source only contains a hostname or not:
+ * {regexp: "...",
+ * canSafelyMatchAsLowercase: true/false,
+ * hostname: "...",
+ * justHostname: true/false}
*/
-function toRegExp(text)
+function parseFilterRegexpSource(text)
{
- let result = [];
+ let regexp = [];
let lastIndex = text.length - 1;
+ let hostname;
let hostnameStart = null;
let hostnameFinished = false;
+ let justHostname = false;
let canSafelyMatchAsLowercase = false;
for (let i = 0; i < text.length; i++)
{
let c = text[i];
+ if (hostnameFinished)
+ justHostname = false;
+
// If we're currently inside the hostname we have to be careful not to
// escape any characters until after we have converted it to punycode.
if (hostnameStart != null && !hostnameFinished)
@@ -97,9 +119,11 @@ function toRegExp(text)
if (!endingChar && i != lastIndex)
continue;
- let hostname = text.substring(hostnameStart, endingChar ? i : i + 1);
- hostnameFinished = true;
- result.push(escapeRegExp(punycode.toASCII(hostname)));
+ hostname = punycode.toASCII(
+ text.substring(hostnameStart, endingChar ? i : i + 1)
+ );
+ hostnameFinished = justHostname = true;
+ regexp.push(escapeRegExp(hostname));
if (!endingChar)
break;
}
@@ -107,32 +131,32 @@ function toRegExp(text)
switch (c)
{
case "*":
- if (result.length > 0 && i < lastIndex && text[i + 1] != "*")
- result.push(".*");
+ if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")
+ regexp.push(".*");
break;
case "^":
if (i < lastIndex)
- result.push(".");
+ regexp.push(".");
break;
case "|":
if (i == 0)
{
- result.push("^");
+ regexp.push("^");
break;
}
if (i == lastIndex)
{
- result.push("$");
+ regexp.push("$");
break;
}
if (i == 1 && text[0] == "|")
{
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
- result.push("https?://");
+ regexp.push("https?://");
break;
}
- result.push("\\|");
+ regexp.push("\\|");
break;
case "/":
if (!hostnameFinished &&
@@ -141,44 +165,27 @@ function toRegExp(text)
hostnameStart = i + 1;
canSafelyMatchAsLowercase = true;
}
- result.push("/");
+ regexp.push("/");
break;
case ".": case "+": case "$": case "?":
case "{": case "}": case "(": case ")":
case "[": case "]": case "\\":
- result.push("\\", c);
+ regexp.push("\\", c);
break;
default:
if (hostnameFinished && (c >= "a" && c <= "z" ||
c >= "A" && c <= "Z"))
canSafelyMatchAsLowercase = false;
- result.push(c);
+ regexp.push(c);
}
}
- return {regexp: result.join(""),
- canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};
-}
-
-function getRegExpTrigger(filter)
-{
- let result = toRegExp(filter.regexpSource);
-
- let trigger = {"url-filter": result.regexp};
-
- // Limit rules to to HTTP(S) URLs
- if (!/^(\^|http)/i.test(trigger["url-filter"]))
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
-
- // For rules containing only a hostname we know that we're matching against
- // a lowercase string unless the matchCase option was passed.
- if (result.canSafelyMatchAsLowercase && !filter.matchCase)
- trigger["url-filter"] = trigger["url-filter"].toLowerCase();
-
- if (result.canSafelyMatchAsLowercase || filter.matchCase)
- trigger["url-filter-is-case-sensitive"] = true;
-
- return trigger;
+ return {
+ regexp: regexp.join(""),
+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,
+ hostname: hostname,
+ justHostname: justHostname
+ };
}
function getResourceTypes(filter)
@@ -223,9 +230,43 @@ function addDomainPrefix(domains)
return result;
}
-function convertFilter(filter, action, withResourceTypes)
+function convertFilterAddRules(rules, filter, action, withResourceTypes)
{
- let trigger = getRegExpTrigger(filter);
+ let parsed = parseFilterRegexpSource(filter.regexpSource);
+
+ // For the special case of $document whitelisting filters with just a domain
+ // we can generate an equivalent blocking rule exception using if-domain.
+ if (filter instanceof filterClasses.WhitelistFilter &&
+ filter.contentType & typeMap.DOCUMENT &&
+ parsed.justHostname)
+ {
+ rules.push({
+ trigger: {
+ "url-filter": ".*",
+ "if-domain": addDomainPrefix([parsed.hostname])
+ },
+ action: {type: "ignore-previous-rules"}
+ });
+ // If the filter contains other supported options we'll need to generate
+ // further rules for it, but if not we can simply return now.
+ if (!(filter.contentType | whitelistableRequestTypes))
+ return;
+ }
+
+ let trigger = {"url-filter": parsed.regexp};
+
+ // Limit rules to HTTP(S) URLs
+ if (!/^(\^|http)/i.test(trigger["url-filter"]))
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];
+
+ // For rules containing only a hostname we know that we're matching against
+ // a lowercase string unless the matchCase option was passed.
+ if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();
+
+ if (parsed.canSafelyMatchAsLowercase || filter.matchCase)
+ trigger["url-filter-is-case-sensitive"] = true;
+
let included = [];
let excluded = [];
@@ -241,7 +282,7 @@ function convertFilter(filter, action, withResourceTypes)
else if (excluded.length > 0)
trigger["unless-domain"] = addDomainPrefix(excluded);
- return {trigger: trigger, action: {type: action}};
+ rules.push({trigger: trigger, action: {type: action}});
}
function hasNonASCI(obj)
@@ -352,18 +393,7 @@ ContentBlockerList.prototype.addFilter = function(filter)
if (filter instanceof filterClasses.WhitelistFilter)
{
- if (filter.contentType & (typeMap.IMAGE
- | typeMap.STYLESHEET
- | typeMap.SCRIPT
- | typeMap.FONT
- | typeMap.MEDIA
- | typeMap.POPUP
- | typeMap.OBJECT
- | typeMap.OBJECT_SUBREQUEST
- | typeMap.XMLHTTPREQUEST
- | typeMap.PING
- | typeMap.SUBDOCUMENT
- | typeMap.OTHER))
+ if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))
this.requestExceptions.push(filter);
if (filter.contentType & typeMap.ELEMHIDE)
@@ -392,12 +422,6 @@ ContentBlockerList.prototype.generateRules = function(filter)
{
let rules = [];
- function addRule(rule)
- {
- if (!hasNonASCI(rule))
- rules.push(rule);
- }
-
let groupedElemhideFilters = new Map();
for (let filter of this.elemhideFilters)
{
@@ -426,7 +450,7 @@ ContentBlockerList.prototype.generateRules = function(filter)
// this by converting to the attribute format [id="elementID"]
selector = convertIDSelectorsToAttributeSelectors(selector);
- addRule({
+ rules.push({
trigger: {"url-filter": matchDomain,
"url-filter-is-case-sensitive": true},
action: {type: "css-display-none",
@@ -436,11 +460,11 @@ ContentBlockerList.prototype.generateRules = function(filter)
});
for (let filter of this.elemhideExceptions)
- addRule(convertFilter(filter, "ignore-previous-rules", false));
+ convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
for (let filter of this.requestFilters)
- addRule(convertFilter(filter, "block", true));
+ convertFilterAddRules(rules, filter, "block", true);
for (let filter of this.requestExceptions)
- addRule(convertFilter(filter, "ignore-previous-rules", true));
+ convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
- return rules;
+ return rules.filter(rule => !hasNonASCI(rule));
};
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld