lib/abp2blocklist.js - Issue 29340694: Issue 3956 - Convert domain whitelisting filters

Unified Diff: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)

Patch Set: Fix whitelisting request type logic Created May 17, 2016, 11:22 a.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: lib/abp2blocklist.js

diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js

index 1bece259e455539c7aebdbf220479425f7eab0e3..fb90da33463b453806cae4fa5497cb31cef9223c 100644

--- a/lib/abp2blocklist.js

+++ b/lib/abp2blocklist.js

@@ -25,6 +25,18 @@ let punycode = require("punycode");

const selectorLimit = 5000;

const typeMap = filterClasses.RegExpFilter.typeMap;

+const whitelistableRequestTypes = (typeMap.IMAGE

+ | typeMap.STYLESHEET

+ | typeMap.SCRIPT

+ | typeMap.FONT

+ | typeMap.MEDIA

+ | typeMap.POPUP

+ | typeMap.OBJECT

+ | typeMap.OBJECT_SUBREQUEST

+ | typeMap.XMLHTTPREQUEST

+ | typeMap.PING

+ | typeMap.SUBDOCUMENT

+ | typeMap.OTHER);

function parseDomains(domains, included, excluded)

{

@@ -66,28 +78,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions)

}

/**

- * Convert the given filter "regexpSource" string into a regular expression,

- * handling the conversion of unicode inside hostnames to punycode.

- * (Also deciding if the regular expression can be safely converted to and

- * matched as lower case or not.)

+ * Parse the given filter "regexpSource" string. Producing a regular expression,

+ * extracting the hostname (if any), deciding if the regular expression is safe

+ * to be converted + matched as lower case and noting if the source contains

+ * anything after the hostname.)

* @param {string} text regexpSource property of a filter

- * @returns {object} An object containing a regular expression string and a bool

+ * @returns {object} An object containing a regular expression string, a bool

* indicating if the filter can be safely matched as lower

- * case: {regexp: "...", canSafelyMatchAsLowercase: true/false}

+ * case, a hostname string (or undefined) and a bool

+ * indicating if the source only contains a hostname or not:

+ * {regexp: "...",

+ * canSafelyMatchAsLowercase: true/false,

+ * hostname: "...",

+ * justHostname: true/false}

-function toRegExp(text)

+function parseFilterRegexpSource(text)

{

- let result = [];

+ let regexp = [];

let lastIndex = text.length - 1;

+ let hostname;

let hostnameStart = null;

let hostnameFinished = false;

+ let justHostname = false;

let canSafelyMatchAsLowercase = false;

for (let i = 0; i < text.length; i++)

{

let c = text[i];

+ if (hostnameFinished)

+ justHostname = false;

// If we're currently inside the hostname we have to be careful not to

// escape any characters until after we have converted it to punycode.

if (hostnameStart != null && !hostnameFinished)

@@ -97,9 +119,11 @@ function toRegExp(text)

if (!endingChar && i != lastIndex)

continue;

- let hostname = text.substring(hostnameStart, endingChar ? i : i + 1);

- hostnameFinished = true;

- result.push(escapeRegExp(punycode.toASCII(hostname)));

+ hostname = punycode.toASCII(

+ text.substring(hostnameStart, endingChar ? i : i + 1)

+ );

+ hostnameFinished = justHostname = true;

+ regexp.push(escapeRegExp(hostname));

if (!endingChar)

break;

}

@@ -107,32 +131,32 @@ function toRegExp(text)

switch (c)

{

case "*":

- if (result.length > 0 && i < lastIndex && text[i + 1] != "*")

- result.push(".*");

+ if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")

+ regexp.push(".*");

break;

case "^":

if (i < lastIndex)

- result.push(".");

+ regexp.push(".");

break;

case "|":

if (i == 0)

{

- result.push("^");

+ regexp.push("^");

break;

}

if (i == lastIndex)

{

- result.push("$");

+ regexp.push("$");

break;

}

if (i == 1 && text[0] == "|")

{

hostnameStart = i + 1;

canSafelyMatchAsLowercase = true;

- result.push("https?://");

+ regexp.push("https?://");

break;

}

- result.push("\\|");

+ regexp.push("\\|");

break;

case "/":

if (!hostnameFinished &&

@@ -141,44 +165,27 @@ function toRegExp(text)

hostnameStart = i + 1;

canSafelyMatchAsLowercase = true;

}

- result.push("/");

+ regexp.push("/");

break;

case ".": case "+": case "$": case "?":

case "{": case "}": case "(": case ")":

case "[": case "]": case "\\":

- result.push("\\", c);

+ regexp.push("\\", c);

break;

default:

if (hostnameFinished && (c >= "a" && c <= "z" ||

c >= "A" && c <= "Z"))

canSafelyMatchAsLowercase = false;

- result.push(c);

+ regexp.push(c);

}

- return {regexp: result.join(""),

- canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};

-function getRegExpTrigger(filter)

- let result = toRegExp(filter.regexpSource);

- let trigger = {"url-filter": result.regexp};

- // Limit rules to to HTTP(S) URLs

- if (!/^(\^|http)/i.test(trigger["url-filter"]))

- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

- // For rules containing only a hostname we know that we're matching against

- // a lowercase string unless the matchCase option was passed.

- if (result.canSafelyMatchAsLowercase && !filter.matchCase)

- trigger["url-filter"] = trigger["url-filter"].toLowerCase();

- if (result.canSafelyMatchAsLowercase || filter.matchCase)

- trigger["url-filter-is-case-sensitive"] = true;

- return trigger;

+ return {

+ regexp: regexp.join(""),

+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

+ hostname: hostname,

+ justHostname: justHostname

+ };

}

function getResourceTypes(filter)

@@ -223,9 +230,43 @@ function addDomainPrefix(domains)

return result;

}

-function convertFilter(filter, action, withResourceTypes)

+function convertFilterAddRules(rules, filter, action, withResourceTypes)

{

- let trigger = getRegExpTrigger(filter);

+ let parsed = parseFilterRegexpSource(filter.regexpSource);

+ // For the special case of $document whitelisting filters with just a domain

+ // we can generate an equivalent blocking rule exception using if-domain.

+ if (filter instanceof filterClasses.WhitelistFilter &&

+ filter.contentType & typeMap.DOCUMENT &&

+ parsed.justHostname)

+ {

+ rules.push({

+ trigger: {

+ "url-filter": ".*",

+ "if-domain": addDomainPrefix([parsed.hostname])

+ },

+ action: {type: "ignore-previous-rules"}

+ });

+ // If the filter contains other supported options we'll need to generate

+ // further rules for it, but if not we can simply return now.

+ if (!(filter.contentType | whitelistableRequestTypes))

+ return;

+ }

+ let trigger = {"url-filter": parsed.regexp};

+ // Limit rules to HTTP(S) URLs

+ if (!/^(\^|http)/i.test(trigger["url-filter"]))

+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

+ // For rules containing only a hostname we know that we're matching against

+ // a lowercase string unless the matchCase option was passed.

+ if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

+ trigger["url-filter"] = trigger["url-filter"].toLowerCase();

+ if (parsed.canSafelyMatchAsLowercase || filter.matchCase)

+ trigger["url-filter-is-case-sensitive"] = true;

let included = [];

let excluded = [];

@@ -241,7 +282,7 @@ function convertFilter(filter, action, withResourceTypes)

else if (excluded.length > 0)

trigger["unless-domain"] = addDomainPrefix(excluded);

- return {trigger: trigger, action: {type: action}};

+ rules.push({trigger: trigger, action: {type: action}});

}

function hasNonASCI(obj)

@@ -352,18 +393,7 @@ ContentBlockerList.prototype.addFilter = function(filter)

if (filter instanceof filterClasses.WhitelistFilter)

{

- if (filter.contentType & (typeMap.IMAGE

- | typeMap.STYLESHEET

- | typeMap.SCRIPT

- | typeMap.FONT

- | typeMap.MEDIA

- | typeMap.POPUP

- | typeMap.OBJECT

- | typeMap.OBJECT_SUBREQUEST

- | typeMap.XMLHTTPREQUEST

- | typeMap.PING

- | typeMap.SUBDOCUMENT

- | typeMap.OTHER))

+ if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes))

this.requestExceptions.push(filter);

if (filter.contentType & typeMap.ELEMHIDE)

@@ -392,12 +422,6 @@ ContentBlockerList.prototype.generateRules = function(filter)

{

let rules = [];

- function addRule(rule)

- {

- if (!hasNonASCI(rule))

- rules.push(rule);

- }

let groupedElemhideFilters = new Map();

for (let filter of this.elemhideFilters)

{

@@ -426,7 +450,7 @@ ContentBlockerList.prototype.generateRules = function(filter)

// this by converting to the attribute format [id="elementID"]

selector = convertIDSelectorsToAttributeSelectors(selector);

- addRule({

+ rules.push({

trigger: {"url-filter": matchDomain,

"url-filter-is-case-sensitive": true},

action: {type: "css-display-none",

@@ -436,11 +460,11 @@ ContentBlockerList.prototype.generateRules = function(filter)

});

for (let filter of this.elemhideExceptions)

- addRule(convertFilter(filter, "ignore-previous-rules", false));

+ convertFilterAddRules(rules, filter, "ignore-previous-rules", false);

for (let filter of this.requestFilters)

- addRule(convertFilter(filter, "block", true));

+ convertFilterAddRules(rules, filter, "block", true);

for (let filter of this.requestExceptions)

- addRule(convertFilter(filter, "ignore-previous-rules", true));

+ convertFilterAddRules(rules, filter, "ignore-previous-rules", true);

- return rules;

+ return rules.filter(rule => !hasNonASCI(rule));

};

« no previous file with comments | « no previous file | no next file » | no next file with comments »