| Index: lib/abp2blocklist.js |
| diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js |
| index 1bece259e455539c7aebdbf220479425f7eab0e3..fb90da33463b453806cae4fa5497cb31cef9223c 100644 |
| --- a/lib/abp2blocklist.js |
| +++ b/lib/abp2blocklist.js |
| @@ -25,6 +25,18 @@ let punycode = require("punycode"); |
| const selectorLimit = 5000; |
| const typeMap = filterClasses.RegExpFilter.typeMap; |
| +const whitelistableRequestTypes = (typeMap.IMAGE |
| + | typeMap.STYLESHEET |
| + | typeMap.SCRIPT |
| + | typeMap.FONT |
| + | typeMap.MEDIA |
| + | typeMap.POPUP |
| + | typeMap.OBJECT |
| + | typeMap.OBJECT_SUBREQUEST |
| + | typeMap.XMLHTTPREQUEST |
| + | typeMap.PING |
| + | typeMap.SUBDOCUMENT |
| + | typeMap.OTHER); |
| function parseDomains(domains, included, excluded) |
| { |
| @@ -66,28 +78,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions) |
| } |
| /** |
| - * Convert the given filter "regexpSource" string into a regular expression, |
| - * handling the conversion of unicode inside hostnames to punycode. |
| - * (Also deciding if the regular expression can be safely converted to and |
| - * matched as lower case or not.) |
| + * Parse the given filter "regexpSource" string. Producing a regular expression, |
| + * extracting the hostname (if any), deciding if the regular expression is safe |
| + * to be converted + matched as lower case and noting if the source contains |
| + * anything after the hostname.) |
| * |
| * @param {string} text regexpSource property of a filter |
| - * @returns {object} An object containing a regular expression string and a bool |
| + * @returns {object} An object containing a regular expression string, a bool |
| * indicating if the filter can be safely matched as lower |
| - * case: {regexp: "...", canSafelyMatchAsLowercase: true/false} |
| + * case, a hostname string (or undefined) and a bool |
| + * indicating if the source only contains a hostname or not: |
| + * {regexp: "...", |
| + * canSafelyMatchAsLowercase: true/false, |
| + * hostname: "...", |
| + * justHostname: true/false} |
| */ |
| -function toRegExp(text) |
| +function parseFilterRegexpSource(text) |
| { |
| - let result = []; |
| + let regexp = []; |
| let lastIndex = text.length - 1; |
| + let hostname; |
| let hostnameStart = null; |
| let hostnameFinished = false; |
| + let justHostname = false; |
| let canSafelyMatchAsLowercase = false; |
| for (let i = 0; i < text.length; i++) |
| { |
| let c = text[i]; |
| + if (hostnameFinished) |
| + justHostname = false; |
| + |
| // If we're currently inside the hostname we have to be careful not to |
| // escape any characters until after we have converted it to punycode. |
| if (hostnameStart != null && !hostnameFinished) |
| @@ -97,9 +119,11 @@ function toRegExp(text) |
| if (!endingChar && i != lastIndex) |
| continue; |
| - let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); |
| - hostnameFinished = true; |
| - result.push(escapeRegExp(punycode.toASCII(hostname))); |
| + hostname = punycode.toASCII( |
| + text.substring(hostnameStart, endingChar ? i : i + 1) |
| + ); |
| + hostnameFinished = justHostname = true; |
| + regexp.push(escapeRegExp(hostname)); |
| if (!endingChar) |
| break; |
| } |
| @@ -107,32 +131,32 @@ function toRegExp(text) |
| switch (c) |
| { |
| case "*": |
| - if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
| - result.push(".*"); |
| + if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
| + regexp.push(".*"); |
| break; |
| case "^": |
| if (i < lastIndex) |
| - result.push("."); |
| + regexp.push("."); |
| break; |
| case "|": |
| if (i == 0) |
| { |
| - result.push("^"); |
| + regexp.push("^"); |
| break; |
| } |
| if (i == lastIndex) |
| { |
| - result.push("$"); |
| + regexp.push("$"); |
| break; |
| } |
| if (i == 1 && text[0] == "|") |
| { |
| hostnameStart = i + 1; |
| canSafelyMatchAsLowercase = true; |
| - result.push("https?://"); |
| + regexp.push("https?://"); |
| break; |
| } |
| - result.push("\\|"); |
| + regexp.push("\\|"); |
| break; |
| case "/": |
| if (!hostnameFinished && |
| @@ -141,44 +165,27 @@ function toRegExp(text) |
| hostnameStart = i + 1; |
| canSafelyMatchAsLowercase = true; |
| } |
| - result.push("/"); |
| + regexp.push("/"); |
| break; |
| case ".": case "+": case "$": case "?": |
| case "{": case "}": case "(": case ")": |
| case "[": case "]": case "\\": |
| - result.push("\\", c); |
| + regexp.push("\\", c); |
| break; |
| default: |
| if (hostnameFinished && (c >= "a" && c <= "z" || |
| c >= "A" && c <= "Z")) |
| canSafelyMatchAsLowercase = false; |
| - result.push(c); |
| + regexp.push(c); |
| } |
| } |
| - return {regexp: result.join(""), |
| - canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; |
| -} |
| - |
| -function getRegExpTrigger(filter) |
| -{ |
| - let result = toRegExp(filter.regexpSource); |
| - |
| - let trigger = {"url-filter": result.regexp}; |
| - |
| - // Limit rules to to HTTP(S) URLs |
| - if (!/^(\^|http)/i.test(trigger["url-filter"])) |
| - trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
| - |
| - // For rules containing only a hostname we know that we're matching against |
| - // a lowercase string unless the matchCase option was passed. |
| - if (result.canSafelyMatchAsLowercase && !filter.matchCase) |
| - trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
| - |
| - if (result.canSafelyMatchAsLowercase || filter.matchCase) |
| - trigger["url-filter-is-case-sensitive"] = true; |
| - |
| - return trigger; |
| + return { |
| + regexp: regexp.join(""), |
| + canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
| + hostname: hostname, |
| + justHostname: justHostname |
| + }; |
| } |
| function getResourceTypes(filter) |
| @@ -223,9 +230,43 @@ function addDomainPrefix(domains) |
| return result; |
| } |
| -function convertFilter(filter, action, withResourceTypes) |
| +function convertFilterAddRules(rules, filter, action, withResourceTypes) |
| { |
| - let trigger = getRegExpTrigger(filter); |
| + let parsed = parseFilterRegexpSource(filter.regexpSource); |
| + |
| + // For the special case of $document whitelisting filters with just a domain |
| + // we can generate an equivalent blocking rule exception using if-domain. |
| + if (filter instanceof filterClasses.WhitelistFilter && |
| + filter.contentType & typeMap.DOCUMENT && |
| + parsed.justHostname) |
| + { |
| + rules.push({ |
| + trigger: { |
| + "url-filter": ".*", |
| + "if-domain": addDomainPrefix([parsed.hostname]) |
| + }, |
| + action: {type: "ignore-previous-rules"} |
| + }); |
| + // If the filter contains other supported options we'll need to generate |
| + // further rules for it, but if not we can simply return now. |
| + if (!(filter.contentType | whitelistableRequestTypes)) |
| + return; |
| + } |
| + |
| + let trigger = {"url-filter": parsed.regexp}; |
| + |
| + // Limit rules to HTTP(S) URLs |
| + if (!/^(\^|http)/i.test(trigger["url-filter"])) |
| + trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
| + |
| + // For rules containing only a hostname we know that we're matching against |
| + // a lowercase string unless the matchCase option was passed. |
| + if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
| + trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
| + |
| + if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
| + trigger["url-filter-is-case-sensitive"] = true; |
| + |
| let included = []; |
| let excluded = []; |
| @@ -241,7 +282,7 @@ function convertFilter(filter, action, withResourceTypes) |
| else if (excluded.length > 0) |
| trigger["unless-domain"] = addDomainPrefix(excluded); |
| - return {trigger: trigger, action: {type: action}}; |
| + rules.push({trigger: trigger, action: {type: action}}); |
| } |
| function hasNonASCI(obj) |
| @@ -352,18 +393,7 @@ ContentBlockerList.prototype.addFilter = function(filter) |
| if (filter instanceof filterClasses.WhitelistFilter) |
| { |
| - if (filter.contentType & (typeMap.IMAGE |
| - | typeMap.STYLESHEET |
| - | typeMap.SCRIPT |
| - | typeMap.FONT |
| - | typeMap.MEDIA |
| - | typeMap.POPUP |
| - | typeMap.OBJECT |
| - | typeMap.OBJECT_SUBREQUEST |
| - | typeMap.XMLHTTPREQUEST |
| - | typeMap.PING |
| - | typeMap.SUBDOCUMENT |
| - | typeMap.OTHER)) |
| + if (filter.contentType & (typeMap.DOCUMENT | whitelistableRequestTypes)) |
| this.requestExceptions.push(filter); |
| if (filter.contentType & typeMap.ELEMHIDE) |
| @@ -392,12 +422,6 @@ ContentBlockerList.prototype.generateRules = function(filter) |
| { |
| let rules = []; |
| - function addRule(rule) |
| - { |
| - if (!hasNonASCI(rule)) |
| - rules.push(rule); |
| - } |
| - |
| let groupedElemhideFilters = new Map(); |
| for (let filter of this.elemhideFilters) |
| { |
| @@ -426,7 +450,7 @@ ContentBlockerList.prototype.generateRules = function(filter) |
| // this by converting to the attribute format [id="elementID"] |
| selector = convertIDSelectorsToAttributeSelectors(selector); |
| - addRule({ |
| + rules.push({ |
| trigger: {"url-filter": matchDomain, |
| "url-filter-is-case-sensitive": true}, |
| action: {type: "css-display-none", |
| @@ -436,11 +460,11 @@ ContentBlockerList.prototype.generateRules = function(filter) |
| }); |
| for (let filter of this.elemhideExceptions) |
| - addRule(convertFilter(filter, "ignore-previous-rules", false)); |
| + convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
| for (let filter of this.requestFilters) |
| - addRule(convertFilter(filter, "block", true)); |
| + convertFilterAddRules(rules, filter, "block", true); |
| for (let filter of this.requestExceptions) |
| - addRule(convertFilter(filter, "ignore-previous-rules", true)); |
| + convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
| - return rules; |
| + return rules.filter(rule => !hasNonASCI(rule)); |
| }; |