lib/contentBlockerList.js - Issue 29336753: Issue 3671 - Split out contentBlockerList API

Unified Diff: lib/contentBlockerList.js

Issue 29336753: Issue 3671 - Split out contentBlockerList API (Closed)

Patch Set: Reworked API into ContentBlockerList class and addressed other feedback Created Feb. 22, 2016, 12:21 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: lib/contentBlockerList.js

diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js

new file mode 100644

index 0000000000000000000000000000000000000000..3ecca5f0b8f9f44abb803f9691f56a9ac8e8d56f

--- /dev/null

+++ b/lib/contentBlockerList.js

@@ -0,0 +1,394 @@

+/*

+ * This file is part of Adblock Plus <https://adblockplus.org/>,

+ *

+ * Adblock Plus is free software: you can redistribute it and/or modify

+ * it under the terms of the GNU General Public License version 3 as

+ * published by the Free Software Foundation.

+ *

+ * Adblock Plus is distributed in the hope that it will be useful,

+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+ * GNU General Public License for more details.

+ *

+ * You should have received a copy of the GNU General Public License

+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

+ */

+/** @module contentBlockerList */

+"use strict";

+let filterClasses = require("filterClasses");

+let getBaseDomain = require("urlHelpers").getBaseDomain;

+let punycode = require("punycode");

+const selectorLimit = 5000; // upper bound on the number of selectors joined into a single css-display-none rule

+const typeMap = filterClasses.RegExpFilter.typeMap; // bit flags that are tested against filter.contentType

+function parseDomains(domains, included, excluded) // Sorts the keys of a domain map into the caller-supplied included and excluded arrays (both are appended to in place).

+ for (let domain in domains)

+ {

+ if (domain != "") // the empty key is not a domain; its value is only consulted as a flag below

+ {

+ let enabled = domains[domain];

+ domain = punycode.toASCII(domain.toLowerCase()); // normalise: lowercase first, then convert to punycode

+ if (!enabled)

+ excluded.push(domain);

+ else if (!domains[""]) // enabled domains are only collected when the entry for the empty key is falsy

+ included.push(domain);

+ }

+function escapeRegExp(s) // Returns s with every regular expression metacharacter prefixed by a backslash, so the result matches s literally.

+ return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

+function matchDomain(domain) // Builds a pattern for http(s) URLs whose host is the domain, optionally preceded by subdomain labels, and followed by a slash or colon.

+ return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]";

+function convertElemHideFilter(filter, elemhideSelectorExceptions) // Turns an element hiding filter into {matchDomains, selector}; yields undefined when the filter is skipped.

+ let included = [];

+ let excluded = [];

+ let rules = []; // NOTE(review): never read in the visible code, looks like a leftover

+ parseDomains(filter.domains, included, excluded);

+ if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) // skipped when any domain is excluded or the selector is a key of the exceptions object

+ return {matchDomains: included.map(matchDomain), selector: filter.selector};

+function toRegExp(text) // Translates filter pattern text into regular expression source, one character at a time.

+ let result = [];

+ let lastIndex = text.length - 1;

+ for (let i = 0; i < text.length; i++)

+ {

+ let c = text[i];

+ switch (c)

+ {

+ case "*":

+ if (result.length > 0 && i < lastIndex && text[i + 1] != "*") // a wildcard is emitted only mid-pattern, and a run of them collapses into one

+ result.push(".*");

+ break;

+ case "^":

+ if (i < lastIndex) // a caret becomes a single-character wildcard unless it is the last character, where it is dropped

+ result.push(".");

+ break;

+ case "|":

+ if (i == 0) // a leading pipe anchors the start

+ {

+ result.push("^");

+ break;

+ }

+ if (i == lastIndex) // a trailing pipe anchors the end

+ {

+ result.push("$");

+ break;

+ }

+ if (i == 1 && text[0] == "|") // the second pipe of a leading pair stands for the URL scheme

+ {

+ result.push("https?://");

+ break;

+ }

+ case ".": case "+": case "?": case "$": // any other pipe deliberately falls through to here (no break above) and is escaped like these characters

+ case "{": case "}": case "(": case ")":

+ case "[": case "]": case "\\":

+ result.push("\\"); // emit the backslash, then fall through to default to emit the character itself

Sebastian Noack 2016/02/22 17:35:28 Nit: Perhaps we should change the code here to:

Sebastian Noack 2016/02/22 17:37:31 s/missing drop-through/missing break/

kzar 2016/02/22 18:09:29 Done.

+ default:

+ result.push(c);

+ }

+ return result.join("");

+function getRegExpSource(filter) // Produces the url-filter source for a filter from its regexpSource.

+ let source = toRegExp(filter.regexpSource.replace(

+ // Safari expects punycode, filter lists use unicode

+ /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i,

+ function (match, prefix, domain)

+ {

+ return prefix + punycode.toASCII(domain); // only the host part after the prefix is converted

+ }

+ ));

+ // Limit rules to HTTP(S) URLs: a source that starts neither with an anchor

+ // nor with http gets a scheme prefix followed by a wildcard

+ if (!/^(\^|http)/i.test(source))

+ source = "^https?://.*" + source;

+ return source;

+function getResourceTypes(filter) // Maps the bits set in filter.contentType to a list of resource-type names; several flags can share one name.

+ let types = [];

+ if (filter.contentType & typeMap.IMAGE)

+ types.push("image");

+ if (filter.contentType & typeMap.STYLESHEET)

+ types.push("style-sheet");

+ if (filter.contentType & typeMap.SCRIPT)

+ types.push("script");

+ if (filter.contentType & typeMap.FONT)

+ types.push("font");

+ if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) // either flag is enough

+ types.push("media");

+ if (filter.contentType & typeMap.POPUP)

+ types.push("popup");

+ if (filter.contentType & (typeMap.XMLHTTPREQUEST |

+ typeMap.OBJECT_SUBREQUEST |

+ typeMap.PING | typeMap.OTHER))

Sebastian Noack 2016/02/22 17:35:28 Nit: I think it reads slightly better, and looks m

kzar 2016/02/22 18:09:29 Done.

+ types.push("raw"); // catch-all name for the four flags tested above

+ if (filter.contentType & typeMap.SUBDOCUMENT)

+ types.push("document");

+ return types;

+function addDomainPrefix(domains) // Returns a new list holding every given domain, plus a www-prefixed copy of each domain that equals its own base domain.

+ let result = [];

+ for (let domain of domains)

+ {

+ result.push(domain);

+ if (getBaseDomain(domain) == domain) // only domains without a subdomain part get the extra www entry

+ result.push("www." + domain);

+ }

+ return result;

+function convertFilter(filter, action, withResourceTypes) // Builds one rule object {trigger, action} for a request filter; action is used as the action type string.

+ let trigger = {"url-filter": getRegExpSource(filter)};

+ let included = [];

+ let excluded = [];

+ parseDomains(filter.domains, included, excluded);

+ if (withResourceTypes) // callers pass false when the rule should not be limited by resource type

+ trigger["resource-type"] = getResourceTypes(filter);

+ if (filter.thirdParty != null) // null or undefined means no load-type restriction is emitted

+ trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

+ if (included.length > 0) // included domains take precedence; excluded ones are only used when nothing is included

+ trigger["if-domain"] = addDomainPrefix(included);

+ else if (excluded.length > 0)

+ trigger["unless-domain"] = addDomainPrefix(excluded);

+ return {trigger: trigger, action: {type: action}};

+function hasNonASCI(obj) // Recursively reports whether a string, or any string nested inside an array or object, has a character outside the range 0x00-0x7F.

+ if (typeof obj == "string")

+ {

+ if (/[^\x00-\x7F]/.test(obj))

+ return true;

+ }

+ if (typeof obj == "object")

+ {

+ if (obj instanceof Array)

+ for (let item of obj)

+ if (hasNonASCI(item))

+ return true;

+ let names = Object.getOwnPropertyNames(obj); // NOTE(review): typeof null is also object, so a null value would make this call throw - confirm callers never pass one

+ for (let name of names) // arrays reach this loop as well, so their items are visited a second time by index

+ if (hasNonASCI(obj[name]))

+ return true;

+ }

+ return false;

+function convertIDSelectorsToAttributeSelectors(selector) // Rewrites every ID selector found outside quoted text into an attribute selector on id and returns the new selector string.

+ // First we figure out where all the IDs are

+ let sep = ""; // the quote character currently open, or empty when outside quotes

+ let start = null; // index of the hash of the ID being scanned, or null

+ let positions = [];

+ for (let i = 0; i < selector.length; i++)

+ {

+ let chr = selector[i];

+ if (chr == "\\") // ignore escaped characters

+ i++;

+ else if (chr == sep) // don't match IDs within quoted text

+ sep = ""; // e.g. [attr="#Hello"]

+ else if (sep == "")

+ {

+ if (chr == '"' || chr == "'")

+ sep = chr;

+ else if (start == null) // look for the start of an ID

+ {

+ if (chr == "#")

+ start = i;

+ }

+ else if (chr != "-" && chr != "_" &&

+ (chr < "0" ||

+ chr > "9" && chr < "A" ||

+ chr > "Z" && chr < "a" ||

+ chr > "z" && chr < "\x80")) // look for the end of the ID

+ {

+ positions.push({start: start, end: i}); // end is exclusive: it is the index of the first character after the ID

+ start = null;

+ }

+ if (start != null) // an ID that runs up to the end of the selector

+ positions.push({start: start, end: selector.length});

+ // Now replace them all with the [id=someID] form (the ID is emitted without quotes)

+ let newSelector = [];

+ let i = 0;

+ for (let pos of positions)

+ {

+ newSelector.push(selector.substring(i, pos.start)); // text between the previous ID and this one is copied unchanged

+ newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); // start + 1 skips the hash

+ i = pos.end;

+ }

+ newSelector.push(selector.substring(i));

+ return newSelector.join("");

+let ContentBlockerList =

+/**

+ * Create a new Adblock Plus filter to content blocker list converter

+ *

+ * @constructor

+ */

+exports.ContentBlockerList = function ()

+ this.requestFilters = [];

+ this.requestExceptions = [];

+ this.elemhideFilters = [];

+ this.elemhideExceptions = [];

+ this.elemhideSelectorExceptions = new Map();

+};

+ContentBlockerList.prototype = {};

Sebastian Noack 2016/02/22 17:35:28 There is no need to set the prototype to an empty

kzar 2016/02/22 18:09:29 Done.

+/**

+ * Add Adblock Plus filter to be converted

+ *

+ * The filter is only sorted into the internal lists here; no rule is built

+ * at this point. Filters that have sitekeys, and RegExpFilter instances

+ * without a regexpSource, are ignored.

+ *

+ * @param {Filter} filter Filter to convert

+ */

+ContentBlockerList.prototype.addFilter = function(filter)

+ if (filter.sitekeys)

+ return;

+ if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource)

+ return;

+ if (filter instanceof filterClasses.BlockingFilter)

+ this.requestFilters.push(filter);

+ if (filter instanceof filterClasses.WhitelistFilter) // one whitelist filter can land in both exception lists

+ {

+ if (filter.contentType & (typeMap.IMAGE

+ | typeMap.STYLESHEET

+ | typeMap.SCRIPT

+ | typeMap.FONT

+ | typeMap.MEDIA

+ | typeMap.POPUP

+ | typeMap.OBJECT

+ | typeMap.OBJECT_SUBREQUEST

+ | typeMap.XMLHTTPREQUEST

+ | typeMap.PING

+ | typeMap.SUBDOCUMENT

+ | typeMap.OTHER))

+ this.requestExceptions.push(filter); // the exception covers at least one of the request types listed above

+ if (filter.contentType & typeMap.ELEMHIDE)

+ this.elemhideExceptions.push(filter);

+ }

+ if (filter instanceof filterClasses.ElemHideFilter)

+ this.elemhideFilters.push(filter);

+ if (filter instanceof filterClasses.ElemHideException)

+ {

+ let domains = this.elemhideSelectorExceptions[filter.selector]; // per-selector domain list, stored as a property named after the selector

+ if (!domains)

+ domains = this.elemhideSelectorExceptions[filter.selector] = [];

+ parseDomains(filter.domains, domains, []); // only included domains are kept; excluded ones go into a throwaway array

+ }

+};

+/**

+ * Generate content blocker list for all filters that were added

+ *

+ * Rules are emitted in this order: element hiding rules, element hiding

+ * exceptions, blocking rules, request exceptions. A rule is left out when

+ * any string inside it contains a non-ASCII character.

+ *

+ * @returns {Object[]} rules Generated rules, each with a trigger and an action

+ */

+ContentBlockerList.prototype.generateRules = function(filter) // NOTE(review): the filter parameter is never read; each loop below declares its own filter

+ let rules = [];

+ function addRule(rule) // appends the rule unless hasNonASCI finds a non-ASCII character in it

+ {

+ if (!hasNonASCI(rule))

+ rules.push(rule);

+ }

+ let groupedElemhideFilters = new Map(); // url-filter pattern mapped to the selectors of all element hiding filters sharing it

+ for (let filter of this.elemhideFilters)

+ {

+ let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);

+ if (!result)

+ continue;

+ if (result.matchDomains.length == 0) // no domain restriction: use a pattern for every http(s) URL

+ result.matchDomains = ["^https?://"];

+ for (let matchDomain of result.matchDomains)

+ {

+ let group = groupedElemhideFilters.get(matchDomain) || [];

+ group.push(result.selector);

+ groupedElemhideFilters.set(matchDomain, group);

+ }

+ groupedElemhideFilters.forEach((selectors, matchDomain) =>

+ {

+ while (selectors.length) // each pass removes up to selectorLimit selectors, so one group can yield several rules

+ {

+ let selector = selectors.splice(0, selectorLimit).join(", ");

+ // As of Safari 9.0 element IDs are matched as lowercase. We work around

+ // this by converting to the attribute format [id="elementID"]

+ selector = convertIDSelectorsToAttributeSelectors(selector);

+ addRule({

+ trigger: {"url-filter": matchDomain},

+ action: {type: "css-display-none",

+ selector: selector}

+ });

+ }

+ });

+ for (let filter of this.elemhideExceptions)

+ addRule(convertFilter(filter, "ignore-previous-rules", false)); // false: no resource-type restriction on these rules

+ for (let filter of this.requestFilters)

+ addRule(convertFilter(filter, "block", true));

+ for (let filter of this.requestExceptions)

+ addRule(convertFilter(filter, "ignore-previous-rules", true));

+ return rules;

+};

« no previous file with comments | « adblockplus.js ('k') | node_modules/filterClasses.js » ('j') | no next file with comments »