 Issue 29336753:
  Issue 3671 - Split out contentBlockerList API  (Closed)
    
  
    Issue 29336753:
  Issue 3671 - Split out contentBlockerList API  (Closed) 
  | Index: lib/contentBlockerList.js | 
| diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..3ecca5f0b8f9f44abb803f9691f56a9ac8e8d56f | 
| --- /dev/null | 
| +++ b/lib/contentBlockerList.js | 
| @@ -0,0 +1,394 @@ | 
| +/* | 
| + * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| + * Copyright (C) 2006-2016 Eyeo GmbH | 
| + * | 
| + * Adblock Plus is free software: you can redistribute it and/or modify | 
| + * it under the terms of the GNU General Public License version 3 as | 
| + * published by the Free Software Foundation. | 
| + * | 
| + * Adblock Plus is distributed in the hope that it will be useful, | 
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| + * GNU General Public License for more details. | 
| + * | 
| + * You should have received a copy of the GNU General Public License | 
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| + */ | 
| + | 
| +/** @module contentBlockerList */ | 
| + | 
| +"use strict"; | 
| + | 
| +let filterClasses = require("filterClasses"); | 
| +let getBaseDomain = require("urlHelpers").getBaseDomain; | 
| +let punycode = require("punycode"); | 
| + | 
| +const selectorLimit = 5000; | 
| +const typeMap = filterClasses.RegExpFilter.typeMap; | 
| + | 
/**
 * Sorts the entries of a filter's domain map into included and excluded
 * domain lists. Domain names are lower-cased and converted to punycode
 * before being appended.
 *
 * The entry under the empty-string key is never appended itself; when it is
 * truthy, enabled domains are not added to the included list (presumably
 * because the filter then applies everywhere by default).
 *
 * @param {?Object} domains  Map of domain name to enabled flag
 * @param {string[]} included Receives the enabled domains
 * @param {string[]} excluded Receives the disabled domains
 */
function parseDomains(domains, included, excluded)
{
  for (let name in domains)
  {
    if (name == "")
      continue;

    let isEnabled = domains[name];
    let asciiName = punycode.toASCII(name.toLowerCase());

    if (!isEnabled)
    {
      excluded.push(asciiName);
      continue;
    }

    if (!domains[""])
      included.push(asciiName);
  }
}
| + | 
/**
 * Escapes every regular expression metacharacter in a string so that the
 * result matches the string literally when used as a pattern.
 *
 * @param {string} s Text to escape
 * @returns {string} The text with each metacharacter preceded by a backslash
 */
function escapeRegExp(s)
{
  let metaCharacters = /[.*+?^${}()|[\]\\]/g;
  let escaped = s.replace(metaCharacters, "\\$&");
  return escaped;
}
| + | 
/**
 * Builds the source of a regular expression matching HTTP(S) URLs whose
 * host is the given domain or any of its subdomains. The host has to be
 * followed by a slash or a colon.
 *
 * @param {string} domain Domain name, taken literally
 * @returns {string} Regular expression source
 */
function matchDomain(domain)
{
  let schemeAndSubdomains = "^https?://([^/:]*\\.)?";
  let hostTerminator = "[/:]";

  return schemeAndSubdomains + escapeRegExp(domain) + hostTerminator;
}
| + | 
/**
 * Converts an element hiding filter into the URL patterns it applies to
 * plus its selector.
 *
 * Nothing is returned for filters that exclude any domain, or whose selector
 * has an element hiding exception registered.
 *
 * @param {Object} filter Filter providing "domains" and "selector"
 * @param {Object} elemhideSelectorExceptions
 *   Container holding the excepted selectors, either as own properties or as
 *   Map entries
 * @returns {?Object}
 *   Object with "matchDomains" (array of regular expression sources, empty
 *   when the filter isn't restricted to domains) and "selector", or
 *   undefined if the filter cannot be converted
 */
function convertElemHideFilter(filter, elemhideSelectorExceptions)
{
  let included = [];
  let excluded = [];

  parseDomains(filter.domains, included, excluded);

  if (excluded.length > 0)
    return undefined;

  // Only own properties (or real Map entries) count as exceptions. A plain
  // "in" check would also match inherited members such as "size", "get" or
  // "constructor" and wrongly drop filters whose selector has such a name.
  let selector = filter.selector;
  let hasException =
    Object.prototype.hasOwnProperty.call(elemhideSelectorExceptions,
                                         selector) ||
    (elemhideSelectorExceptions instanceof Map &&
     elemhideSelectorExceptions.has(selector));
  if (hasException)
    return undefined;

  return {matchDomains: included.map(matchDomain), selector: selector};
}
| + | 
/**
 * Translates Adblock Plus filter text into regular expression source.
 *
 * - "*" becomes ".*", except at the start of the output, at the end of the
 *   text or when directly followed by another "*", where it is dropped
 * - "^" becomes "." unless it is the last character, where it is dropped
 * - "|" becomes "^" at the start and "$" at the end; the second character
 *   of a leading "||" becomes "https?://"; anywhere else it is escaped
 * - other regular expression metacharacters are escaped with a backslash
 * - everything else is copied unchanged
 *
 * @param {string} text Filter text
 * @returns {string} Regular expression source
 */
function toRegExp(text)
{
  // Characters that always need a backslash. "|" is handled separately as
  // it is only escaped when it isn't an anchor.
  const alwaysEscaped = ".+?${}()[]\\";

  let last = text.length - 1;
  let parts = [];

  for (let pos = 0; pos <= last; pos++)
  {
    let chr = text[pos];

    if (chr == "*")
    {
      if (parts.length > 0 && pos < last && text[pos + 1] != "*")
        parts.push(".*");
    }
    else if (chr == "^")
    {
      if (pos < last)
        parts.push(".");
    }
    else if (chr == "|" && pos == 0)
      parts.push("^");
    else if (chr == "|" && pos == last)
      parts.push("$");
    else if (chr == "|" && pos == 1 && text[0] == "|")
      parts.push("https?://");
    else if (chr == "|" || alwaysEscaped.indexOf(chr) != -1)
      parts.push("\\", chr);
    else
      parts.push(chr);
  }

  return parts.join("");
}
| + | 
/**
 * Produces the "url-filter" regular expression source for a request filter.
 *
 * @param {Object} filter Filter providing "regexpSource"
 * @returns {string}
 *   Regular expression source that is anchored to HTTP(S) URLs
 */
function getRegExpSource(filter)
{
  // Safari expects punycode, filter lists use unicode
  let hostPattern = /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i;
  let asciiSource = filter.regexpSource.replace(
    hostPattern,
    (match, prefix, domain) => prefix + punycode.toASCII(domain)
  );

  let source = toRegExp(asciiSource);

  // Limit rules to HTTP(S) URLs
  if (/^(\^|http)/i.test(source))
    return source;

  return "^https?://.*" + source;
}
| + | 
/**
 * Lists the content blocker resource types corresponding to the content
 * types set in a filter's "contentType" bit mask.
 *
 * @param {Object} filter Filter providing "contentType"
 * @returns {string[]} Resource type names, in a fixed order
 */
function getResourceTypes(filter)
{
  let contentType = filter.contentType;

  // Each entry pairs a bit mask with the resource type emitted when any of
  // its bits is set. The order of the entries is the order of the output.
  let mapping = [
    [typeMap.IMAGE, "image"],
    [typeMap.STYLESHEET, "style-sheet"],
    [typeMap.SCRIPT, "script"],
    [typeMap.FONT, "font"],
    [typeMap.MEDIA | typeMap.OBJECT, "media"],
    [typeMap.POPUP, "popup"],
    [typeMap.XMLHTTPREQUEST
     | typeMap.OBJECT_SUBREQUEST
     | typeMap.PING
     | typeMap.OTHER, "raw"],
    [typeMap.SUBDOCUMENT, "document"]
  ];

  return mapping.filter(entry => contentType & entry[0])
                .map(entry => entry[1]);
}
| + | 
/**
 * Expands a list of domains so that every domain which is its own base
 * domain (as reported by getBaseDomain) is directly followed by its "www."
 * variant.
 *
 * @param {string[]} domains Domain names
 * @returns {string[]} New list containing the domains and added variants
 */
function addDomainPrefix(domains)
{
  let expanded = [];

  for (let name of domains)
  {
    if (getBaseDomain(name) == name)
      expanded.push(name, "www." + name);
    else
      expanded.push(name);
  }

  return expanded;
}
| + | 
/**
 * Converts a request filter into a content blocker rule.
 *
 * @param {Object} filter Filter to convert
 * @param {string} action Value for the rule's action type
 * @param {boolean} withResourceTypes
 *   Whether the trigger should be limited to the filter's resource types
 * @returns {Object} Rule consisting of "trigger" and "action"
 */
function convertFilter(filter, action, withResourceTypes)
{
  let rule = {
    trigger: {"url-filter": getRegExpSource(filter)},
    action: {type: action}
  };
  let trigger = rule.trigger;

  let includedDomains = [];
  let excludedDomains = [];
  parseDomains(filter.domains, includedDomains, excludedDomains);

  if (withResourceTypes)
    trigger["resource-type"] = getResourceTypes(filter);

  // A null or undefined thirdParty means the filter doesn't restrict by party
  if (filter.thirdParty != null)
  {
    let loadType = filter.thirdParty ? "third-party" : "first-party";
    trigger["load-type"] = [loadType];
  }

  // Only one of the two domain conditions is emitted, included domains
  // take precedence
  if (includedDomains.length > 0)
    trigger["if-domain"] = addDomainPrefix(includedDomains);
  else if (excludedDomains.length > 0)
    trigger["unless-domain"] = addDomainPrefix(excludedDomains);

  return rule;
}
| + | 
/**
 * Checks whether a value contains a character outside of the ASCII range.
 * Strings are tested directly, objects and arrays are searched recursively
 * through the values of their own properties. Property names themselves
 * are not examined.
 *
 * @param {*} obj Value to examine
 * @returns {boolean} True if a non-ASCII character was found
 */
function hasNonASCI(obj)
{
  if (typeof obj == "string")
    return /[^\x00-\x7F]/.test(obj);

  // typeof null is "object" as well, but null has no properties to inspect
  // and would make Object.getOwnPropertyNames throw
  if (typeof obj != "object" || obj === null)
    return false;

  // The own property names of an array include its indices, so arrays
  // don't need a separate pass over their items
  return Object.getOwnPropertyNames(obj).some(name => hasNonASCI(obj[name]));
}
| + | 
/**
 * Rewrites the ID selectors of a CSS selector into attribute selectors,
 * e.g. "div#foo" into "div[id=foo]". A "#" inside of quoted text or
 * following a backslash is left alone.
 *
 * @param {string} selector CSS selector
 * @returns {string} Selector with ID selectors replaced
 */
function convertIDSelectorsToAttributeSelectors(selector)
{
  // An ID continues over "-", "_", digits, ASCII letters and any character
  // from "\x80" upwards, everything else terminates it
  function endsID(chr)
  {
    return chr != "-" && chr != "_" &&
           (chr < "0" ||
            chr > "9" && chr < "A" ||
            chr > "Z" && chr < "a" ||
            chr > "z" && chr < "\x80");
  }

  // First pass: collect the [start, end) range of every ID selector
  let quote = "";
  let idStart = null;
  let ranges = [];
  for (let idx = 0; idx < selector.length; idx++)
  {
    let chr = selector[idx];

    if (chr == "\\")
    {
      // Skip the escaped character
      idx++;
      continue;
    }

    if (chr == quote)
    {
      // End of quoted text, e.g. [attr="#Hello"]
      quote = "";
      continue;
    }

    if (quote != "")
      continue;

    if (chr == '"' || chr == "'")
    {
      quote = chr;
      continue;
    }

    if (idStart == null)
    {
      if (chr == "#")
        idStart = idx;
      continue;
    }

    if (endsID(chr))
    {
      ranges.push([idStart, idx]);
      idStart = null;
    }
  }
  if (idStart != null)
    ranges.push([idStart, selector.length]);

  // Second pass: stitch the selector back together with the attribute form
  let converted = "";
  let cursor = 0;
  for (let [from, to] of ranges)
  {
    converted += selector.substring(cursor, from) +
                 '[id=' + selector.substring(from + 1, to) + ']';
    cursor = to;
  }

  return converted + selector.substring(cursor);
}
| + | 
/**
 * Create a new Adblock Plus filter to content blocker list converter
 *
 * Filters are collected with addFilter() and turned into content blocker
 * rules by generateRules().
 *
 * @constructor
 */
let ContentBlockerList = exports.ContentBlockerList = function ()
{
  // Blocking filters, converted into "block" rules
  this.requestFilters = [];
  // Whitelist filters applying to requests
  this.requestExceptions = [];
  // Element hiding filters
  this.elemhideFilters = [];
  // Whitelist filters applying to element hiding
  this.elemhideExceptions = [];
  // Maps selectors of element hiding exceptions to their domains. This is
  // used as a property bag keyed by selector, so it has to be an object
  // without a prototype: inherited members (e.g. the "size", "get" and
  // "set" of a Map) would otherwise be mistaken for registered selectors.
  this.elemhideSelectorExceptions = Object.create(null);
};
| 
Sebastian Noack
2016/02/22 17:35:28
There is no need to set the prototype to an empty object.
 
kzar
2016/02/22 18:09:29
Done.
 | 
| + | 
/**
 * Add Adblock Plus filter to be converted
 *
 * Filters with sitekeys and regular expression filters without a
 * "regexpSource" are ignored. All other filters are stored by kind until
 * generateRules() is called.
 *
 * @param {Filter} filter Filter to convert
 */
ContentBlockerList.prototype.addFilter = function(filter)
{
  if (filter.sitekeys)
    return;

  let isRegExpFilter = filter instanceof filterClasses.RegExpFilter;
  if (isRegExpFilter && !filter.regexpSource)
    return;

  if (filter instanceof filterClasses.BlockingFilter)
    this.requestFilters.push(filter);

  if (filter instanceof filterClasses.WhitelistFilter)
  {
    // Content types for which a whitelist filter is a request exception
    let requestTypes = typeMap.IMAGE
                       | typeMap.STYLESHEET
                       | typeMap.SCRIPT
                       | typeMap.FONT
                       | typeMap.MEDIA
                       | typeMap.POPUP
                       | typeMap.OBJECT
                       | typeMap.OBJECT_SUBREQUEST
                       | typeMap.XMLHTTPREQUEST
                       | typeMap.PING
                       | typeMap.SUBDOCUMENT
                       | typeMap.OTHER;

    if (filter.contentType & requestTypes)
      this.requestExceptions.push(filter);

    if (filter.contentType & typeMap.ELEMHIDE)
      this.elemhideExceptions.push(filter);
  }

  if (filter instanceof filterClasses.ElemHideFilter)
    this.elemhideFilters.push(filter);

  if (filter instanceof filterClasses.ElemHideException)
  {
    // Collect the domains of all exceptions for a selector in one list
    let exceptions = this.elemhideSelectorExceptions;
    let selector = filter.selector;

    if (!exceptions[selector])
      exceptions[selector] = [];

    parseDomains(filter.domains, exceptions[selector], []);
  }
};
| + | 
/**
 * Generate content blocker list for all filters that were added
 *
 * Element hiding rules come first, followed by the element hiding
 * exceptions, the blocking rules and finally the request exceptions. Rules
 * containing non-ASCII characters are left out.
 *
 * @param {Filter} [filter] Not used
 * @returns {Object[]} Content blocker rules
 */
ContentBlockerList.prototype.generateRules = function(filter)
{
  let rules = [];

  let addRule = rule =>
  {
    if (!hasNonASCI(rule))
      rules.push(rule);
  };

  // Group the selectors of all convertible element hiding filters by the
  // URL pattern they apply to
  let selectorsByPattern = new Map();
  for (let elemhideFilter of this.elemhideFilters)
  {
    let converted = convertElemHideFilter(elemhideFilter,
                                          this.elemhideSelectorExceptions);
    if (!converted)
      continue;

    // Filters without domains apply to every HTTP(S) URL
    let patterns = converted.matchDomains;
    if (patterns.length == 0)
      patterns = ["^https?://"];

    for (let pattern of patterns)
    {
      if (!selectorsByPattern.has(pattern))
        selectorsByPattern.set(pattern, []);
      selectorsByPattern.get(pattern).push(converted.selector);
    }
  }

  // Emit one rule for every chunk of at most selectorLimit selectors
  for (let [pattern, selectors] of selectorsByPattern)
  {
    for (let offset = 0; offset < selectors.length; offset += selectorLimit)
    {
      let chunk = selectors.slice(offset, offset + selectorLimit);

      // As of Safari 9.0 element IDs are matched as lowercase. We work around
      // this by converting to the attribute format [id=elementID]
      let selector = convertIDSelectorsToAttributeSelectors(chunk.join(", "));

      addRule({
        trigger: {"url-filter": pattern},
        action: {type: "css-display-none",
                 selector: selector}
      });
    }
  }

  for (let exception of this.elemhideExceptions)
    addRule(convertFilter(exception, "ignore-previous-rules", false));
  for (let request of this.requestFilters)
    addRule(convertFilter(request, "block", true));
  for (let exception of this.requestExceptions)
    addRule(convertFilter(exception, "ignore-previous-rules", true));

  return rules;
};