Index: lib/contentBlockerList.js |
diff --git a/lib/contentBlockerList.js b/lib/contentBlockerList.js |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3ecca5f0b8f9f44abb803f9691f56a9ac8e8d56f |
--- /dev/null |
+++ b/lib/contentBlockerList.js |
@@ -0,0 +1,394 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-2016 Eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+/** @module contentBlockerList */ |
+ |
+"use strict"; |
+ |
+let filterClasses = require("filterClasses"); |
+let getBaseDomain = require("urlHelpers").getBaseDomain; |
+let punycode = require("punycode"); |
+ |
+const selectorLimit = 5000; |
+const typeMap = filterClasses.RegExpFilter.typeMap; |
+ |
+function parseDomains(domains, included, excluded) |
+{ |
+ for (let domain in domains) |
+ { |
+ if (domain != "") |
+ { |
+ let enabled = domains[domain]; |
+ domain = punycode.toASCII(domain.toLowerCase()); |
+ |
+ if (!enabled) |
+ excluded.push(domain); |
+ else if (!domains[""]) |
+ included.push(domain); |
+ } |
+ } |
+} |
+ |
+function escapeRegExp(s) |
+{ |
+ return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); |
+} |
+ |
+function matchDomain(domain) |
+{ |
+ return "^https?://([^/:]*\\.)?" + escapeRegExp(domain) + "[/:]"; |
+} |
+ |
+function convertElemHideFilter(filter, elemhideSelectorExceptions) |
+{ |
+ let included = []; |
+ let excluded = []; |
+ let rules = []; |
+ |
+ parseDomains(filter.domains, included, excluded); |
+ |
+ if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
+ return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
+} |
+ |
+function toRegExp(text) |
+{ |
+ let result = []; |
+ let lastIndex = text.length - 1; |
+ |
+ for (let i = 0; i < text.length; i++) |
+ { |
+ let c = text[i]; |
+ |
+ switch (c) |
+ { |
+ case "*": |
+ if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
+ result.push(".*"); |
+ break; |
+ case "^": |
+ if (i < lastIndex) |
+ result.push("."); |
+ break; |
+ case "|": |
+ if (i == 0) |
+ { |
+ result.push("^"); |
+ break; |
+ } |
+ if (i == lastIndex) |
+ { |
+ result.push("$"); |
+ break; |
+ } |
+ if (i == 1 && text[0] == "|") |
+ { |
+ result.push("https?://"); |
+ break; |
+ } |
+ case ".": case "+": case "?": case "$": |
+ case "{": case "}": case "(": case ")": |
+ case "[": case "]": case "\\": |
+ result.push("\\"); |
Sebastian Noack
2016/02/22 17:35:28
Nit: Perhaps we should change the code here to:
Sebastian Noack
2016/02/22 17:37:31
s/missing drop-through/missing break/
kzar
2016/02/22 18:09:29
Done.
|
+ default: |
+ result.push(c); |
+ } |
+ } |
+ |
+ return result.join(""); |
+} |
+ |
+function getRegExpSource(filter) |
+{ |
+ let source = toRegExp(filter.regexpSource.replace( |
+ // Safari expects punycode, filter lists use unicode |
+ /^(\|\||\|?https?:\/\/)([\w\-.*\u0080-\uFFFF]+)/i, |
+ function (match, prefix, domain) |
+ { |
+ return prefix + punycode.toASCII(domain); |
+ } |
+ )); |
+ |
+ // Limit rules to to HTTP(S) URLs |
+ if (!/^(\^|http)/i.test(source)) |
+ source = "^https?://.*" + source; |
+ |
+ return source; |
+} |
+ |
+function getResourceTypes(filter) |
+{ |
+ let types = []; |
+ |
+ if (filter.contentType & typeMap.IMAGE) |
+ types.push("image"); |
+ if (filter.contentType & typeMap.STYLESHEET) |
+ types.push("style-sheet"); |
+ if (filter.contentType & typeMap.SCRIPT) |
+ types.push("script"); |
+ if (filter.contentType & typeMap.FONT) |
+ types.push("font"); |
+ if (filter.contentType & (typeMap.MEDIA | typeMap.OBJECT)) |
+ types.push("media"); |
+ if (filter.contentType & typeMap.POPUP) |
+ types.push("popup"); |
+ if (filter.contentType & (typeMap.XMLHTTPREQUEST | |
+ typeMap.OBJECT_SUBREQUEST | |
+ typeMap.PING | typeMap.OTHER)) |
Sebastian Noack
2016/02/22 17:35:28
Nit: I think it reads slightly better, and looks m
kzar
2016/02/22 18:09:29
Done.
|
+ types.push("raw"); |
+ if (filter.contentType & typeMap.SUBDOCUMENT) |
+ types.push("document"); |
+ |
+ return types; |
+} |
+ |
+function addDomainPrefix(domains) |
+{ |
+ let result = []; |
+ |
+ for (let domain of domains) |
+ { |
+ result.push(domain); |
+ |
+ if (getBaseDomain(domain) == domain) |
+ result.push("www." + domain); |
+ } |
+ |
+ return result; |
+} |
+ |
+function convertFilter(filter, action, withResourceTypes) |
+{ |
+ let trigger = {"url-filter": getRegExpSource(filter)}; |
+ let included = []; |
+ let excluded = []; |
+ |
+ parseDomains(filter.domains, included, excluded); |
+ |
+ if (withResourceTypes) |
+ trigger["resource-type"] = getResourceTypes(filter); |
+ if (filter.thirdParty != null) |
+ trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
+ |
+ if (included.length > 0) |
+ trigger["if-domain"] = addDomainPrefix(included); |
+ else if (excluded.length > 0) |
+ trigger["unless-domain"] = addDomainPrefix(excluded); |
+ |
+ return {trigger: trigger, action: {type: action}}; |
+} |
+ |
+function hasNonASCI(obj) |
+{ |
+ if (typeof obj == "string") |
+ { |
+ if (/[^\x00-\x7F]/.test(obj)) |
+ return true; |
+ } |
+ |
+ if (typeof obj == "object") |
+ { |
+ if (obj instanceof Array) |
+ for (let item of obj) |
+ if (hasNonASCI(item)) |
+ return true; |
+ |
+ let names = Object.getOwnPropertyNames(obj); |
+ for (let name of names) |
+ if (hasNonASCI(obj[name])) |
+ return true; |
+ } |
+ |
+ return false; |
+} |
+ |
+function convertIDSelectorsToAttributeSelectors(selector) |
+{ |
+ // First we figure out where all the IDs are |
+ let sep = ""; |
+ let start = null; |
+ let positions = []; |
+ for (let i = 0; i < selector.length; i++) |
+ { |
+ let chr = selector[i]; |
+ |
+ if (chr == "\\") // ignore escaped characters |
+ i++; |
+ else if (chr == sep) // don't match IDs within quoted text |
+ sep = ""; // e.g. [attr="#Hello"] |
+ else if (sep == "") |
+ { |
+ if (chr == '"' || chr == "'") |
+ sep = chr; |
+ else if (start == null) // look for the start of an ID |
+ { |
+ if (chr == "#") |
+ start = i; |
+ } |
+ else if (chr != "-" && chr != "_" && |
+ (chr < "0" || |
+ chr > "9" && chr < "A" || |
+ chr > "Z" && chr < "a" || |
+ chr > "z" && chr < "\x80")) // look for the end of the ID |
+ { |
+ positions.push({start: start, end: i}); |
+ start = null; |
+ } |
+ } |
+ } |
+ if (start != null) |
+ positions.push({start: start, end: selector.length}); |
+ |
+ // Now replace them all with the [id="someID"] form |
+ let newSelector = []; |
+ let i = 0; |
+ for (let pos of positions) |
+ { |
+ newSelector.push(selector.substring(i, pos.start)); |
+ newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
+ i = pos.end; |
+ } |
+ newSelector.push(selector.substring(i)); |
+ |
+ return newSelector.join(""); |
+} |
+ |
+let ContentBlockerList = |
+/** |
+ * Create a new Adblock Plus filter to content blocker list converter |
+ * |
+ * @constructor |
+ */ |
+exports.ContentBlockerList = function () |
+{ |
+ this.requestFilters = []; |
+ this.requestExceptions = []; |
+ this.elemhideFilters = []; |
+ this.elemhideExceptions = []; |
+ this.elemhideSelectorExceptions = new Map(); |
+}; |
+ |
+ContentBlockerList.prototype = {}; |
Sebastian Noack
2016/02/22 17:35:28
There is no need to set the prototype to an empty
kzar
2016/02/22 18:09:29
Done.
|
+ |
+/** |
+ * Add Adblock Plus filter to be converted |
+ * |
+ * @param {Filter} filter Filter to convert |
+ */ |
+ContentBlockerList.prototype.addFilter = function(filter) |
+{ |
+ if (filter.sitekeys) |
+ return; |
+ if (filter instanceof filterClasses.RegExpFilter && !filter.regexpSource) |
+ return; |
+ |
+ if (filter instanceof filterClasses.BlockingFilter) |
+ this.requestFilters.push(filter); |
+ |
+ if (filter instanceof filterClasses.WhitelistFilter) |
+ { |
+ if (filter.contentType & (typeMap.IMAGE |
+ | typeMap.STYLESHEET |
+ | typeMap.SCRIPT |
+ | typeMap.FONT |
+ | typeMap.MEDIA |
+ | typeMap.POPUP |
+ | typeMap.OBJECT |
+ | typeMap.OBJECT_SUBREQUEST |
+ | typeMap.XMLHTTPREQUEST |
+ | typeMap.PING |
+ | typeMap.SUBDOCUMENT |
+ | typeMap.OTHER)) |
+ this.requestExceptions.push(filter); |
+ |
+ if (filter.contentType & typeMap.ELEMHIDE) |
+ this.elemhideExceptions.push(filter); |
+ } |
+ |
+ if (filter instanceof filterClasses.ElemHideFilter) |
+ this.elemhideFilters.push(filter); |
+ |
+ if (filter instanceof filterClasses.ElemHideException) |
+ { |
+ let domains = this.elemhideSelectorExceptions[filter.selector]; |
+ if (!domains) |
+ domains = this.elemhideSelectorExceptions[filter.selector] = []; |
+ |
+ parseDomains(filter.domains, domains, []); |
+ } |
+}; |
+ |
+/** |
+ * Generate content blocker list for all filters that were added |
+ * |
+ * @returns {Filter} filter Filter to convert |
+ */ |
+ContentBlockerList.prototype.generateRules = function(filter) |
+{ |
+ let rules = []; |
+ |
+ function addRule(rule) |
+ { |
+ if (!hasNonASCI(rule)) |
+ rules.push(rule); |
+ } |
+ |
+ let groupedElemhideFilters = new Map(); |
+ for (let filter of this.elemhideFilters) |
+ { |
+ let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
+ if (!result) |
+ continue; |
+ |
+ if (result.matchDomains.length == 0) |
+ result.matchDomains = ["^https?://"]; |
+ |
+ for (let matchDomain of result.matchDomains) |
+ { |
+ let group = groupedElemhideFilters.get(matchDomain) || []; |
+ group.push(result.selector); |
+ groupedElemhideFilters.set(matchDomain, group); |
+ } |
+ } |
+ |
+ groupedElemhideFilters.forEach((selectors, matchDomain) => |
+ { |
+ while (selectors.length) |
+ { |
+ let selector = selectors.splice(0, selectorLimit).join(", "); |
+ |
+ // As of Safari 9.0 element IDs are matched as lowercase. We work around |
+ // this by converting to the attribute format [id="elementID"] |
+ selector = convertIDSelectorsToAttributeSelectors(selector); |
+ |
+ addRule({ |
+ trigger: {"url-filter": matchDomain}, |
+ action: {type: "css-display-none", |
+ selector: selector} |
+ }); |
+ } |
+ }); |
+ |
+ for (let filter of this.elemhideExceptions) |
+ addRule(convertFilter(filter, "ignore-previous-rules", false)); |
+ for (let filter of this.requestFilters) |
+ addRule(convertFilter(filter, "block", true)); |
+ for (let filter of this.requestExceptions) |
+ addRule(convertFilter(filter, "ignore-previous-rules", true)); |
+ |
+ return rules; |
+}; |