Index: lib/domain.js |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/lib/domain.js |
@@ -0,0 +1,162 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-2017 eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+/** @module domain */ |
+ |
+// This is a mostly correct and relatively efficient implementation of the |
+// Public Suffix List specification available at https://publicsuffix.org/list/ |
+ |
+"use strict"; |
+ |
+const fs = require("fs"); |
+const path = require("path"); |
+ |
+const punycode = require("punycode"); |
+ |
+// Set to true by initialize() once the rules have been loaded, so that the |
+// list file is read at most once. |
+let initialized = false; |
+ |
+// Root of the rule tree; null until loadPublicSuffixRules() assigns a Map. |
+// Each node is a Map from a canonicalized label to the child node (also a |
+// Map). Rules are stored right-most label first. |
+let publicSuffixTree = null; |
+ |
+/** |
+ * Brings a single rule label into the form used for keys in the tree. |
+ * |
+ * The wildcard label "*" is returned untouched. Any other label is |
+ * lower-cased and converted to Punycode; a leading "!" (exception marker) is |
+ * kept in front of the converted name rather than being converted with it. |
+ * |
+ * @param {string} label A label taken from a public suffix rule |
+ * |
+ * @returns {string} The canonicalized label |
+ */ |
+function canonicalizeLabel(label) |
+{ |
+  // Wildcards have nothing to normalize. |
+  if (label == "*") |
+    return label; |
+ |
+  // Split off the exception marker, if any, so only the name is converted. |
+  let marker = label.charAt(0) == "!" ? "!" : ""; |
+  let bareName = label.substring(marker.length); |
+ |
+  return marker + punycode.toASCII(bareName.toLowerCase()); |
+} |
+ |
+/** |
+ * Inserts a rule into the tree, creating any missing nodes along the way. |
+ * |
+ * Labels are consumed from the end of the array, so the right-most label of |
+ * the rule becomes a child of the given node and each preceding label is |
+ * nested one level deeper. The array is emptied in the process. |
+ * |
+ * @param {string[]} labels The labels of the rule, in their original order |
+ * @param {Map} node The tree node under which to register the labels |
+ */ |
+function registerLabels(labels, node) |
+{ |
+  // At least one label is always processed, hence do/while. |
+  do |
+  { |
+    let key = canonicalizeLabel(labels.pop()); |
+ |
+    let child = node.get(key); |
+    if (!child) |
+    { |
+      child = new Map(); |
+      node.set(key, child); |
+    } |
+ |
+    // Descend; the next label (if any) goes below this one. |
+    node = child; |
+  } |
+  while (labels.length > 0); |
+} |
+ |
+/** |
+ * Registers one rule in the public suffix tree. |
+ * |
+ * @param {string} text The rule, with its labels separated by dots |
+ */ |
+function addPublicSuffixRule(text) |
+{ |
+  let labels = text.split("."); |
+ |
+  // Insertion always starts at the root of the tree. |
+  registerLabels(labels, publicSuffixTree); |
+} |
+ |
+/** |
+ * Reads public_suffix_list.dat from the directory above this module and |
+ * rebuilds publicSuffixTree from the rules it contains. |
+ * |
+ * Per the list format, a rule extends only up to the first whitespace on its |
+ * line. Surrounding whitespace (including a "\r" left over from CRLF line |
+ * endings) and anything following the rule are therefore discarded instead |
+ * of ending up inside a label. Blank lines and "//" comment lines are |
+ * skipped. |
+ */ |
+function loadPublicSuffixRules() |
+{ |
+  let content = fs.readFileSync( |
+    path.join(__dirname, "..", "public_suffix_list.dat"), |
+    "utf8" |
+  ); |
+ |
+  publicSuffixTree = new Map(); |
+ |
+  for (let line of content.split("\n")) |
+  { |
+    // Drop leading whitespace, then keep everything up to the next whitespace. |
+    let rule = line.trim().split(/\s/, 1)[0]; |
+ |
+    // Skip blank lines and comments. |
+    if (!rule || rule.startsWith("//")) |
+      continue; |
+ |
+    addPublicSuffixRule(rule); |
+  } |
+} |
+ |
+/** |
+ * Builds the public suffix tree on the first call; later calls do nothing. |
+ */ |
+function initialize() |
+{ |
+  if (!initialized) |
+  { |
+    loadPublicSuffixRules(); |
+ |
+    // Add implicit "*" rule. |
+    addPublicSuffixRule("*"); |
+ |
+    initialized = true; |
+  } |
+} |
+ |
+/** |
+ * Return the base domain for a given domain based on Mozilla's Public Suffix |
+ * List. The base domain is the matched public suffix plus the one label that |
+ * precedes it. |
+ * |
+ * @param {string} domain The canonicalized domain (lower-case, Punycoded) for |
+ *                        which to return the base domain |
+ * |
+ * @returns {?string} The base domain for the given domain, or null if every |
+ *                    label of the domain was matched by the rule tree (that |
+ *                    is, no label is left over to put in front of the suffix) |
+ */ |
+exports.getBaseDomain = function(domain) |
+{ |
+  if (!initialized) |
+    initialize(); |
+ |
+  // Walk the tree from the right-most label towards the left-most one. |
+  let labels = domain.split(".").reverse(); |
+  let node = publicSuffixTree; |
+ |
+  for (let matched = 0; matched < labels.length; matched++) |
+  { |
+    let label = labels[matched]; |
+    let next = node.get(label); |
+ |
+    // If we don't have an exact match, look for a wildcard, but ignore it if |
+    // there's an exception for this label. |
+    // |
+    // Note: This is where we differ from the specification. We're supposed |
+    // to follow both paths and pick the longer one, but we simply stick with |
+    // the more specific path instead. This appears not to affect us in |
+    // practice; however, in the interest of correctness we might want to fix |
+    // it sometime. |
+    if (!next && !node.has("!" + label)) |
+      next = node.get("*"); |
+ |
+    if (!next) |
+    { |
+      // The labels matched so far form the public suffix; the base domain is |
+      // that suffix with the first unmatched label in front of it. |
+      let suffix = labels.slice(0, matched).reverse().join("."); |
+ |
+      if (suffix && label) |
+        return label + "." + suffix; |
+ |
+      return suffix; |
+    } |
+ |
+    node = next; |
+  } |
+ |
+  // Every label was matched, so there is nothing left to prepend. |
+  return null; |
+}; |