| Index: lib/domain.js |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/lib/domain.js |
| @@ -0,0 +1,162 @@ |
| +/* |
| + * This file is part of Adblock Plus <https://adblockplus.org/>, |
| + * Copyright (C) 2006-2017 eyeo GmbH |
| + * |
| + * Adblock Plus is free software: you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License version 3 as |
| + * published by the Free Software Foundation. |
| + * |
| + * Adblock Plus is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + * You should have received a copy of the GNU General Public License |
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + */ |
| + |
| +/** @module domain */ |
| + |
| +// This is a mostly correct and relatively efficient implementation of the |
| +// Public Suffix List specification available at https://publicsuffix.org/list/ |
| + |
| +"use strict"; |
| + |
| +const fs = require("fs"); |
| +const path = require("path"); |
| + |
| +const punycode = require("punycode"); |
| + |
| +let initialized = false;  // Set to true at the end of initialize(). |
| + |
| +let publicSuffixTree = null;  // Root Map of the rule tree, created on load. |
| + |
| +// Canonicalize one rule label: the name is lower-cased and Punycoded, a |
| +// leading "!" exception marker is kept, and "*" is returned untouched. |
| +function canonicalizeLabel(label) |
| +{ |
| +  // The wildcard label has no name that could be normalized. |
| +  if (label == "*") |
| +    return label; |
| + |
| +  // Detach the exception marker so that only the name gets encoded. |
| +  let marker = label.charAt(0) == "!" ? "!" : ""; |
| +  let bareName = label.substring(marker.length); |
| + |
| +  // Lower-case and Punycode the name, then put the marker back in front. |
| +  return marker + punycode.toASCII(bareName.toLowerCase()); |
| +} |
| + |
| +// Walk from node along labels (consumed last to first), adding missing Maps. |
| +function registerLabels(labels, node) |
| +{ |
| +  let parent = node; |
| +  do |
| +  { |
| +    let key = canonicalizeLabel(labels.pop()); |
| +    if (!parent.get(key)) |
| +      parent.set(key, new Map()); |
| +    parent = parent.get(key); |
| +  } |
| +  while (labels.length > 0); |
| +} |
| + |
| +function addPublicSuffixRule(text) |
| +{ |
| +  let ruleLabels = text.split(".");  // One label per dot-separated part. |
| +  registerLabels(ruleLabels, publicSuffixTree); |
| +} |
| + |
| +// Read public_suffix_list.dat from the directory above this module and |
| +// rebuild the public suffix tree from its rules. Throws if reading fails. |
| +function loadPublicSuffixRules() |
| +{ |
| +  let listPath = path.join(__dirname, "..", "public_suffix_list.dat"); |
| +  let content = fs.readFileSync(listPath, "utf8"); |
| + |
| +  publicSuffixTree = new Map(); |
| + |
| +  for (let line of content.split("\n")) |
| +  { |
| +    // Ignore surrounding whitespace (including the "\r" of CRLF line endings) |
| +    // and, per the specification, stop reading at the first whitespace. |
| +    let rule = line.trim().split(/\s/, 1)[0]; |
| + |
| +    // Skip blank lines and comments. |
| +    if (rule && !rule.startsWith("//")) |
| +      addPublicSuffixRule(rule); |
| +  } |
| +} |
| + |
| +// Build the public suffix tree on first use; later calls do nothing. |
| +function initialize() |
| +{ |
| +  if (!initialized) |
| +  { |
| +    loadPublicSuffixRules(); |
| +    // Add the implicit "*" rule on top of the rules loaded from the file. |
| +    addPublicSuffixRule("*"); |
| + |
| +    initialized = true; |
| +  } |
| +} |
| + |
| +/** |
| + * Return the base domain for a given domain based on Mozilla's Public Suffix |
| + * List. The public suffix tree is walked label by label, starting with the |
| + * rightmost label, until a label has no match; that label plus the labels |
| + * matched before it form the result. |
| + * |
| + * @param {string} domain The canonicalized domain (lower-case, Punycoded) |
| + *                        for which to return the base domain |
| + * |
| + * @returns {?string} The base domain for the given domain, or null if every |
| + *                    label of the domain was matched in the suffix tree |
| + */ |
| +exports.getBaseDomain = function(domain) |
| +{ |
| +  // Builds the public suffix tree on the first call, a no-op afterwards. |
| +  initialize(); |
| + |
| +  // The domain is used as given; it is neither lower-cased nor Punycoded |
| +  // here. Its labels are looked up from right to left, so the top level |
| +  // comes first. |
| +  let labels = domain.split(".").reverse(); |
| +  let parent = publicSuffixTree; |
| + |
| +  for (let depth = 0; depth < labels.length; depth++) |
| +  { |
| +    let label = labels[depth]; |
| +    let next = parent.get(label); |
| + |
| +    // Without an exact match, fall back to the wildcard child, unless an |
| +    // exception entry ("!" + label) rules that out for this label. |
| +    // |
| +    // Note: This deviates from the specification, which asks for both the |
| +    // exact and the wildcard path to be followed and the longer match to |
| +    // win. Only the more specific path is followed here. That appears not |
| +    // to matter in practice, but might be worth fixing for correctness. |
| +    if (!next && !parent.has("!" + label)) |
| +      next = parent.get("*"); |
| + |
| +    if (!next) |
| +    { |
| +      // The walk stops at the first unmatched label. The labels matched so |
| +      // far make up the public suffix. |
| +      let suffix = labels.slice(0, depth).reverse().join("."); |
| + |
| +      // Construct the base domain by putting the unmatched label in front |
| +      // of the public suffix. If either of the two is empty, the suffix is |
| +      // returned on its own. |
| +      if (!suffix || !label) |
| +        return suffix; |
| + |
| +      return label + "." + suffix; |
| +    } |
| + |
| +    parent = next; |
| +  } |
| + |
| +  // Every label was matched, so no label is left to put before the suffix. |
| +  return null; |
| +}; |