Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/domain.js

Issue 29426579: Noissue - Reimplement public suffix matching more efficiently (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Created May 1, 2017, 6:48 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/abp2blocklist.js ('k') | package.json » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/domain.js
===================================================================
new file mode 100644
--- /dev/null
+++ b/lib/domain.js
@@ -0,0 +1,162 @@
+/*
+ * This file is part of Adblock Plus <https://adblockplus.org/>,
+ * Copyright (C) 2006-2017 eyeo GmbH
+ *
+ * Adblock Plus is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * Adblock Plus is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/** @module domain */
+
+// This is a mostly correct and relatively efficient implementation of the
+// Public Suffix List specification available at https://publicsuffix.org/list/
+
+"use strict";
+
+const fs = require("fs");
+const path = require("path");
+
+const punycode = require("punycode");
+
+let initialized = false; // Set to true at the end of initialize().
+
+let publicSuffixTree = null; // Root Map of the rule tree; created on load.
+
+function canonicalizeLabel(label)
+{
+ if (label != "*")
+ {
+ let exception = label.charAt(0) == "!";
+ let name = exception ? label.substring(1) : label;
+
+ // Lower-case and Punycode the name.
+ name = punycode.toASCII(name.toLowerCase());
+
+ label = (exception ? "!" : "") + name;
+ }
+
+ return label;
+}
+
+function registerLabels(labels, node)
+{
+ let currentLabel = labels.pop();
+
+ currentLabel = canonicalizeLabel(currentLabel);
+
+ let childNode = node.get(currentLabel);
+ if (!childNode)
+ node.set(currentLabel, childNode = new Map());
+
+ if (labels.length > 0)
+ registerLabels(labels, childNode);
+}
+
+function addPublicSuffixRule(text)
+{
+ // Add rule to the public suffix tree.
+ registerLabels(text.split("."), publicSuffixTree);
+}
+
+function loadPublicSuffixRules()
+{
+ let content = fs.readFileSync(path.join(__dirname, "..",
+ "public_suffix_list.dat"),
+ "utf8");
+
+ let nonBlank = new RegExp(/[^\s]/);
+ let comment = new RegExp(/^\s*\/\//);
+
+ publicSuffixTree = new Map();
+
+ for (let line of content.split("\n"))
+ {
+ // Skip blank lines and comments.
+ if (!line || !nonBlank.test(line) || comment.test(line))
+ continue;
+
+ addPublicSuffixRule(line);
+ }
+}
+
+function initialize()
+{
+ if (initialized)
+ return;
+
+ loadPublicSuffixRules();
+
+ // Add implicit "*" rule.
+ addPublicSuffixRule("*");
+
+ initialized = true;
+}
+
+/**
+ * Return the base domain for a given domain based on Mozilla's Public Suffix
+ * List.
+ *
+ * @param {string} The canonicalized domain (lower-case, Punycoded) for which
+ * to return the base domain
+ *
+ * @returns {string} The base domain for the given domain
+ */
+exports.getBaseDomain = function(domain)
+{
+ if (!initialized)
+ initialize();
+
+ let names = domain.split(".").reverse();
+ let node = publicSuffixTree;
+
+ let name = "";
+ let level = 0;
+
+ for (name of names)
+ {
+ let childNode = node.get(name);
+
+ if (!childNode)
+ {
+ // If we don't have an exact match, look for a wildcard, but ignore it if
+ // there's an exception for this name.
+ //
+ // Note: This is where we differ from the specification. We're supposed
+ // to follow both paths and pick the longer one, but we simply stick with
+ // the more specific path instead. This appears not to affect us in
+ // practice; however, in the interest of correctness we might want to fix
+ // it sometime.
+ childNode = !node.has("!" + name) && node.get("*");
+ }
+
+ node = childNode;
+
+ if (!node)
+ break;
+
+ level++;
+ }
+
+ if (!node)
+ {
+ // Construct the base domain by combining the last unmatched name with the
+ // public suffix.
+ let baseDomain = names.slice(0, level).reverse().join(".");
+
+ if (baseDomain && name)
+ baseDomain = name + "." + baseDomain;
+
+ return baseDomain;
+ }
+
+ return null;
+};
« no previous file with comments | « lib/abp2blocklist.js ('k') | package.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld