lib/domain.js - Issue 29426579: Noissue - Reimplement public suffix matching more efficiently

Keyboard Shortcuts

	File
u :	up to issue
m :	publish + mail comments
M :	edit review message
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line
<Enter> :	respond to / edit current comment
d :	mark current comment as done

	Issue
u :	up to list of issues
m :	publish + mail comments
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue
# :	close issue

	Comment/message editing
<Ctrl> + s or <Ctrl> + Enter :	save comment
<Esc> :	cancel edit

Unified Diff: lib/domain.js

Issue 29426579: Noissue - Reimplement public suffix matching more efficiently (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Created May 1, 2017, 6:48 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: lib/domain.js

===================================================================

new file mode 100644

--- /dev/null

+++ b/lib/domain.js

@@ -0,0 +1,162 @@

+/*

+ * This file is part of Adblock Plus <https://adblockplus.org/>,

+ *

+ * Adblock Plus is free software: you can redistribute it and/or modify

+ * it under the terms of the GNU General Public License version 3 as

+ * published by the Free Software Foundation.

+ *

+ * Adblock Plus is distributed in the hope that it will be useful,

+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+ * GNU General Public License for more details.

+ *

+ * You should have received a copy of the GNU General Public License

+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

+ */

+/** @module domain */

+// This is a mostly correct and relatively efficient implementation of the

+// Public Suffix List specification available at https://publicsuffix.org/list/

+"use strict";

+const fs = require("fs");

+const path = require("path");

+const punycode = require("punycode");

// Set to true by initialize() once the public suffix tree has been built.
let initialized = false;

// Root of the public suffix tree: a Map from a canonicalized label to the Map
// holding the labels that may precede it in a rule. Stays null until
// loadPublicSuffixRules() assigns a fresh Map.
let publicSuffixTree = null;

/**
 * Canonicalize a single label of a public suffix rule.
 *
 * The wildcard label "*" is returned unchanged. Any other label is lower-cased
 * and converted to Punycode. A leading "!" is split off before the conversion
 * and put back in front of the converted name afterwards.
 *
 * @param {string} label A single dot-separated component of a rule
 *
 * @returns {string} The canonicalized label
 */
function canonicalizeLabel(label)
{
  if (label === "*")
    return label;

  let exception = label.charAt(0) === "!";
  let name = exception ? label.substring(1) : label;

  // Lower-case and Punycode the name.
  name = punycode.toASCII(name.toLowerCase());

  return (exception ? "!" : "") + name;
}

/**
 * Insert a rule, given as its list of labels, into a tree of nested Maps.
 *
 * Labels are processed from the last to the first, so the right-most label
 * becomes a key of the given node and each preceding label is stored one
 * level deeper. Maps that already exist for a label are reused, which lets
 * rules with a common ending share the same path.
 *
 * @param {string[]} labels The labels of the rule in written order; the array
 *                          is left unmodified
 * @param {Map} node The tree node at which to start inserting
 */
function registerLabels(labels, node)
{
  let current = node;

  for (let i = labels.length - 1; i >= 0; i--)
  {
    let currentLabel = canonicalizeLabel(labels[i]);

    let childNode = current.get(currentLabel);
    if (!childNode)
      current.set(currentLabel, childNode = new Map());

    current = childNode;
  }
}

/**
 * Add a single rule to the module-level public suffix tree.
 *
 * @param {string} text The rule as dot-separated labels; a label may be the
 *                      wildcard "*" or start with "!"
 */
function addPublicSuffixRule(text)
{
  // Add rule to the public suffix tree.
  registerLabels(text.split("."), publicSuffixTree);
}

/**
 * Read public_suffix_list.dat from the directory above this module and
 * rebuild the public suffix tree from the rules it contains.
 *
 * Any previously built tree is discarded. The file is read synchronously, so
 * an error from fs.readFileSync propagates to the caller.
 */
function loadPublicSuffixRules()
{
  let content = fs.readFileSync(
    path.join(__dirname, "..", "public_suffix_list.dat"),
    "utf8"
  );

  publicSuffixTree = new Map();

  for (let line of content.split("\n"))
  {
    // The list format defines a rule as the text up to the first whitespace.
    // Cutting the line there also drops a trailing "\r" and keeps trailing
    // text out of the last label. Leading whitespace is tolerated.
    let rule = line.trim().split(/\s/, 1)[0];

    // Skip blank lines and comments.
    if (!rule || rule.startsWith("//"))
      continue;

    addPublicSuffixRule(rule);
  }
}

/**
 * Build the public suffix tree the first time this is called; later calls
 * return immediately.
 */
function initialize()
{
  if (initialized)
    return;

  loadPublicSuffixRules();

  // Add implicit "*" rule.
  addPublicSuffixRule("*");

  // Only flagged after both steps succeed, so a failed load is retried on
  // the next call.
  initialized = true;
}

+/**

+ * Return the base domain for a given domain based on Mozilla's Public Suffix

+ * List.

+ *

+ * @param {string} The canonicalized domain (lower-case, Punycoded) for which

+ * to return the base domain

+ *

+ * @returns {string} The base domain for the given domain

+ */

+exports.getBaseDomain = function(domain)

+ if (!initialized)

+ initialize();

+ let names = domain.split(".").reverse();

+ let node = publicSuffixTree;

+ let name = "";

+ let level = 0;

+ for (name of names)

+ {

+ let childNode = node.get(name);

+ if (!childNode)

+ {

+ // If we don't have an exact match, look for a wildcard, but ignore it if

+ // there's an exception for this name.

+ //

+ // Note: This is where we differ from the specification. We're supposed

+ // to follow both paths and pick the longer one, but we simply stick with

+ // the more specific path instead. This appears not to affect us in

+ // practice; however, in the interest of correctness we might want to fix

+ // it sometime.

+ childNode = !node.has("!" + name) && node.get("*");

+ }

+ node = childNode;

+ if (!node)

+ break;

+ level++;

+ }

+ if (!node)

+ {

+ // Construct the base domain by combining the last unmatched name with the

+ // public suffix.

+ let baseDomain = names.slice(0, level).reverse().join(".");

+ if (baseDomain && name)

+ baseDomain = name + "." + baseDomain;

+ return baseDomain;

+ }

+ return null;

+};

« no previous file with comments | « lib/abp2blocklist.js ('k') | package.json » ('j') | no next file with comments »