lib/domain.js - Issue 29426579: Noissue - Reimplement public suffix matching more efficiently

Side by Side Diff

Use n/p to move between diff chunks; N/P to move between comments.

Keyboard Shortcuts

	File
u :	up to issue
m :	publish + mail comments
M :	edit review message
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line
<Enter> :	respond to / edit current comment
d :	mark current comment as done

	Issue
u :	up to list of issues
m :	publish + mail comments
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue
# :	close issue

	Comment/message editing
<Ctrl> + s or <Ctrl> + Enter :	save comment
<Esc> :	cancel edit

Side by Side Diff: lib/domain.js

Issue 29426579: Noissue - Reimplement public suffix matching more efficiently (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist

Patch Set: Created May 1, 2017, 6:48 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

	3 * Copyright (C) 2006-2017 eyeo GmbH

	4 *

	5 * Adblock Plus is free software: you can redistribute it and/or modify

	6 * it under the terms of the GNU General Public License version 3 as

	7 * published by the Free Software Foundation.

	8 *

	9 * Adblock Plus is distributed in the hope that it will be useful,

	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

	11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

	12 * GNU General Public License for more details.

	13 *

	14 * You should have received a copy of the GNU General Public License

	15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

	16 */

	17

	18 /** @module domain */

	19

	20 // This is a mostly correct and relatively efficient implementation of the

	21 // Public Suffix List specification available at https://publicsuffix.org/list/

	22

	23 "use strict";

	24

	25 const fs = require("fs");

	26 const path = require("path");

	27

	28 const punycode = require("punycode");

	29

	30 let initialized = false;

	31

	32 let publicSuffixTree = null;

	33

	/**
	 * Bring a single rule label into canonical form.
	 *
	 * The wildcard label "*" is returned unchanged. Any other label is
	 * lower-cased and converted with punycode.toASCII(); a leading "!"
	 * (exception marker) is kept in front of the converted name.
	 *
	 * @param {string} label A single label taken from a rule
	 *
	 * @returns {string} The canonicalized label
	 */
	function canonicalizeLabel(label)
	{
	  // The wildcard is already canonical.
	  if (label == "*")
	    return label;

	  let isException = label.charAt(0) == "!";
	  let bareName = isException ? label.substring(1) : label;

	  // Lower-case and Punycode the name, without the exception marker.
	  let canonicalName = punycode.toASCII(bareName.toLowerCase());

	  return isException ? "!" + canonicalName : canonicalName;
	}

	49

	/**
	 * Insert a sequence of labels into a tree of nested Maps.
	 *
	 * Labels are consumed from the end of the array (the array is emptied in
	 * the process), so the last label becomes a key of the given node, the
	 * one before it a key of that child, and so on. Missing child Maps are
	 * created along the way.
	 *
	 * @param {string[]} labels The labels of a rule; popped until empty
	 * @param {Map} node The tree node under which to register the labels
	 */
	function registerLabels(labels, node)
	{
	  let current = node;

	  // At least one label is always consumed, even if the array is empty.
	  do
	  {
	    let key = canonicalizeLabel(labels.pop());

	    let child = current.get(key);
	    if (!child)
	    {
	      child = new Map();
	      current.set(key, child);
	    }

	    current = child;
	  }
	  while (labels.length > 0);
	}

	63

	/**
	 * Add one rule to the public suffix tree.
	 *
	 * @param {string} text The rule, with its labels separated by dots
	 */
	function addPublicSuffixRule(text)
	{
	  // Split into labels; they are registered starting from the last one.
	  let labels = text.split(".");

	  registerLabels(labels, publicSuffixTree);
	}

	69

	/**
	 * Read public_suffix_list.dat from the parent directory of this module and
	 * rebuild the public suffix tree from the rules it contains.
	 *
	 * The tree is replaced with a fresh Map only after the file has been read.
	 * Blank lines and lines starting with "//" are skipped.
	 */
	function loadPublicSuffixRules()
	{
	  let listPath = path.join(__dirname, "..", "public_suffix_list.dat");
	  let content = fs.readFileSync(listPath, "utf8");

	  publicSuffixTree = new Map();

	  for (let line of content.split("\n"))
	  {
	    // The list format specifies that a line is only read up to the first
	    // whitespace. Trimming first also drops a trailing "\r" left behind by
	    // CRLF line endings, which would otherwise end up inside a label.
	    let rule = line.trim().split(/\s/, 1)[0];

	    // Skip blank lines and comments.
	    if (!rule || rule.startsWith("//"))
	      continue;

	    addPublicSuffixRule(rule);
	  }
	}

	90

	/**
	 * Build the public suffix tree on first use. Later calls do nothing.
	 */
	function initialize()
	{
	  if (!initialized)
	  {
	    loadPublicSuffixRules();

	    // Add the implicit "*" rule after the rules from the list.
	    addPublicSuffixRule("*");

	    initialized = true;
	  }
	}

	103

	104 /**

	105 * Return the base domain for a given domain based on Mozilla's Public Suffix

	106 * List.

	107 *

	108 * @param {string} The canonicalized domain (lower-case, Punycoded) for which

	109 * to return the base domain

	110 *

	111 * @returns {string} The base domain for the given domain

	112 */

	113 exports.getBaseDomain = function(domain)

	114 {

	115 if (!initialized)

	116 initialize();

	117

	118 let names = domain.split(".").reverse();

	119 let node = publicSuffixTree;

	120

	121 let name = "";

	122 let level = 0;

	123

	124 for (name of names)

	125 {

	126 let childNode = node.get(name);

	127

	128 if (!childNode)

	129 {

	130 // If we don't have an exact match, look for a wildcard, but ignore it if

	131 // there's an exception for this name.

	132 //

	133 // Note: This is where we differ from the specification. We're supposed

	134 // to follow both paths and pick the longer one, but we simply stick with

	135 // the more specific path instead. This appears not to affect us in

	136 // practice; however, in the interest of correctness we might want to fix

	137 // it sometime.

	138 childNode = !node.has("!" + name) && node.get("*");

	139 }

	140

	141 node = childNode;

	142

	143 if (!node)

	144 break;

	145

	146 level++;

	147 }

	148

	149 if (!node)

	150 {

	151 // Construct the base domain by combining the last unmatched name with the

	152 // public suffix.

	153 let baseDomain = names.slice(0, level).reverse().join(".");

	154

	155 if (baseDomain && name)

	156 baseDomain = name + "." + baseDomain;

	157

	158 return baseDomain;

	159 }

	160

	161 return null;

	162 };

OLD	NEW

« no previous file with comments | « lib/abp2blocklist.js ('k') | package.json » ('j') | no next file with comments »