Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/domain.js

Issue 29426579: Noissue - Reimplement public suffix matching more efficiently (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Created May 1, 2017, 6:48 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/abp2blocklist.js ('k') | package.json » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /** @module domain */
19
20 // This is a mostly correct and relatively efficient implementation of the
21 // Public Suffix List specification available at https://publicsuffix.org/list/
22
23 "use strict";
24
25 const fs = require("fs");
26 const path = require("path");
27
28 const punycode = require("punycode");
29
30 let initialized = false;
31
32 let publicSuffixTree = null;
33
/**
 * Brings a single label of a public suffix rule into canonical form.
 *
 * The wildcard label "*" is returned untouched. Any other label is
 * lower-cased and converted to its Punycode (ASCII) form; a leading "!"
 * (exception marker) is kept in front of the converted name rather than
 * being passed through the conversion.
 *
 * @param {string} label A label as it appears in a rule
 * @returns {string} The canonicalized label
 */
function canonicalizeLabel(label)
{
  // Wildcards have nothing to normalize.
  if (label == "*")
    return label;

  // Split off the exception marker, if any, so that only the actual name is
  // normalized.
  let marker = "";
  let name = label;
  if (label.charAt(0) == "!")
  {
    marker = "!";
    name = label.substring(1);
  }

  return marker + punycode.toASCII(name.toLowerCase());
}
49
50 function registerLabels(labels, node)
51 {
52 let currentLabel = labels.pop();
53
54 currentLabel = canonicalizeLabel(currentLabel);
55
56 let childNode = node.get(currentLabel);
57 if (!childNode)
58 node.set(currentLabel, childNode = new Map());
59
60 if (labels.length > 0)
61 registerLabels(labels, childNode);
62 }
63
64 function addPublicSuffixRule(text)
65 {
66 // Add rule to the public suffix tree.
67 registerLabels(text.split("."), publicSuffixTree);
68 }
69
70 function loadPublicSuffixRules()
71 {
72 let content = fs.readFileSync(path.join(__dirname, "..",
73 "public_suffix_list.dat"),
74 "utf8");
75
76 let nonBlank = new RegExp(/[^\s]/);
77 let comment = new RegExp(/^\s*\/\//);
78
79 publicSuffixTree = new Map();
80
81 for (let line of content.split("\n"))
82 {
83 // Skip blank lines and comments.
84 if (!line || !nonBlank.test(line) || comment.test(line))
85 continue;
86
87 addPublicSuffixRule(line);
88 }
89 }
90
91 function initialize()
92 {
93 if (initialized)
94 return;
95
96 loadPublicSuffixRules();
97
98 // Add implicit "*" rule.
99 addPublicSuffixRule("*");
100
101 initialized = true;
102 }
103
104 /**
105 * Return the base domain for a given domain based on Mozilla's Public Suffix
106 * List.
107 *
108 * @param {string} The canonicalized domain (lower-case, Punycoded) for which
109 * to return the base domain
110 *
111 * @returns {string} The base domain for the given domain
112 */
113 exports.getBaseDomain = function(domain)
114 {
115 if (!initialized)
116 initialize();
117
118 let names = domain.split(".").reverse();
119 let node = publicSuffixTree;
120
121 let name = "";
122 let level = 0;
123
124 for (name of names)
125 {
126 let childNode = node.get(name);
127
128 if (!childNode)
129 {
130 // If we don't have an exact match, look for a wildcard, but ignore it if
131 // there's an exception for this name.
132 //
133 // Note: This is where we differ from the specification. We're supposed
134 // to follow both paths and pick the longer one, but we simply stick with
135 // the more specific path instead. This appears not to affect us in
136 // practice; however, in the interest of correctness we might want to fix
137 // it sometime.
138 childNode = !node.has("!" + name) && node.get("*");
139 }
140
141 node = childNode;
142
143 if (!node)
144 break;
145
146 level++;
147 }
148
149 if (!node)
150 {
151 // Construct the base domain by combining the last unmatched name with the
152 // public suffix.
153 let baseDomain = names.slice(0, level).reverse().join(".");
154
155 if (baseDomain && name)
156 baseDomain = name + "." + baseDomain;
157
158 return baseDomain;
159 }
160
161 return null;
162 };
OLDNEW
« no previous file with comments | « lib/abp2blocklist.js ('k') | package.json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld