Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/matcher.js

Issue 29892596: Issue 6992 - Remove keyword-by-filter map (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Implement faster version of isSlowFilter Created Sept. 28, 2018, 9:22 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterListener.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {WhitelistFilter} = require("./filterClasses");
26 26
27 /** 27 /**
28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp}
30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32
33 /**
34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp}
36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Checks whether a particular filter is slow.
41 * @param {RegExpFilter} filter
42 * @returns {boolean}
43 */
44 function isSlowFilter(filter)
45 {
46 return !filter.pattern || !keywordRegExp.test(filter.pattern);
47 }
48
49 exports.isSlowFilter = isSlowFilter;
50
51 /**
28 * Blacklist/whitelist filter matching 52 * Blacklist/whitelist filter matching
29 */ 53 */
30 class Matcher 54 class Matcher
31 { 55 {
32 constructor() 56 constructor()
33 { 57 {
34 /** 58 /**
35 * Lookup table for filters by their associated keyword 59 * Lookup table for filters by their associated keyword
36 * @type {Map.<string,(Filter|Filter[])>} 60 * @type {Map.<string,(Filter|Set.<Filter>)>}
37 */ 61 */
38 this.filterByKeyword = new Map(); 62 this.filterByKeyword = new Map();
39
40 /**
41 * Lookup table for keywords by the filter
42 * @type {Map.<Filter,string>}
43 */
44 this.keywordByFilter = new Map();
45 } 63 }
46 64
47 /** 65 /**
48 * Removes all known filters 66 * Removes all known filters
49 */ 67 */
50 clear() 68 clear()
51 { 69 {
52 this.filterByKeyword.clear(); 70 this.filterByKeyword.clear();
53 this.keywordByFilter.clear();
54 } 71 }
55 72
56 /** 73 /**
57 * Adds a filter to the matcher 74 * Adds a filter to the matcher
58 * @param {RegExpFilter} filter 75 * @param {RegExpFilter} filter
59 */ 76 */
60 add(filter) 77 add(filter)
61 { 78 {
62 if (this.keywordByFilter.has(filter))
63 return;
64
65 // Look for a suitable keyword 79 // Look for a suitable keyword
66 let keyword = this.findKeyword(filter); 80 let keyword = this.findKeyword(filter);
67 let oldEntry = this.filterByKeyword.get(keyword); 81 let set = this.filterByKeyword.get(keyword);
68 if (typeof oldEntry == "undefined") 82 if (typeof set == "undefined")
83 {
69 this.filterByKeyword.set(keyword, filter); 84 this.filterByKeyword.set(keyword, filter);
70 else if (oldEntry.length == 1) 85 }
71 this.filterByKeyword.set(keyword, [oldEntry, filter]); 86 else if (set.size == 1)
87 {
88 if (filter != set)
89 this.filterByKeyword.set(keyword, new Set([set, filter]));
90 }
72 else 91 else
73 oldEntry.push(filter); 92 {
74 this.keywordByFilter.set(filter, keyword); 93 set.add(filter);
94 }
75 } 95 }
76 96
77 /** 97 /**
78 * Removes a filter from the matcher 98 * Removes a filter from the matcher
79 * @param {RegExpFilter} filter 99 * @param {RegExpFilter} filter
80 */ 100 */
81 remove(filter) 101 remove(filter)
82 { 102 {
83 let keyword = this.keywordByFilter.get(filter); 103 let keyword = this.findKeyword(filter);
84 if (typeof keyword == "undefined") 104 let set = this.filterByKeyword.get(keyword);
105 if (typeof set == "undefined")
85 return; 106 return;
86 107
87 let list = this.filterByKeyword.get(keyword); 108 if (set.size == 1)
88 if (list.length <= 1) 109 {
89 this.filterByKeyword.delete(keyword); 110 if (filter == set)
111 this.filterByKeyword.delete(keyword);
112 }
90 else 113 else
91 { 114 {
92 let index = list.indexOf(filter); 115 set.delete(filter);
93 if (index >= 0) 116
94 { 117 if (set.size == 1)
95 list.splice(index, 1); 118 this.filterByKeyword.set(keyword, [...set][0]);
96 if (list.length == 1)
97 this.filterByKeyword.set(keyword, list[0]);
98 }
99 } 119 }
100
101 this.keywordByFilter.delete(filter);
102 } 120 }
103 121
104 /** 122 /**
105 * Chooses a keyword to be associated with the filter 123 * Chooses a keyword to be associated with the filter
106 * @param {Filter} filter 124 * @param {Filter} filter
107 * @returns {string} keyword or an empty string if no keyword could be found 125 * @returns {string} keyword or an empty string if no keyword could be found
108 */ 126 */
109 findKeyword(filter) 127 findKeyword(filter)
110 { 128 {
111 let result = ""; 129 let result = "";
112 let {pattern} = filter; 130 let {pattern} = filter;
113 if (pattern == null) 131 if (pattern == null)
114 return result; 132 return result;
115 133
116 let candidates = pattern.toLowerCase().match( 134 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
117 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
118 );
119 if (!candidates) 135 if (!candidates)
120 return result; 136 return result;
121 137
122 let hash = this.filterByKeyword; 138 let hash = this.filterByKeyword;
123 let resultCount = 0xFFFFFF; 139 let resultCount = 0xFFFFFF;
124 let resultLength = 0; 140 let resultLength = 0;
125 for (let i = 0, l = candidates.length; i < l; i++) 141 for (let i = 0, l = candidates.length; i < l; i++)
126 { 142 {
127 let candidate = candidates[i].substr(1); 143 let candidate = candidates[i].substr(1);
128 let filters = hash.get(candidate); 144 let filters = hash.get(candidate);
129 let count = typeof filters != "undefined" ? filters.length : 0; 145 let count = typeof filters != "undefined" ? filters.size : 0;
130 if (count < resultCount || 146 if (count < resultCount ||
131 (count == resultCount && candidate.length > resultLength)) 147 (count == resultCount && candidate.length > resultLength))
132 { 148 {
133 result = candidate; 149 result = candidate;
134 resultCount = count; 150 resultCount = count;
135 resultLength = candidate.length; 151 resultLength = candidate.length;
136 } 152 }
137 } 153 }
138 return result; 154 return result;
139 } 155 }
140 156
141 /** 157 /**
142 * Checks whether a particular filter is being matched against.
143 * @param {RegExpFilter} filter
144 * @returns {boolean}
145 */
146 hasFilter(filter)
147 {
148 return this.keywordByFilter.has(filter);
149 }
150
151 /**
152 * Returns the keyword used for a filter, <code>null</code>
153 * for unknown filters.
154 * @param {RegExpFilter} filter
155 * @returns {?string}
156 */
157 getKeywordForFilter(filter)
158 {
159 let keyword = this.keywordByFilter.get(filter);
160 return typeof keyword != "undefined" ? keyword : null;
161 }
162
163 /**
164 * Checks whether the entries for a particular keyword match a URL 158 * Checks whether the entries for a particular keyword match a URL
165 * @param {string} keyword 159 * @param {string} keyword
166 * @param {string} location 160 * @param {string} location
167 * @param {number} typeMask 161 * @param {number} typeMask
168 * @param {string} [docDomain] 162 * @param {string} [docDomain]
169 * @param {boolean} [thirdParty] 163 * @param {boolean} [thirdParty]
170 * @param {string} [sitekey] 164 * @param {string} [sitekey]
171 * @param {boolean} [specificOnly] 165 * @param {boolean} [specificOnly]
172 * @returns {?Filter} 166 * @returns {?Filter}
173 */ 167 */
174 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 168 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
175 specificOnly) 169 specificOnly)
176 { 170 {
177 let list = this.filterByKeyword.get(keyword); 171 let set = this.filterByKeyword.get(keyword);
178 if (typeof list == "undefined") 172 if (typeof set == "undefined")
179 return null; 173 return null;
180 for (let i = 0; i < list.length; i++) 174
175 for (let filter of set)
181 { 176 {
182 let filter = list[i];
183
184 if (specificOnly && filter.isGeneric() && 177 if (specificOnly && filter.isGeneric() &&
185 !(filter instanceof WhitelistFilter)) 178 !(filter instanceof WhitelistFilter))
186 continue; 179 continue;
187 180
188 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 181 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
189 return filter; 182 return filter;
190 } 183 }
191 return null; 184 return null;
192 } 185 }
193 186
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
306 * @returns {string} keyword 299 * @returns {string} keyword
307 */ 300 */
308 findKeyword(filter) 301 findKeyword(filter)
309 { 302 {
310 if (filter instanceof WhitelistFilter) 303 if (filter instanceof WhitelistFilter)
311 return this.whitelist.findKeyword(filter); 304 return this.whitelist.findKeyword(filter);
312 return this.blacklist.findKeyword(filter); 305 return this.blacklist.findKeyword(filter);
313 } 306 }
314 307
315 /** 308 /**
316 * @see Matcher#hasFilter
317 * @param {Filter} filter
318 * @returns {boolean}
319 */
320 hasFilter(filter)
321 {
322 if (filter instanceof WhitelistFilter)
323 return this.whitelist.hasFilter(filter);
324 return this.blacklist.hasFilter(filter);
325 }
326
327 /**
328 * @see Matcher#getKeywordForFilter
329 * @param {Filter} filter
330 * @returns {string} keyword
331 */
332 getKeywordForFilter(filter)
333 {
334 if (filter instanceof WhitelistFilter)
335 return this.whitelist.getKeywordForFilter(filter);
336 return this.blacklist.getKeywordForFilter(filter);
337 }
338
339 /**
340 * Checks whether a particular filter is slow
341 * @param {RegExpFilter} filter
342 * @returns {boolean}
343 */
344 isSlowFilter(filter)
345 {
346 let matcher = (
347 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
348 );
349 let keyword = matcher.getKeywordForFilter(filter);
350 if (keyword != null)
351 return !keyword;
352 return !matcher.findKeyword(filter);
353 }
354
355 /**
356 * Optimized filter matching testing both whitelist and blacklist matchers 309 * Optimized filter matching testing both whitelist and blacklist matchers
357 * simultaneously. For parameters see 310 * simultaneously. For parameters see
358 {@link Matcher#matchesAny Matcher.matchesAny()}. 311 {@link Matcher#matchesAny Matcher.matchesAny()}.
359 * @see Matcher#matchesAny 312 * @see Matcher#matchesAny
360 * @inheritdoc 313 * @inheritdoc
361 */ 314 */
362 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 315 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
363 specificOnly) 316 specificOnly)
364 { 317 {
365 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 318 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 367
415 exports.CombinedMatcher = CombinedMatcher; 368 exports.CombinedMatcher = CombinedMatcher;
416 369
417 /** 370 /**
418 * Shared {@link CombinedMatcher} instance that should usually be used. 371 * Shared {@link CombinedMatcher} instance that should usually be used.
419 * @type {CombinedMatcher} 372 * @type {CombinedMatcher}
420 */ 373 */
421 let defaultMatcher = new CombinedMatcher(); 374 let defaultMatcher = new CombinedMatcher();
422 375
423 exports.defaultMatcher = defaultMatcher; 376 exports.defaultMatcher = defaultMatcher;
OLDNEW
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterListener.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld