Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/matcher.js

Issue 29907586: Issue 6994 - Use shortcut matching for location only filters (Closed)
Left Patch Set: Reduce ternary use Created Oct. 21, 2018, 4:26 a.m.
Right Patch Set: Address PS4, and rebase Created Oct. 24, 2018, 8:40 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26 26
27 /** 27 /**
28 * Regular expression for matching a keyword in a filter. 28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp} 29 * @type {RegExp}
30 */ 30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32 32
33 /** 33 /**
34 * Regular expression for matching all keywords in a filter. 34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp} 35 * @type {RegExp}
36 */ 36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g"); 37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Bitmask for "types" that are for exception rules only, like
41 * <code>$document</code>, <code>$elemhide</code>, and so on.
42 * @type {number}
43 */
44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
45 RegExpFilter.typeMap.ELEMHIDE |
46 RegExpFilter.typeMap.GENERICHIDE |
47 RegExpFilter.typeMap.GENERICBLOCK;
38 48
39 /** 49 /**
40 * Checks whether a particular filter is slow. 50 * Checks whether a particular filter is slow.
41 * @param {RegExpFilter} filter 51 * @param {RegExpFilter} filter
42 * @returns {boolean} 52 * @returns {boolean}
43 */ 53 */
44 function isSlowFilter(filter) 54 function isSlowFilter(filter)
45 { 55 {
46 return !filter.pattern || !keywordRegExp.test(filter.pattern); 56 return !filter.pattern || !keywordRegExp.test(filter.pattern);
47 } 57 }
48 58
49 exports.isSlowFilter = isSlowFilter; 59 exports.isSlowFilter = isSlowFilter;
50 60
51 /** 61 /**
52 * Blacklist/whitelist filter matching 62 * Blacklist/whitelist filter matching
53 */ 63 */
54 class Matcher 64 class Matcher
55 { 65 {
56 constructor() 66 constructor()
57 { 67 {
58 /** 68 /**
59 * Lookup table for filters by their associated keyword 69 * Lookup table for filters by their associated keyword
60 * @type {Map.<string,(Filter|Set.<Filter>)>} 70 * @type {Map.<string,(Filter|Set.<Filter>)>}
61 */ 71 * @private
62 this.filterByKeyword = new Map(); 72 */
73 this._filterByKeyword = new Map();
63 74
64 /** 75 /**
65 * Lookup table for location only filters by their associated keyword 76 * Lookup table for location only filters by their associated keyword
Manish Jethani 2018/10/24 21:35:28 Nit: I don't know if this is right, but I would ha
Jon Sonesen 2018/10/24 21:46:51 Acknowledged.
66 * for shortcut matching. 77 * for shortcut matching.
67 * @private 78 * @private
Manish Jethani 2018/10/24 21:35:28 Let's put @private last.
Jon Sonesen 2018/10/24 21:46:52 Acknowledged.
68 * @type {Map.<string,(Filter|Set.<Filter>)>} 79 * @type {Map.<string,(Filter|Set.<Filter>)>}
69 */ 80 */
70 this._fastFilterByKeyword = new Map(); 81 this._fastFilterByKeyword = new Map();
Manish Jethani 2018/10/24 21:35:28 I'm thinking about the nomenclature here, about ho
Jon Sonesen 2018/10/24 21:46:52 I like this, it makes more sense from a reader's p
71 } 82 }
72 83
73 /** 84 /**
74 * Removes all known filters 85 * Removes all known filters
75 */ 86 */
76 clear() 87 clear()
77 { 88 {
78 this.filterByKeyword.clear(); 89 this._filterByKeyword.clear();
79 this._fastFilterByKeyword.clear(); 90 this._fastFilterByKeyword.clear();
80 } 91 }
81 92
82 /** 93 /**
83 * Adds a filter to the matcher 94 * Adds a filter to the matcher
84 * @param {RegExpFilter} filter 95 * @param {RegExpFilter} filter
85 */ 96 */
86 add(filter) 97 add(filter)
87 { 98 {
88 // Look for a suitable keyword 99 // Look for a suitable keyword
89 let keyword = this.findKeyword(filter); 100 let keyword = this.findKeyword(filter);
90 let filterMap = filter.isLocationOnly ? this._fastFilterByKeyword : 101 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
91 this.filterByKeyword; 102 this._filterByKeyword;
92 let set = filterMap.get(keyword); 103 let set = filterMap.get(keyword);
93 104
94 if (typeof set == "undefined") 105 if (typeof set == "undefined")
95 { 106 {
96 filterMap.set(keyword, filter); 107 filterMap.set(keyword, filter);
97 } 108 }
98 else if (set.size == 1) 109 else if (set.size == 1)
99 { 110 {
100 if (filter != set) 111 if (filter != set)
101 filterMap.set(keyword, new Set([set, filter])); 112 filterMap.set(keyword, new Set([set, filter]));
102 } 113 }
103 else 114 else
104 { 115 {
105 set.add(filter); 116 set.add(filter);
106 } 117 }
107 } 118 }
108 119
109 /** 120 /**
110 * Removes a filter from the matcher 121 * Removes a filter from the matcher
111 * @param {RegExpFilter} filter 122 * @param {RegExpFilter} filter
112 */ 123 */
113 remove(filter) 124 remove(filter)
114 { 125 {
115 let keyword = this.findKeyword(filter); 126 let keyword = this.findKeyword(filter);
116 let filterMap = filter.isLocationOnly ? this._fastFilterByKeyword : 127 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
117 this.filterByKeyword; 128 this._filterByKeyword;
118 let set = filterMap.get(keyword); 129 let set = filterMap.get(keyword);
119 130
120 if (typeof set == "undefined") 131 if (typeof set == "undefined")
121 return; 132 return;
122 133
123 if (set.size == 1) 134 if (set.size == 1)
124 { 135 {
125 if (filter == set) 136 if (filter == set)
126 filterMap.delete(keyword); 137 filterMap.delete(keyword);
127 } 138 }
128 else 139 else
129 { 140 {
130 set.delete(filter); 141 set.delete(filter);
131 142
132 if (set.size == 1) 143 if (set.size == 1)
133 filterMap.set(keyword, [...set][0]); 144 filterMap.set(keyword, [...set][0]);
134 } 145 }
135 } 146 }
136 147
137 /** 148 /**
138 * Chooses a keyword to be associated with the filter 149 * Chooses a keyword to be associated with the filter
139 * @param {Filter} filter 150 * @param {Filter} filter
140 * @returns {string} keyword or an empty string if no keyword could be found 151 * @returns {string} keyword or an empty string if no keyword could be found
152 * @protected
141 */ 153 */
142 findKeyword(filter) 154 findKeyword(filter)
143 { 155 {
144 let result = ""; 156 let result = "";
145 let {pattern} = filter; 157 let {pattern} = filter;
146 if (pattern == null) 158 if (pattern == null)
147 return result; 159 return result;
148 160
149 let candidates = pattern.toLowerCase().match(allKeywordsRegExp); 161 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
150 if (!candidates) 162 if (!candidates)
151 return result; 163 return result;
152 164
153 let hash = filter.isLocationOnly ? this._fastFilterByKeyword : 165 let hash = filter.isLocationOnly() ? this._fastFilterByKeyword :
154 this.filterByKeyword; 166 this._filterByKeyword;
Jon Sonesen 2018/10/21 17:29:51 In the tests this fails on the last test of extrac
167
155 let resultCount = 0xFFFFFF; 168 let resultCount = 0xFFFFFF;
156 let resultLength = 0; 169 let resultLength = 0;
157 for (let i = 0, l = candidates.length; i < l; i++) 170 for (let i = 0, l = candidates.length; i < l; i++)
158 { 171 {
159 let candidate = candidates[i].substr(1); 172 let candidate = candidates[i].substr(1);
160 let filters = hash.get(candidate); 173 let filters = hash.get(candidate);
161 let count = typeof filters != "undefined" ? filters.size : 0; 174 let count = typeof filters != "undefined" ? filters.size : 0;
Manish Jethani 2018/10/25 00:49:42 We need `count` to be the sum of `this._simpleFilt
162 if (count < resultCount || 175 if (count < resultCount ||
163 (count == resultCount && candidate.length > resultLength)) 176 (count == resultCount && candidate.length > resultLength))
164 { 177 {
165 result = candidate; 178 result = candidate;
166 resultCount = count; 179 resultCount = count;
167 resultLength = candidate.length; 180 resultLength = candidate.length;
168 } 181 }
169 } 182 }
170 return result; 183 return result;
171 } 184 }
172 185
173 /** 186 /**
174 * Checks whether the entries for a particular keyword match a URL 187 * Checks whether the entries for a particular keyword match a URL
175 * @param {string} keyword 188 * @param {string} keyword
176 * @param {string} location 189 * @param {string} location
177 * @param {number} typeMask 190 * @param {number} typeMask
178 * @param {string} [docDomain] 191 * @param {string} [docDomain]
179 * @param {boolean} [thirdParty] 192 * @param {boolean} [thirdParty]
180 * @param {string} [sitekey] 193 * @param {string} [sitekey]
181 * @param {boolean} [specificOnly] 194 * @param {boolean} [specificOnly]
182 * @returns {?Filter} 195 * @returns {?Filter}
183 */ 196 * @protected
184 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 197 */
185 specificOnly) 198 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
186 { 199 specificOnly)
187 let set = this.filterByKeyword.get(keyword); 200 {
201 let set = this._filterByKeyword.get(keyword);
188 if (typeof set == "undefined") 202 if (typeof set == "undefined")
Manish Jethani 2018/10/25 00:49:41 This is clearly wrong. We want to check all filter
189 { 203 {
190 let fastSet = this._fastFilterByKeyword.get(keyword); 204 let fastSet = this._fastFilterByKeyword.get(keyword);
191 if (typeof fastSet == "undefined") 205 if (typeof fastSet == "undefined")
192 return null; 206 return null;
193 207
194 for (let filter of fastSet) 208 for (let filter of fastSet)
195 { 209 {
196 if (specificOnly && filter.isGeneric() && 210 if (specificOnly && filter.isGeneric() &&
197 !(filter instanceof WhitelistFilter)) 211 !(filter instanceof WhitelistFilter))
198 continue; 212 continue;
199 213
200 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 214 if (filter.matchesLocation(location))
201 return filter; 215 return filter;
202 } 216 }
203 return null; 217 return null;
204 } 218 }
205 219
206 for (let filter of set) 220 for (let filter of set)
207 { 221 {
208 if (specificOnly && filter.isGeneric() && 222 if (specificOnly && filter.isGeneric() &&
209 !(filter instanceof WhitelistFilter)) 223 !(filter instanceof WhitelistFilter))
210 continue; 224 continue;
(...skipping 22 matching lines...) Expand all
233 * matching filter or <code>null</code> 247 * matching filter or <code>null</code>
234 */ 248 */
235 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 249 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
236 { 250 {
237 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 251 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
238 if (candidates === null) 252 if (candidates === null)
239 candidates = []; 253 candidates = [];
240 candidates.push(""); 254 candidates.push("");
241 for (let i = 0, l = candidates.length; i < l; i++) 255 for (let i = 0, l = candidates.length; i < l; i++)
242 { 256 {
243 let result = this._checkEntryMatch(candidates[i], location, typeMask, 257 let result = this.checkEntryMatch(candidates[i], location, typeMask,
244 docDomain, thirdParty, sitekey, 258 docDomain, thirdParty, sitekey,
245 specificOnly); 259 specificOnly);
246 if (result) 260 if (result)
247 return result; 261 return result;
248 } 262 }
249 263
250 return null; 264 return null;
251 } 265 }
252 } 266 }
253 267
254 exports.Matcher = Matcher; 268 exports.Matcher = Matcher;
255 269
256 /** 270 /**
257 * Combines a matcher for blocking and exception rules, automatically sorts 271 * Combines a matcher for blocking and exception rules, automatically sorts
258 * rules into two {@link Matcher} instances. 272 * rules into two {@link Matcher} instances.
259 */ 273 */
260 class CombinedMatcher 274 class CombinedMatcher
261 { 275 {
262 constructor() 276 constructor()
263 { 277 {
264 /** 278 /**
265 * Maximal number of matching cache entries to be kept 279 * Maximal number of matching cache entries to be kept
266 * @type {number} 280 * @type {number}
267 */ 281 */
268 this.maxCacheEntries = 1000; 282 this.maxCacheEntries = 1000;
269 283
270 /** 284 /**
271 * Matcher for blocking rules. 285 * Matcher for blocking rules.
272 * @type {Matcher} 286 * @type {Matcher}
273 */ 287 * @private
274 this.blacklist = new Matcher(); 288 */
289 this._blacklist = new Matcher();
275 290
276 /** 291 /**
277 * Matcher for exception rules. 292 * Matcher for exception rules.
278 * @type {Matcher} 293 * @type {Matcher}
279 */ 294 * @private
280 this.whitelist = new Matcher(); 295 */
296 this._whitelist = new Matcher();
281 297
282 /** 298 /**
283 * Lookup table of previous {@link Matcher#matchesAny} results 299 * Lookup table of previous {@link Matcher#matchesAny} results
284 * @type {Map.<string,Filter>} 300 * @type {Map.<string,Filter>}
285 */ 301 * @private
286 this.resultCache = new Map(); 302 */
303 this._resultCache = new Map();
287 } 304 }
288 305
289 /** 306 /**
290 * @see Matcher#clear 307 * @see Matcher#clear
291 */ 308 */
292 clear() 309 clear()
293 { 310 {
294 this.blacklist.clear(); 311 this._blacklist.clear();
295 this.whitelist.clear(); 312 this._whitelist.clear();
296 this.resultCache.clear(); 313 this._resultCache.clear();
297 } 314 }
298 315
299 /** 316 /**
300 * @see Matcher#add 317 * @see Matcher#add
301 * @param {Filter} filter 318 * @param {Filter} filter
302 */ 319 */
303 add(filter) 320 add(filter)
304 { 321 {
305 if (filter instanceof WhitelistFilter) 322 if (filter instanceof WhitelistFilter)
306 this.whitelist.add(filter); 323 this._whitelist.add(filter);
307 else 324 else
308 this.blacklist.add(filter); 325 this._blacklist.add(filter);
309 326
310 this.resultCache.clear(); 327 this._resultCache.clear();
311 } 328 }
312 329
313 /** 330 /**
314 * @see Matcher#remove 331 * @see Matcher#remove
315 * @param {Filter} filter 332 * @param {Filter} filter
316 */ 333 */
317 remove(filter) 334 remove(filter)
318 { 335 {
319 if (filter instanceof WhitelistFilter) 336 if (filter instanceof WhitelistFilter)
320 this.whitelist.remove(filter); 337 this._whitelist.remove(filter);
321 else 338 else
322 this.blacklist.remove(filter); 339 this._blacklist.remove(filter);
323 340
324 this.resultCache.clear(); 341 this._resultCache.clear();
325 } 342 }
326 343
327 /** 344 /**
328 * @see Matcher#findKeyword 345 * @see Matcher#findKeyword
329 * @param {Filter} filter 346 * @param {Filter} filter
330 * @returns {string} keyword 347 * @returns {string} keyword
348 * @protected
331 */ 349 */
332 findKeyword(filter) 350 findKeyword(filter)
333 { 351 {
334 if (filter instanceof WhitelistFilter) 352 if (filter instanceof WhitelistFilter)
335 return this.whitelist.findKeyword(filter); 353 return this._whitelist.findKeyword(filter);
336 return this.blacklist.findKeyword(filter); 354 return this._blacklist.findKeyword(filter);
337 } 355 }
338 356
339 /** 357 /**
340 * Optimized filter matching testing both whitelist and blacklist matchers 358 * Optimized filter matching testing both whitelist and blacklist matchers
341 * simultaneously. For parameters see 359 * simultaneously. For parameters see
342 {@link Matcher#matchesAny Matcher.matchesAny()}. 360 {@link Matcher#matchesAny Matcher.matchesAny()}.
343 * @see Matcher#matchesAny 361 * @see Matcher#matchesAny
344 * @inheritdoc 362 * @inheritdoc
345 */ 363 * @private
346 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 364 */
347 specificOnly) 365 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
366 specificOnly)
348 { 367 {
349 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 368 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
350 if (candidates === null) 369 if (candidates === null)
351 candidates = []; 370 candidates = [];
352 candidates.push(""); 371 candidates.push("");
353 372
373 let whitelistHit = null;
354 let blacklistHit = null; 374 let blacklistHit = null;
355 for (let i = 0, l = candidates.length; i < l; i++) 375
356 { 376 // If the type mask includes no types other than whitelist-only types, we
357 let substr = candidates[i]; 377 // can skip the blacklist.
358 let result = this.whitelist._checkEntryMatch( 378 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0)
359 substr, location, typeMask, docDomain, thirdParty, sitekey 379 {
360 ); 380 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++)
361 if (result)
362 return result;
363 if (blacklistHit === null)
364 { 381 {
365 blacklistHit = this.blacklist._checkEntryMatch( 382 blacklistHit = this._blacklist.checkEntryMatch(candidates[i], location,
366 substr, location, typeMask, docDomain, thirdParty, sitekey, 383 typeMask, docDomain,
367 specificOnly 384 thirdParty, sitekey,
368 ); 385 specificOnly);
369 } 386 }
370 } 387 }
371 return blacklistHit; 388
389 // If the type mask includes any whitelist-only types, we need to check the
390 // whitelist.
391 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0)
392 {
393 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++)
394 {
395 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location,
396 typeMask, docDomain,
397 thirdParty, sitekey);
398 }
399 }
400
401 return whitelistHit || blacklistHit;
372 } 402 }
373 403
374 /** 404 /**
375 * @see Matcher#matchesAny 405 * @see Matcher#matchesAny
376 * @inheritdoc 406 * @inheritdoc
377 */ 407 */
378 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 408 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
379 { 409 {
380 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 410 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
381 " " + sitekey + " " + specificOnly; 411 " " + sitekey + " " + specificOnly;
382 412
383 let result = this.resultCache.get(key); 413 let result = this._resultCache.get(key);
384 if (typeof result != "undefined") 414 if (typeof result != "undefined")
385 return result; 415 return result;
386 416
387 result = this.matchesAnyInternal(location, typeMask, docDomain, 417 result = this._matchesAnyInternal(location, typeMask, docDomain,
388 thirdParty, sitekey, specificOnly); 418 thirdParty, sitekey, specificOnly);
389 419
390 if (this.resultCache.size >= this.maxCacheEntries) 420 if (this._resultCache.size >= this.maxCacheEntries)
391 this.resultCache.clear(); 421 this._resultCache.clear();
392 422
393 this.resultCache.set(key, result); 423 this._resultCache.set(key, result);
394 424
395 return result; 425 return result;
396 } 426 }
397 } 427 }
398 428
399 exports.CombinedMatcher = CombinedMatcher; 429 exports.CombinedMatcher = CombinedMatcher;
400 430
401 /** 431 /**
402 * Shared {@link CombinedMatcher} instance that should usually be used. 432 * Shared {@link CombinedMatcher} instance that should usually be used.
403 * @type {CombinedMatcher} 433 * @type {CombinedMatcher}
404 */ 434 */
405 let defaultMatcher = new CombinedMatcher(); 435 let defaultMatcher = new CombinedMatcher();
406 436
407 exports.defaultMatcher = defaultMatcher; 437 exports.defaultMatcher = defaultMatcher;
LEFTRIGHT

Powered by Google App Engine
This is Rietveld