Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/matcher.js

Issue 29907586: Issue 6994 - Use shortcut matching for location only filters (Closed)
Left Patch Set: Created Oct. 21, 2018, 3:51 a.m.
Right Patch Set: Address PS4, and rebase Created Oct. 24, 2018, 8:40 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26 26
27 /** 27 /**
28 * Regular expression for matching a keyword in a filter. 28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp} 29 * @type {RegExp}
30 */ 30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32 32
33 /** 33 /**
34 * Regular expression for matching all keywords in a filter. 34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp} 35 * @type {RegExp}
36 */ 36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g"); 37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Bitmask for "types" that are for exception rules only, like
41 * <code>$document</code>, <code>$elemhide</code>, and so on.
42 * @type {number}
43 */
44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
45 RegExpFilter.typeMap.ELEMHIDE |
46 RegExpFilter.typeMap.GENERICHIDE |
47 RegExpFilter.typeMap.GENERICBLOCK;
38 48
39 /** 49 /**
40 * Checks whether a particular filter is slow. 50 * Checks whether a particular filter is slow.
41 * @param {RegExpFilter} filter 51 * @param {RegExpFilter} filter
42 * @returns {boolean} 52 * @returns {boolean}
43 */ 53 */
44 function isSlowFilter(filter) 54 function isSlowFilter(filter)
45 { 55 {
46 return !filter.pattern || !keywordRegExp.test(filter.pattern); 56 return !filter.pattern || !keywordRegExp.test(filter.pattern);
47 } 57 }
48 58
49 exports.isSlowFilter = isSlowFilter; 59 exports.isSlowFilter = isSlowFilter;
50 60
51 /** 61 /**
52 * Blacklist/whitelist filter matching 62 * Blacklist/whitelist filter matching
53 */ 63 */
54 class Matcher 64 class Matcher
55 { 65 {
56 constructor() 66 constructor()
57 { 67 {
58 /** 68 /**
59 * Lookup table for filters by their associated keyword 69 * Lookup table for filters by their associated keyword
60 * @type {Map.<string,(Filter|Set.<Filter>)>} 70 * @type {Map.<string,(Filter|Set.<Filter>)>}
61 */ 71 * @private
62 this.filterByKeyword = new Map(); 72 */
73 this._filterByKeyword = new Map();
63 74
64 /** 75 /**
65 * Lookup table for location only filters by their associated keyword 76 * Lookup table for location only filters by their associated keyword
Manish Jethani 2018/10/24 21:35:28 Nit: I don't know if this is right, but I would ha
Jon Sonesen 2018/10/24 21:46:51 Acknowledged.
66 * for shortcut matching. 77 * for shortcut matching.
67 * @private 78 * @private
Manish Jethani 2018/10/24 21:35:28 Let's put @private last.
Jon Sonesen 2018/10/24 21:46:52 Acknowledged.
68 * @type {Map.<string,(Filter|Set.<Filter>)>} 79 * @type {Map.<string,(Filter|Set.<Filter>)>}
69 */ 80 */
70 this._fastFilterByKeyword = new Map(); 81 this._fastFilterByKeyword = new Map();
Manish Jethani 2018/10/24 21:35:28 I'm thinking about the nomenclature here, about ho
Jon Sonesen 2018/10/24 21:46:52 I like this, it makes more sense from a reader's p
71 } 82 }
72 83
73 /** 84 /**
74 * Removes all known filters 85 * Removes all known filters
75 */ 86 */
76 clear() 87 clear()
77 { 88 {
78 this.filterByKeyword.clear(); 89 this._filterByKeyword.clear();
79 this._fastFilterByKeyword.clear(); 90 this._fastFilterByKeyword.clear();
80 } 91 }
81 92
82 /** 93 /**
83 * Adds a filter to the matcher 94 * Adds a filter to the matcher
84 * @param {RegExpFilter} filter 95 * @param {RegExpFilter} filter
85 */ 96 */
86 add(filter) 97 add(filter)
87 { 98 {
88 // Look for a suitable keyword 99 // Look for a suitable keyword
89 let keyword = this.findKeyword(filter); 100 let keyword = this.findKeyword(filter);
90 let set = filter.isLocationOnly ? this._fastFilterByKeyword.get(keyword) : 101 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
91 this.filterByKeyword.get(keyword); 102 this._filterByKeyword;
103 let set = filterMap.get(keyword);
92 104
93 if (typeof set == "undefined") 105 if (typeof set == "undefined")
94 { 106 {
95 filter.isLocationOnly ? 107 filterMap.set(keyword, filter);
96 this._fastFilterByKeyword.set(keyword, filter) :
97 this.filterByKeyword.set(keyword, filter);
98 } 108 }
99 else if (set.size == 1) 109 else if (set.size == 1)
100 { 110 {
101 if (filter != set) 111 if (filter != set)
102 filter.isLocationOnly ? 112 filterMap.set(keyword, new Set([set, filter]));
103 this._fastFilterByKeyword.set(keyword, new Set([set, filter])) :
104 this.filterByKeyword.set(keyword, new Set([set, filter]));
105 } 113 }
106 else 114 else
107 { 115 {
108 set.add(filter); 116 set.add(filter);
109 } 117 }
110 } 118 }
111 119
112 /** 120 /**
113 * Removes a filter from the matcher 121 * Removes a filter from the matcher
114 * @param {RegExpFilter} filter 122 * @param {RegExpFilter} filter
115 */ 123 */
116 remove(filter) 124 remove(filter)
117 { 125 {
118 let keyword = this.findKeyword(filter); 126 let keyword = this.findKeyword(filter);
119 let set = filter.isLocationOnly ? this._fastFilterByKeyword.get(keyword) : 127 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
120 this.filterByKeyword.get(keyword); 128 this._filterByKeyword;
129 let set = filterMap.get(keyword);
121 130
122 if (typeof set == "undefined") 131 if (typeof set == "undefined")
123 return; 132 return;
124 133
125 if (set.size == 1) 134 if (set.size == 1)
126 { 135 {
127 if (filter == set) 136 if (filter == set)
128 filter.isLocationOnly ? 137 filterMap.delete(keyword);
129 this._fastFilterByKeyword.delete(keyword) :
130 this.filterByKeyword.delete(keyword);
131 } 138 }
132 else 139 else
133 { 140 {
134 set.delete(filter); 141 set.delete(filter);
135 142
136 if (set.size == 1) 143 if (set.size == 1)
137 filter.isLocationOnly ? 144 filterMap.set(keyword, [...set][0]);
138 this._fastFilterByKeyword.set(keyword, [...set][0]) :
139 this.filterByKeyword.set(keyword, [...set][0]);
140 } 145 }
141 } 146 }
142 147
143 /** 148 /**
144 * Chooses a keyword to be associated with the filter 149 * Chooses a keyword to be associated with the filter
145 * @param {Filter} filter 150 * @param {Filter} filter
146 * @returns {string} keyword or an empty string if no keyword could be found 151 * @returns {string} keyword or an empty string if no keyword could be found
152 * @protected
147 */ 153 */
148 findKeyword(filter) 154 findKeyword(filter)
149 { 155 {
150 let result = ""; 156 let result = "";
151 let {pattern} = filter; 157 let {pattern} = filter;
152 if (pattern == null) 158 if (pattern == null)
153 return result; 159 return result;
154 160
155 let candidates = pattern.toLowerCase().match(allKeywordsRegExp); 161 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
156 if (!candidates) 162 if (!candidates)
157 return result; 163 return result;
158 164
159 let hash = filter.isLocationOnly ? this._fastFilterByKeyword : 165 let hash = filter.isLocationOnly() ? this._fastFilterByKeyword :
160 this.filterByKeyword; 166 this._filterByKeyword;
167
161 let resultCount = 0xFFFFFF; 168 let resultCount = 0xFFFFFF;
162 let resultLength = 0; 169 let resultLength = 0;
163 for (let i = 0, l = candidates.length; i < l; i++) 170 for (let i = 0, l = candidates.length; i < l; i++)
164 { 171 {
165 let candidate = candidates[i].substr(1); 172 let candidate = candidates[i].substr(1);
166 let filters = hash.get(candidate); 173 let filters = hash.get(candidate);
167 let count = typeof filters != "undefined" ? filters.size : 0; 174 let count = typeof filters != "undefined" ? filters.size : 0;
Manish Jethani 2018/10/25 00:49:42 We need `count` to be the sum of `this._simpleFilt
168 if (count < resultCount || 175 if (count < resultCount ||
169 (count == resultCount && candidate.length > resultLength)) 176 (count == resultCount && candidate.length > resultLength))
170 { 177 {
171 result = candidate; 178 result = candidate;
172 resultCount = count; 179 resultCount = count;
173 resultLength = candidate.length; 180 resultLength = candidate.length;
174 } 181 }
175 } 182 }
176 return result; 183 return result;
177 } 184 }
178 185
179 /** 186 /**
180 * Checks whether the entries for a particular keyword match a URL 187 * Checks whether the entries for a particular keyword match a URL
181 * @param {string} keyword 188 * @param {string} keyword
182 * @param {string} location 189 * @param {string} location
183 * @param {number} typeMask 190 * @param {number} typeMask
184 * @param {string} [docDomain] 191 * @param {string} [docDomain]
185 * @param {boolean} [thirdParty] 192 * @param {boolean} [thirdParty]
186 * @param {string} [sitekey] 193 * @param {string} [sitekey]
187 * @param {boolean} [specificOnly] 194 * @param {boolean} [specificOnly]
188 * @returns {?Filter} 195 * @returns {?Filter}
189 */ 196 * @protected
190 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 197 */
191 specificOnly) 198 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
192 { 199 specificOnly)
193 let fastSet = this._fastFilterByKeyword.get(keyword); 200 {
194 if (typeof fastSet != "undefined") 201 let set = this._filterByKeyword.get(keyword);
195 { 202 if (typeof set == "undefined")
Manish Jethani 2018/10/25 00:49:41 This is clearly wrong. We want to check all filter
203 {
204 let fastSet = this._fastFilterByKeyword.get(keyword);
205 if (typeof fastSet == "undefined")
206 return null;
207
196 for (let filter of fastSet) 208 for (let filter of fastSet)
197 { 209 {
198 if (filter.contentType == 0 && typeMask == 0)
199 continue;
200
201 if (specificOnly && filter.isGeneric() && 210 if (specificOnly && filter.isGeneric() &&
202 !(filter instanceof WhitelistFilter)) 211 !(filter instanceof WhitelistFilter))
203 continue; 212 continue;
204 213
205 if (filter.matchesLocation(location)) 214 if (filter.matchesLocation(location))
206 return filter; 215 return filter;
207 } 216 }
208 }
209 let set = this.filterByKeyword.get(keyword);
210 if (typeof set == "undefined")
211 return null; 217 return null;
218 }
212 219
213 for (let filter of set) 220 for (let filter of set)
214 { 221 {
215 if (specificOnly && filter.isGeneric() && 222 if (specificOnly && filter.isGeneric() &&
216 !(filter instanceof WhitelistFilter)) 223 !(filter instanceof WhitelistFilter))
217 continue; 224 continue;
218 225
219 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 226 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
220 return filter; 227 return filter;
221 } 228 }
(...skipping 18 matching lines...) Expand all
240 * matching filter or <code>null</code> 247 * matching filter or <code>null</code>
241 */ 248 */
242 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 249 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
243 { 250 {
244 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 251 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
245 if (candidates === null) 252 if (candidates === null)
246 candidates = []; 253 candidates = [];
247 candidates.push(""); 254 candidates.push("");
248 for (let i = 0, l = candidates.length; i < l; i++) 255 for (let i = 0, l = candidates.length; i < l; i++)
249 { 256 {
250 let result = this._checkEntryMatch(candidates[i], location, typeMask, 257 let result = this.checkEntryMatch(candidates[i], location, typeMask,
251 docDomain, thirdParty, sitekey, 258 docDomain, thirdParty, sitekey,
252 specificOnly); 259 specificOnly);
253 if (result) 260 if (result)
254 return result; 261 return result;
255 } 262 }
256 263
257 return null; 264 return null;
258 } 265 }
259 } 266 }
260 267
261 exports.Matcher = Matcher; 268 exports.Matcher = Matcher;
262 269
263 /** 270 /**
264 * Combines a matcher for blocking and exception rules, automatically sorts 271 * Combines a matcher for blocking and exception rules, automatically sorts
265 * rules into two {@link Matcher} instances. 272 * rules into two {@link Matcher} instances.
266 */ 273 */
267 class CombinedMatcher 274 class CombinedMatcher
268 { 275 {
269 constructor() 276 constructor()
270 { 277 {
271 /** 278 /**
272 * Maximal number of matching cache entries to be kept 279 * Maximal number of matching cache entries to be kept
273 * @type {number} 280 * @type {number}
274 */ 281 */
275 this.maxCacheEntries = 1000; 282 this.maxCacheEntries = 1000;
276 283
277 /** 284 /**
278 * Matcher for blocking rules. 285 * Matcher for blocking rules.
279 * @type {Matcher} 286 * @type {Matcher}
280 */ 287 * @private
281 this.blacklist = new Matcher(); 288 */
289 this._blacklist = new Matcher();
282 290
283 /** 291 /**
284 * Matcher for exception rules. 292 * Matcher for exception rules.
285 * @type {Matcher} 293 * @type {Matcher}
286 */ 294 * @private
287 this.whitelist = new Matcher(); 295 */
296 this._whitelist = new Matcher();
288 297
289 /** 298 /**
290 * Lookup table of previous {@link Matcher#matchesAny} results 299 * Lookup table of previous {@link Matcher#matchesAny} results
291 * @type {Map.<string,Filter>} 300 * @type {Map.<string,Filter>}
292 */ 301 * @private
293 this.resultCache = new Map(); 302 */
303 this._resultCache = new Map();
294 } 304 }
295 305
296 /** 306 /**
297 * @see Matcher#clear 307 * @see Matcher#clear
298 */ 308 */
299 clear() 309 clear()
300 { 310 {
301 this.blacklist.clear(); 311 this._blacklist.clear();
302 this.whitelist.clear(); 312 this._whitelist.clear();
303 this.resultCache.clear(); 313 this._resultCache.clear();
304 } 314 }
305 315
306 /** 316 /**
307 * @see Matcher#add 317 * @see Matcher#add
308 * @param {Filter} filter 318 * @param {Filter} filter
309 */ 319 */
310 add(filter) 320 add(filter)
311 { 321 {
312 if (filter instanceof WhitelistFilter) 322 if (filter instanceof WhitelistFilter)
313 this.whitelist.add(filter); 323 this._whitelist.add(filter);
314 else 324 else
315 this.blacklist.add(filter); 325 this._blacklist.add(filter);
316 326
317 this.resultCache.clear(); 327 this._resultCache.clear();
318 } 328 }
319 329
320 /** 330 /**
321 * @see Matcher#remove 331 * @see Matcher#remove
322 * @param {Filter} filter 332 * @param {Filter} filter
323 */ 333 */
324 remove(filter) 334 remove(filter)
325 { 335 {
326 if (filter instanceof WhitelistFilter) 336 if (filter instanceof WhitelistFilter)
327 this.whitelist.remove(filter); 337 this._whitelist.remove(filter);
328 else 338 else
329 this.blacklist.remove(filter); 339 this._blacklist.remove(filter);
330 340
331 this.resultCache.clear(); 341 this._resultCache.clear();
332 } 342 }
333 343
334 /** 344 /**
335 * @see Matcher#findKeyword 345 * @see Matcher#findKeyword
336 * @param {Filter} filter 346 * @param {Filter} filter
337 * @returns {string} keyword 347 * @returns {string} keyword
348 * @protected
338 */ 349 */
339 findKeyword(filter) 350 findKeyword(filter)
340 { 351 {
341 if (filter instanceof WhitelistFilter) 352 if (filter instanceof WhitelistFilter)
342 return this.whitelist.findKeyword(filter); 353 return this._whitelist.findKeyword(filter);
343 return this.blacklist.findKeyword(filter); 354 return this._blacklist.findKeyword(filter);
344 } 355 }
345 356
346 /** 357 /**
347 * Optimized filter matching testing both whitelist and blacklist matchers 358 * Optimized filter matching testing both whitelist and blacklist matchers
348 * simultaneously. For parameters see 359 * simultaneously. For parameters see
349 {@link Matcher#matchesAny Matcher.matchesAny()}. 360 {@link Matcher#matchesAny Matcher.matchesAny()}.
350 * @see Matcher#matchesAny 361 * @see Matcher#matchesAny
351 * @inheritdoc 362 * @inheritdoc
352 */ 363 * @private
353 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 364 */
354 specificOnly) 365 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
366 specificOnly)
355 { 367 {
356 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 368 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
357 if (candidates === null) 369 if (candidates === null)
358 candidates = []; 370 candidates = [];
359 candidates.push(""); 371 candidates.push("");
360 372
373 let whitelistHit = null;
361 let blacklistHit = null; 374 let blacklistHit = null;
362 for (let i = 0, l = candidates.length; i < l; i++) 375
363 { 376 // If the type mask includes no types other than whitelist-only types, we
364 let substr = candidates[i]; 377 // can skip the blacklist.
365 let result = this.whitelist._checkEntryMatch( 378 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0)
366 substr, location, typeMask, docDomain, thirdParty, sitekey 379 {
367 ); 380 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++)
368 if (result)
369 return result;
370 if (blacklistHit === null)
371 { 381 {
372 blacklistHit = this.blacklist._checkEntryMatch( 382 blacklistHit = this._blacklist.checkEntryMatch(candidates[i], location,
373 substr, location, typeMask, docDomain, thirdParty, sitekey, 383 typeMask, docDomain,
374 specificOnly 384 thirdParty, sitekey,
375 ); 385 specificOnly);
376 } 386 }
377 } 387 }
378 return blacklistHit; 388
389 // If the type mask includes any whitelist-only types, we need to check the
390 // whitelist.
391 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0)
392 {
393 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++)
394 {
395 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location,
396 typeMask, docDomain,
397 thirdParty, sitekey);
398 }
399 }
400
401 return whitelistHit || blacklistHit;
379 } 402 }
380 403
381 /** 404 /**
382 * @see Matcher#matchesAny 405 * @see Matcher#matchesAny
383 * @inheritdoc 406 * @inheritdoc
384 */ 407 */
385 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 408 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
386 { 409 {
387 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 410 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
388 " " + sitekey + " " + specificOnly; 411 " " + sitekey + " " + specificOnly;
389 412
390 let result = this.resultCache.get(key); 413 let result = this._resultCache.get(key);
391 if (typeof result != "undefined") 414 if (typeof result != "undefined")
392 return result; 415 return result;
393 416
394 result = this.matchesAnyInternal(location, typeMask, docDomain, 417 result = this._matchesAnyInternal(location, typeMask, docDomain,
395 thirdParty, sitekey, specificOnly); 418 thirdParty, sitekey, specificOnly);
396 419
397 if (this.resultCache.size >= this.maxCacheEntries) 420 if (this._resultCache.size >= this.maxCacheEntries)
398 this.resultCache.clear(); 421 this._resultCache.clear();
399 422
400 this.resultCache.set(key, result); 423 this._resultCache.set(key, result);
401 424
402 return result; 425 return result;
403 } 426 }
404 } 427 }
405 428
406 exports.CombinedMatcher = CombinedMatcher; 429 exports.CombinedMatcher = CombinedMatcher;
407 430
408 /** 431 /**
409 * Shared {@link CombinedMatcher} instance that should usually be used. 432 * Shared {@link CombinedMatcher} instance that should usually be used.
410 * @type {CombinedMatcher} 433 * @type {CombinedMatcher}
411 */ 434 */
412 let defaultMatcher = new CombinedMatcher(); 435 let defaultMatcher = new CombinedMatcher();
413 436
414 exports.defaultMatcher = defaultMatcher; 437 exports.defaultMatcher = defaultMatcher;
LEFTRIGHT

Powered by Google App Engine
This is Rietveld