Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/matcher.js

Issue 29907586: Issue 6994 - Use shortcut matching for location only filters (Closed)
Left Patch Set: Address PS1 Comments Created Oct. 20, 2018, 11:07 p.m.
Right Patch Set: Address PS4, and rebase Created Oct. 24, 2018, 8:40 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26 26
27 /** 27 /**
28 * Regular expression for matching a keyword in a filter. 28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp} 29 * @type {RegExp}
30 */ 30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32 32
33 /** 33 /**
34 * Regular expression for matching all keywords in a filter. 34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp} 35 * @type {RegExp}
36 */ 36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g"); 37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Bitmask for "types" that are for exception rules only, like
41 * <code>$document</code>, <code>$elemhide</code>, and so on.
42 * @type {number}
43 */
44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
45 RegExpFilter.typeMap.ELEMHIDE |
46 RegExpFilter.typeMap.GENERICHIDE |
47 RegExpFilter.typeMap.GENERICBLOCK;
38 48
39 /** 49 /**
40 * Checks whether a particular filter is slow. 50 * Checks whether a particular filter is slow.
41 * @param {RegExpFilter} filter 51 * @param {RegExpFilter} filter
42 * @returns {boolean} 52 * @returns {boolean}
43 */ 53 */
44 function isSlowFilter(filter) 54 function isSlowFilter(filter)
45 { 55 {
46 return !filter.pattern || !keywordRegExp.test(filter.pattern); 56 return !filter.pattern || !keywordRegExp.test(filter.pattern);
47 } 57 }
48 58
49 exports.isSlowFilter = isSlowFilter; 59 exports.isSlowFilter = isSlowFilter;
50 60
51 /** 61 /**
52 * Blacklist/whitelist filter matching 62 * Blacklist/whitelist filter matching
53 */ 63 */
54 class Matcher 64 class Matcher
55 { 65 {
56 constructor() 66 constructor()
57 { 67 {
58 /** 68 /**
59 * Lookup table for filters by their associated keyword 69 * Lookup table for filters by their associated keyword
60 * @type {Map.<string,(Filter|Set.<Filter>)>} 70 * @type {Map.<string,(Filter|Set.<Filter>)>}
61 */ 71 * @private
62 this.filterByKeyword = new Map(); 72 */
73 this._filterByKeyword = new Map();
63 74
64 /** 75 /**
65 * Lookup table for location only filters by their associated keyword 76 * Lookup table for location only filters by their associated keyword
Manish Jethani 2018/10/24 21:35:28 Nit: I don't know if this is right, but I would ha
Jon Sonesen 2018/10/24 21:46:51 Acknowledged.
66 * for shortcut matching. 77 * for shortcut matching.
78 * @private
Manish Jethani 2018/10/24 21:35:28 Let's put @private last.
Jon Sonesen 2018/10/24 21:46:52 Acknowledged.
67 * @type {Map.<string,(Filter|Set.<Filter>)>} 79 * @type {Map.<string,(Filter|Set.<Filter>)>}
68 */ 80 */
69 this.locationFiltersByKeyword = new Map(); 81 this._fastFilterByKeyword = new Map();
Manish Jethani 2018/10/24 21:35:28 I'm thinking about the nomenclature here, about ho
Jon Sonesen 2018/10/24 21:46:52 I like this, it makes more sense from a reader's p
70 } 82 }
71 83
72 /** 84 /**
73 * Removes all known filters 85 * Removes all known filters
74 */ 86 */
75 clear() 87 clear()
76 { 88 {
77 this.filterByKeyword.clear(); 89 this._filterByKeyword.clear();
78 this.locationFiltersByKeyword.clear(); 90 this._fastFilterByKeyword.clear();
79 } 91 }
80 92
81 /** 93 /**
82 * Adds a filter to the matcher 94 * Adds a filter to the matcher
83 * @param {RegExpFilter} filter 95 * @param {RegExpFilter} filter
84 */ 96 */
85 add(filter) 97 add(filter)
86 { 98 {
87 // Look for a suitable keyword 99 // Look for a suitable keyword
88 let keyword = this.findKeyword(filter); 100 let keyword = this.findKeyword(filter);
89 let set = this.filterByKeyword.get(keyword); 101 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
90 102 this._filterByKeyword;
91 if (filter.isLocationOnly) 103 let set = filterMap.get(keyword);
92 {
93 let locationSet = this.locationFiltersByKeyword.get(keyword);
94 if (typeof locationSet == "undefined")
95 this.locationFiltersByKeyword.set(keyword, new Set([filter]));
96 else
97 {
98 locationSet.add(filter);
99 }
100 }
101 104
102 if (typeof set == "undefined") 105 if (typeof set == "undefined")
103 { 106 {
104 this.filterByKeyword.set(keyword, filter); 107 filterMap.set(keyword, filter);
105 } 108 }
106 else if (set.size == 1) 109 else if (set.size == 1)
107 { 110 {
108 if (filter != set) 111 if (filter != set)
109 this.filterByKeyword.set(keyword, new Set([set, filter])); 112 filterMap.set(keyword, new Set([set, filter]));
110 } 113 }
111 else 114 else
112 { 115 {
113 set.add(filter); 116 set.add(filter);
114 } 117 }
115 } 118 }
116 119
117 /** 120 /**
118 * Removes a filter from the matcher 121 * Removes a filter from the matcher
119 * @param {RegExpFilter} filter 122 * @param {RegExpFilter} filter
120 */ 123 */
121 remove(filter) 124 remove(filter)
122 { 125 {
123 let keyword = this.findKeyword(filter); 126 let keyword = this.findKeyword(filter);
124 let set = this.filterByKeyword.get(keyword); 127 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
128 this._filterByKeyword;
129 let set = filterMap.get(keyword);
130
125 if (typeof set == "undefined") 131 if (typeof set == "undefined")
126 return; 132 return;
127 133
128 if (set.size == 1) 134 if (set.size == 1)
129 { 135 {
130 if (filter == set) 136 if (filter == set)
131 this.filterByKeyword.delete(keyword); 137 filterMap.delete(keyword);
132 } 138 }
133 else 139 else
134 { 140 {
135 set.delete(filter); 141 set.delete(filter);
136 142
137 if (set.size == 1) 143 if (set.size == 1)
138 this.filterByKeyword.set(keyword, [...set][0]); 144 filterMap.set(keyword, [...set][0]);
139 } 145 }
140 } 146 }
141 147
142 /** 148 /**
143 * Chooses a keyword to be associated with the filter 149 * Chooses a keyword to be associated with the filter
144 * @param {Filter} filter 150 * @param {Filter} filter
145 * @returns {string} keyword or an empty string if no keyword could be found 151 * @returns {string} keyword or an empty string if no keyword could be found
152 * @protected
146 */ 153 */
147 findKeyword(filter) 154 findKeyword(filter)
148 { 155 {
149 let result = ""; 156 let result = "";
150 let {pattern} = filter; 157 let {pattern} = filter;
151 if (pattern == null) 158 if (pattern == null)
152 return result; 159 return result;
153 160
154 let candidates = pattern.toLowerCase().match(allKeywordsRegExp); 161 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
155 if (!candidates) 162 if (!candidates)
156 return result; 163 return result;
157 164
158 let hash = this.filterByKeyword; 165 let hash = filter.isLocationOnly() ? this._fastFilterByKeyword :
166 this._filterByKeyword;
167
159 let resultCount = 0xFFFFFF; 168 let resultCount = 0xFFFFFF;
160 let resultLength = 0; 169 let resultLength = 0;
161 for (let i = 0, l = candidates.length; i < l; i++) 170 for (let i = 0, l = candidates.length; i < l; i++)
162 { 171 {
163 let candidate = candidates[i].substr(1); 172 let candidate = candidates[i].substr(1);
164 let filters = hash.get(candidate); 173 let filters = hash.get(candidate);
165 let count = typeof filters != "undefined" ? filters.size : 0; 174 let count = typeof filters != "undefined" ? filters.size : 0;
Manish Jethani 2018/10/25 00:49:42 We need `count` to be the sum of `this._simpleFilt
166 if (count < resultCount || 175 if (count < resultCount ||
167 (count == resultCount && candidate.length > resultLength)) 176 (count == resultCount && candidate.length > resultLength))
168 { 177 {
169 result = candidate; 178 result = candidate;
170 resultCount = count; 179 resultCount = count;
171 resultLength = candidate.length; 180 resultLength = candidate.length;
172 } 181 }
173 } 182 }
174 return result; 183 return result;
175 } 184 }
176 185
177 /** 186 /**
178 * Checks whether the entries for a particular keyword match a URL 187 * Checks whether the entries for a particular keyword match a URL
179 * @param {string} keyword 188 * @param {string} keyword
180 * @param {string} location 189 * @param {string} location
181 * @param {number} typeMask 190 * @param {number} typeMask
182 * @param {string} [docDomain] 191 * @param {string} [docDomain]
183 * @param {boolean} [thirdParty] 192 * @param {boolean} [thirdParty]
184 * @param {string} [sitekey] 193 * @param {string} [sitekey]
185 * @param {boolean} [specificOnly] 194 * @param {boolean} [specificOnly]
186 * @returns {?Filter} 195 * @returns {?Filter}
187 */ 196 * @protected
188 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 197 */
189 specificOnly) 198 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
190 { 199 specificOnly)
191 let locationFilters = this.locationFiltersByKeyword.get(keyword); 200 {
192 if (typeof locationFilters != "undefined") 201 let set = this._filterByKeyword.get(keyword);
193 { 202 if (typeof set == "undefined")
Manish Jethani 2018/10/25 00:49:41 This is clearly wrong. We want to check all filter
194 for (let filter of locationFilters) 203 {
204 let fastSet = this._fastFilterByKeyword.get(keyword);
205 if (typeof fastSet == "undefined")
206 return null;
207
208 for (let filter of fastSet)
195 { 209 {
196 if (filter.contentType == 0 && typeMask == 0) 210 if (specificOnly && filter.isGeneric() &&
211 !(filter instanceof WhitelistFilter))
197 continue; 212 continue;
Jon Sonesen 2018/10/20 23:09:48 I think this should be in both iterations?
213
198 if (filter.matchesLocation(location)) 214 if (filter.matchesLocation(location))
199 return filter; 215 return filter;
200 } 216 }
201 }
202 let set = this.filterByKeyword.get(keyword);
203 if (typeof set == "undefined")
204 return null; 217 return null;
218 }
205 219
206 for (let filter of set) 220 for (let filter of set)
207 { 221 {
208 if (specificOnly && filter.isGeneric() && 222 if (specificOnly && filter.isGeneric() &&
209 !(filter instanceof WhitelistFilter)) 223 !(filter instanceof WhitelistFilter))
210 continue; 224 continue;
211 225
212 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 226 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
213 return filter; 227 return filter;
214 } 228 }
(...skipping 18 matching lines...) Expand all
233 * matching filter or <code>null</code> 247 * matching filter or <code>null</code>
234 */ 248 */
235 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 249 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
236 { 250 {
237 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 251 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
238 if (candidates === null) 252 if (candidates === null)
239 candidates = []; 253 candidates = [];
240 candidates.push(""); 254 candidates.push("");
241 for (let i = 0, l = candidates.length; i < l; i++) 255 for (let i = 0, l = candidates.length; i < l; i++)
242 { 256 {
243 let result = this._checkEntryMatch(candidates[i], location, typeMask, 257 let result = this.checkEntryMatch(candidates[i], location, typeMask,
244 docDomain, thirdParty, sitekey, 258 docDomain, thirdParty, sitekey,
245 specificOnly); 259 specificOnly);
246 if (result) 260 if (result)
247 return result; 261 return result;
248 } 262 }
249 263
250 return null; 264 return null;
251 } 265 }
252 } 266 }
253 267
254 exports.Matcher = Matcher; 268 exports.Matcher = Matcher;
255 269
256 /** 270 /**
257 * Combines a matcher for blocking and exception rules, automatically sorts 271 * Combines a matcher for blocking and exception rules, automatically sorts
258 * rules into two {@link Matcher} instances. 272 * rules into two {@link Matcher} instances.
259 */ 273 */
260 class CombinedMatcher 274 class CombinedMatcher
261 { 275 {
262 constructor() 276 constructor()
263 { 277 {
264 /** 278 /**
265 * Maximal number of matching cache entries to be kept 279 * Maximal number of matching cache entries to be kept
266 * @type {number} 280 * @type {number}
267 */ 281 */
268 this.maxCacheEntries = 1000; 282 this.maxCacheEntries = 1000;
269 283
270 /** 284 /**
271 * Matcher for blocking rules. 285 * Matcher for blocking rules.
272 * @type {Matcher} 286 * @type {Matcher}
273 */ 287 * @private
274 this.blacklist = new Matcher(); 288 */
289 this._blacklist = new Matcher();
275 290
276 /** 291 /**
277 * Matcher for exception rules. 292 * Matcher for exception rules.
278 * @type {Matcher} 293 * @type {Matcher}
279 */ 294 * @private
280 this.whitelist = new Matcher(); 295 */
296 this._whitelist = new Matcher();
281 297
282 /** 298 /**
283 * Lookup table of previous {@link Matcher#matchesAny} results 299 * Lookup table of previous {@link Matcher#matchesAny} results
284 * @type {Map.<string,Filter>} 300 * @type {Map.<string,Filter>}
285 */ 301 * @private
286 this.resultCache = new Map(); 302 */
303 this._resultCache = new Map();
287 } 304 }
288 305
289 /** 306 /**
290 * @see Matcher#clear 307 * @see Matcher#clear
291 */ 308 */
292 clear() 309 clear()
293 { 310 {
294 this.blacklist.clear(); 311 this._blacklist.clear();
295 this.whitelist.clear(); 312 this._whitelist.clear();
296 this.resultCache.clear(); 313 this._resultCache.clear();
297 } 314 }
298 315
299 /** 316 /**
300 * @see Matcher#add 317 * @see Matcher#add
301 * @param {Filter} filter 318 * @param {Filter} filter
302 */ 319 */
303 add(filter) 320 add(filter)
304 { 321 {
305 if (filter instanceof WhitelistFilter) 322 if (filter instanceof WhitelistFilter)
306 this.whitelist.add(filter); 323 this._whitelist.add(filter);
307 else 324 else
308 this.blacklist.add(filter); 325 this._blacklist.add(filter);
309 326
310 this.resultCache.clear(); 327 this._resultCache.clear();
311 } 328 }
312 329
313 /** 330 /**
314 * @see Matcher#remove 331 * @see Matcher#remove
315 * @param {Filter} filter 332 * @param {Filter} filter
316 */ 333 */
317 remove(filter) 334 remove(filter)
318 { 335 {
319 if (filter instanceof WhitelistFilter) 336 if (filter instanceof WhitelistFilter)
320 this.whitelist.remove(filter); 337 this._whitelist.remove(filter);
321 else 338 else
322 this.blacklist.remove(filter); 339 this._blacklist.remove(filter);
323 340
324 this.resultCache.clear(); 341 this._resultCache.clear();
325 } 342 }
326 343
327 /** 344 /**
328 * @see Matcher#findKeyword 345 * @see Matcher#findKeyword
329 * @param {Filter} filter 346 * @param {Filter} filter
330 * @returns {string} keyword 347 * @returns {string} keyword
348 * @protected
331 */ 349 */
332 findKeyword(filter) 350 findKeyword(filter)
333 { 351 {
334 if (filter instanceof WhitelistFilter) 352 if (filter instanceof WhitelistFilter)
335 return this.whitelist.findKeyword(filter); 353 return this._whitelist.findKeyword(filter);
336 return this.blacklist.findKeyword(filter); 354 return this._blacklist.findKeyword(filter);
337 } 355 }
338 356
339 /** 357 /**
340 * Optimized filter matching testing both whitelist and blacklist matchers 358 * Optimized filter matching testing both whitelist and blacklist matchers
341 * simultaneously. For parameters see 359 * simultaneously. For parameters see
342 {@link Matcher#matchesAny Matcher.matchesAny()}. 360 {@link Matcher#matchesAny Matcher.matchesAny()}.
343 * @see Matcher#matchesAny 361 * @see Matcher#matchesAny
344 * @inheritdoc 362 * @inheritdoc
345 */ 363 * @private
346 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 364 */
347 specificOnly) 365 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
366 specificOnly)
348 { 367 {
349 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 368 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
350 if (candidates === null) 369 if (candidates === null)
351 candidates = []; 370 candidates = [];
352 candidates.push(""); 371 candidates.push("");
353 372
373 let whitelistHit = null;
354 let blacklistHit = null; 374 let blacklistHit = null;
355 for (let i = 0, l = candidates.length; i < l; i++) 375
356 { 376 // If the type mask includes no types other than whitelist-only types, we
357 let substr = candidates[i]; 377 // can skip the blacklist.
358 let result = this.whitelist._checkEntryMatch( 378 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0)
359 substr, location, typeMask, docDomain, thirdParty, sitekey 379 {
360 ); 380 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++)
361 if (result)
362 return result;
363 if (blacklistHit === null)
364 { 381 {
365 blacklistHit = this.blacklist._checkEntryMatch( 382 blacklistHit = this._blacklist.checkEntryMatch(candidates[i], location,
366 substr, location, typeMask, docDomain, thirdParty, sitekey, 383 typeMask, docDomain,
367 specificOnly 384 thirdParty, sitekey,
368 ); 385 specificOnly);
369 } 386 }
370 } 387 }
371 return blacklistHit; 388
389 // If the type mask includes any whitelist-only types, we need to check the
390 // whitelist.
391 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0)
392 {
393 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++)
394 {
395 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location,
396 typeMask, docDomain,
397 thirdParty, sitekey);
398 }
399 }
400
401 return whitelistHit || blacklistHit;
372 } 402 }
373 403
374 /** 404 /**
375 * @see Matcher#matchesAny 405 * @see Matcher#matchesAny
376 * @inheritdoc 406 * @inheritdoc
377 */ 407 */
378 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 408 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
379 { 409 {
380 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 410 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
381 " " + sitekey + " " + specificOnly; 411 " " + sitekey + " " + specificOnly;
382 412
383 let result = this.resultCache.get(key); 413 let result = this._resultCache.get(key);
384 if (typeof result != "undefined") 414 if (typeof result != "undefined")
385 return result; 415 return result;
386 416
387 result = this.matchesAnyInternal(location, typeMask, docDomain, 417 result = this._matchesAnyInternal(location, typeMask, docDomain,
388 thirdParty, sitekey, specificOnly); 418 thirdParty, sitekey, specificOnly);
389 419
390 if (this.resultCache.size >= this.maxCacheEntries) 420 if (this._resultCache.size >= this.maxCacheEntries)
391 this.resultCache.clear(); 421 this._resultCache.clear();
392 422
393 this.resultCache.set(key, result); 423 this._resultCache.set(key, result);
394 424
395 return result; 425 return result;
396 } 426 }
397 } 427 }
398 428
399 exports.CombinedMatcher = CombinedMatcher; 429 exports.CombinedMatcher = CombinedMatcher;
400 430
401 /** 431 /**
402 * Shared {@link CombinedMatcher} instance that should usually be used. 432 * Shared {@link CombinedMatcher} instance that should usually be used.
403 * @type {CombinedMatcher} 433 * @type {CombinedMatcher}
404 */ 434 */
405 let defaultMatcher = new CombinedMatcher(); 435 let defaultMatcher = new CombinedMatcher();
406 436
407 exports.defaultMatcher = defaultMatcher; 437 exports.defaultMatcher = defaultMatcher;
LEFTRIGHT

Powered by Google App Engine
This is Rietveld