Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/matcher.js

Issue 29907586: Issue 6994 - Use shortcut matching for location only filters (Closed)
Left Patch Set: Created Oct. 12, 2018, 4:56 a.m.
Right Patch Set: Address PS4, and rebase Created Oct. 24, 2018, 8:40 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26 26
27 /** 27 /**
28 * Regular expression for matching a keyword in a filter. 28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp} 29 * @type {RegExp}
30 */ 30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32 32
33 /** 33 /**
34 * Regular expression for matching all keywords in a filter. 34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp} 35 * @type {RegExp}
36 */ 36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g"); 37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Bitmask for "types" that are for exception rules only, like
41 * <code>$document</code>, <code>$elemhide</code>, and so on.
42 * @type {number}
43 */
44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
45 RegExpFilter.typeMap.ELEMHIDE |
46 RegExpFilter.typeMap.GENERICHIDE |
47 RegExpFilter.typeMap.GENERICBLOCK;
38 48
39 /** 49 /**
40 * Checks whether a particular filter is slow. 50 * Checks whether a particular filter is slow.
41 * @param {RegExpFilter} filter 51 * @param {RegExpFilter} filter
42 * @returns {boolean} 52 * @returns {boolean}
43 */ 53 */
44 function isSlowFilter(filter) 54 function isSlowFilter(filter)
45 { 55 {
46 return !filter.pattern || !keywordRegExp.test(filter.pattern); 56 return !filter.pattern || !keywordRegExp.test(filter.pattern);
47 } 57 }
48 58
49 exports.isSlowFilter = isSlowFilter; 59 exports.isSlowFilter = isSlowFilter;
50 60
51 /** 61 /**
52 * Blacklist/whitelist filter matching 62 * Blacklist/whitelist filter matching
53 */ 63 */
54 class Matcher 64 class Matcher
55 { 65 {
56 constructor() 66 constructor()
57 { 67 {
58 /** 68 /**
59 * Lookup table for filters by their associated keyword 69 * Lookup table for filters by their associated keyword
60 * @type {Map.<string,(Filter|Set.<Filter>)>} 70 * @type {Map.<string,(Filter|Set.<Filter>)>}
61 */ 71 * @private
62 this.filterByKeyword = new Map(); 72 */
73 this._filterByKeyword = new Map();
74
75 /**
76 * Lookup table for location only filters by their associated keyword
Manish Jethani 2018/10/24 21:35:28 Nit: I don't know if this is right, but I would ha
Jon Sonesen 2018/10/24 21:46:51 Acknowledged.
77 * for shortcut matching.
78 * @private
Manish Jethani 2018/10/24 21:35:28 Let's put @private last.
Jon Sonesen 2018/10/24 21:46:52 Acknowledged.
79 * @type {Map.<string,(Filter|Set.<Filter>)>}
80 */
81 this._fastFilterByKeyword = new Map();
Manish Jethani 2018/10/24 21:35:28 I'm thinking about the nomenclature here, about ho
Jon Sonesen 2018/10/24 21:46:52 I like this, it makes more sense from a reader's p
63 } 82 }
64 83
65 /** 84 /**
66 * Removes all known filters 85 * Removes all known filters
67 */ 86 */
68 clear() 87 clear()
69 { 88 {
70 this.filterByKeyword.clear(); 89 this._filterByKeyword.clear();
90 this._fastFilterByKeyword.clear();
71 } 91 }
72 92
73 /** 93 /**
74 * Adds a filter to the matcher 94 * Adds a filter to the matcher
75 * @param {RegExpFilter} filter 95 * @param {RegExpFilter} filter
76 */ 96 */
77 add(filter) 97 add(filter)
78 { 98 {
79 // Look for a suitable keyword 99 // Look for a suitable keyword
80 let keyword = this.findKeyword(filter); 100 let keyword = this.findKeyword(filter);
81 let set = this.filterByKeyword.get(keyword); 101 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
102 this._filterByKeyword;
103 let set = filterMap.get(keyword);
104
82 if (typeof set == "undefined") 105 if (typeof set == "undefined")
83 { 106 {
84 this.filterByKeyword.set(keyword, filter); 107 filterMap.set(keyword, filter);
85 } 108 }
86 else if (set.size == 1) 109 else if (set.size == 1)
87 { 110 {
88 if (filter != set) 111 if (filter != set)
89 this.filterByKeyword.set(keyword, new Set([set, filter])); 112 filterMap.set(keyword, new Set([set, filter]));
90 } 113 }
91 else 114 else
92 { 115 {
93 set.add(filter); 116 set.add(filter);
94 } 117 }
95 } 118 }
96 119
97 /** 120 /**
98 * Removes a filter from the matcher 121 * Removes a filter from the matcher
99 * @param {RegExpFilter} filter 122 * @param {RegExpFilter} filter
100 */ 123 */
101 remove(filter) 124 remove(filter)
102 { 125 {
103 let keyword = this.findKeyword(filter); 126 let keyword = this.findKeyword(filter);
104 let set = this.filterByKeyword.get(keyword); 127 let filterMap = filter.isLocationOnly() ? this._fastFilterByKeyword :
128 this._filterByKeyword;
129 let set = filterMap.get(keyword);
130
105 if (typeof set == "undefined") 131 if (typeof set == "undefined")
106 return; 132 return;
107 133
108 if (set.size == 1) 134 if (set.size == 1)
109 { 135 {
110 if (filter == set) 136 if (filter == set)
111 this.filterByKeyword.delete(keyword); 137 filterMap.delete(keyword);
112 } 138 }
113 else 139 else
114 { 140 {
115 set.delete(filter); 141 set.delete(filter);
116 142
117 if (set.size == 1) 143 if (set.size == 1)
118 this.filterByKeyword.set(keyword, [...set][0]); 144 filterMap.set(keyword, [...set][0]);
119 } 145 }
120 } 146 }
121 147
122 /** 148 /**
123 * Chooses a keyword to be associated with the filter 149 * Chooses a keyword to be associated with the filter
124 * @param {Filter} filter 150 * @param {Filter} filter
125 * @returns {string} keyword or an empty string if no keyword could be found 151 * @returns {string} keyword or an empty string if no keyword could be found
152 * @protected
126 */ 153 */
127 findKeyword(filter) 154 findKeyword(filter)
128 { 155 {
129 let result = ""; 156 let result = "";
130 let {pattern} = filter; 157 let {pattern} = filter;
131 if (pattern == null) 158 if (pattern == null)
132 return result; 159 return result;
133 160
134 let candidates = pattern.toLowerCase().match(allKeywordsRegExp); 161 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
135 if (!candidates) 162 if (!candidates)
136 return result; 163 return result;
137 164
138 let hash = this.filterByKeyword; 165 let hash = filter.isLocationOnly() ? this._fastFilterByKeyword :
166 this._filterByKeyword;
167
139 let resultCount = 0xFFFFFF; 168 let resultCount = 0xFFFFFF;
140 let resultLength = 0; 169 let resultLength = 0;
141 for (let i = 0, l = candidates.length; i < l; i++) 170 for (let i = 0, l = candidates.length; i < l; i++)
142 { 171 {
143 let candidate = candidates[i].substr(1); 172 let candidate = candidates[i].substr(1);
144 let filters = hash.get(candidate); 173 let filters = hash.get(candidate);
145 let count = typeof filters != "undefined" ? filters.size : 0; 174 let count = typeof filters != "undefined" ? filters.size : 0;
Manish Jethani 2018/10/25 00:49:42 We need `count` to be the sum of `this._simpleFilt
146 if (count < resultCount || 175 if (count < resultCount ||
147 (count == resultCount && candidate.length > resultLength)) 176 (count == resultCount && candidate.length > resultLength))
148 { 177 {
149 result = candidate; 178 result = candidate;
150 resultCount = count; 179 resultCount = count;
151 resultLength = candidate.length; 180 resultLength = candidate.length;
152 } 181 }
153 } 182 }
154 return result; 183 return result;
155 } 184 }
156 185
157 /** 186 /**
158 * Checks whether the entries for a particular keyword match a URL 187 * Checks whether the entries for a particular keyword match a URL
159 * @param {string} keyword 188 * @param {string} keyword
160 * @param {string} location 189 * @param {string} location
161 * @param {number} typeMask 190 * @param {number} typeMask
162 * @param {string} [docDomain] 191 * @param {string} [docDomain]
163 * @param {boolean} [thirdParty] 192 * @param {boolean} [thirdParty]
164 * @param {string} [sitekey] 193 * @param {string} [sitekey]
165 * @param {boolean} [specificOnly] 194 * @param {boolean} [specificOnly]
166 * @returns {?Filter} 195 * @returns {?Filter}
167 */ 196 * @protected
168 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 197 */
169 specificOnly) 198 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
170 { 199 specificOnly)
171 let set = this.filterByKeyword.get(keyword); 200 {
201 let set = this._filterByKeyword.get(keyword);
172 if (typeof set == "undefined") 202 if (typeof set == "undefined")
Manish Jethani 2018/10/25 00:49:41 This is clearly wrong. We want to check all filter
203 {
204 let fastSet = this._fastFilterByKeyword.get(keyword);
205 if (typeof fastSet == "undefined")
206 return null;
207
208 for (let filter of fastSet)
209 {
210 if (specificOnly && filter.isGeneric() &&
211 !(filter instanceof WhitelistFilter))
212 continue;
213
214 if (filter.matchesLocation(location))
215 return filter;
216 }
173 return null; 217 return null;
218 }
174 219
175 for (let filter of set) 220 for (let filter of set)
Manish Jethani 2018/10/13 13:28:45 The idea was to have two loops, one for the locati
Jon Sonesen 2018/10/20 23:07:55 Ack
176 { 221 {
177 if (typeMask == 0)
178 continue;
179
180 if (specificOnly && filter.isGeneric() && 222 if (specificOnly && filter.isGeneric() &&
181 !(filter instanceof WhitelistFilter)) 223 !(filter instanceof WhitelistFilter))
182 continue; 224 continue;
183
184 if (!sitekey || !docDomain || !thirdParty)
185 {
Manish Jethani 2018/10/13 13:28:45 This seems wrong.
Jon Sonesen 2018/10/20 23:07:55 Yeah, super wrong. My bad
186 if (filter.matches(location))
187 return filter;
188 }
189 225
190 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 226 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
191 return filter; 227 return filter;
192 } 228 }
193 return null; 229 return null;
194 } 230 }
195 231
196 /** 232 /**
197 * Tests whether the URL matches any of the known filters 233 * Tests whether the URL matches any of the known filters
198 * @param {string} location 234 * @param {string} location
(...skipping 12 matching lines...) Expand all
211 * matching filter or <code>null</code> 247 * matching filter or <code>null</code>
212 */ 248 */
213 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 249 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
214 { 250 {
215 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 251 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
216 if (candidates === null) 252 if (candidates === null)
217 candidates = []; 253 candidates = [];
218 candidates.push(""); 254 candidates.push("");
219 for (let i = 0, l = candidates.length; i < l; i++) 255 for (let i = 0, l = candidates.length; i < l; i++)
220 { 256 {
221 let result = this._checkEntryMatch(candidates[i], location, typeMask, 257 let result = this.checkEntryMatch(candidates[i], location, typeMask,
222 docDomain, thirdParty, sitekey, 258 docDomain, thirdParty, sitekey,
223 specificOnly); 259 specificOnly);
224 if (result) 260 if (result)
225 return result; 261 return result;
226 } 262 }
227 263
228 return null; 264 return null;
229 } 265 }
230 } 266 }
231 267
232 exports.Matcher = Matcher; 268 exports.Matcher = Matcher;
233 269
234 /** 270 /**
235 * Combines a matcher for blocking and exception rules, automatically sorts 271 * Combines a matcher for blocking and exception rules, automatically sorts
236 * rules into two {@link Matcher} instances. 272 * rules into two {@link Matcher} instances.
237 */ 273 */
238 class CombinedMatcher 274 class CombinedMatcher
239 { 275 {
240 constructor() 276 constructor()
241 { 277 {
242 /** 278 /**
243 * Maximal number of matching cache entries to be kept 279 * Maximal number of matching cache entries to be kept
244 * @type {number} 280 * @type {number}
245 */ 281 */
246 this.maxCacheEntries = 1000; 282 this.maxCacheEntries = 1000;
247 283
248 /** 284 /**
249 * Matcher for blocking rules. 285 * Matcher for blocking rules.
250 * @type {Matcher} 286 * @type {Matcher}
251 */ 287 * @private
252 this.blacklist = new Matcher(); 288 */
289 this._blacklist = new Matcher();
253 290
254 /** 291 /**
255 * Matcher for exception rules. 292 * Matcher for exception rules.
256 * @type {Matcher} 293 * @type {Matcher}
257 */ 294 * @private
258 this.whitelist = new Matcher(); 295 */
296 this._whitelist = new Matcher();
259 297
260 /** 298 /**
261 * Lookup table of previous {@link Matcher#matchesAny} results 299 * Lookup table of previous {@link Matcher#matchesAny} results
262 * @type {Map.<string,Filter>} 300 * @type {Map.<string,Filter>}
263 */ 301 * @private
264 this.resultCache = new Map(); 302 */
303 this._resultCache = new Map();
265 } 304 }
266 305
267 /** 306 /**
268 * @see Matcher#clear 307 * @see Matcher#clear
269 */ 308 */
270 clear() 309 clear()
271 { 310 {
272 this.blacklist.clear(); 311 this._blacklist.clear();
273 this.whitelist.clear(); 312 this._whitelist.clear();
274 this.resultCache.clear(); 313 this._resultCache.clear();
275 } 314 }
276 315
277 /** 316 /**
278 * @see Matcher#add 317 * @see Matcher#add
279 * @param {Filter} filter 318 * @param {Filter} filter
280 */ 319 */
281 add(filter) 320 add(filter)
282 { 321 {
283 if (filter instanceof WhitelistFilter) 322 if (filter instanceof WhitelistFilter)
284 this.whitelist.add(filter); 323 this._whitelist.add(filter);
285 else 324 else
286 this.blacklist.add(filter); 325 this._blacklist.add(filter);
287 326
288 this.resultCache.clear(); 327 this._resultCache.clear();
289 } 328 }
290 329
291 /** 330 /**
292 * @see Matcher#remove 331 * @see Matcher#remove
293 * @param {Filter} filter 332 * @param {Filter} filter
294 */ 333 */
295 remove(filter) 334 remove(filter)
296 { 335 {
297 if (filter instanceof WhitelistFilter) 336 if (filter instanceof WhitelistFilter)
298 this.whitelist.remove(filter); 337 this._whitelist.remove(filter);
299 else 338 else
300 this.blacklist.remove(filter); 339 this._blacklist.remove(filter);
301 340
302 this.resultCache.clear(); 341 this._resultCache.clear();
303 } 342 }
304 343
305 /** 344 /**
306 * @see Matcher#findKeyword 345 * @see Matcher#findKeyword
307 * @param {Filter} filter 346 * @param {Filter} filter
308 * @returns {string} keyword 347 * @returns {string} keyword
348 * @protected
309 */ 349 */
310 findKeyword(filter) 350 findKeyword(filter)
311 { 351 {
312 if (filter instanceof WhitelistFilter) 352 if (filter instanceof WhitelistFilter)
313 return this.whitelist.findKeyword(filter); 353 return this._whitelist.findKeyword(filter);
314 return this.blacklist.findKeyword(filter); 354 return this._blacklist.findKeyword(filter);
315 } 355 }
316 356
317 /** 357 /**
318 * Optimized filter matching testing both whitelist and blacklist matchers 358 * Optimized filter matching testing both whitelist and blacklist matchers
319 * simultaneously. For parameters see 359 * simultaneously. For parameters see
320 {@link Matcher#matchesAny Matcher.matchesAny()}. 360 {@link Matcher#matchesAny Matcher.matchesAny()}.
321 * @see Matcher#matchesAny 361 * @see Matcher#matchesAny
322 * @inheritdoc 362 * @inheritdoc
323 */ 363 * @private
324 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 364 */
325 specificOnly) 365 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
366 specificOnly)
326 { 367 {
327 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 368 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
328 if (candidates === null) 369 if (candidates === null)
329 candidates = []; 370 candidates = [];
330 candidates.push(""); 371 candidates.push("");
331 372
373 let whitelistHit = null;
332 let blacklistHit = null; 374 let blacklistHit = null;
333 for (let i = 0, l = candidates.length; i < l; i++) 375
334 { 376 // If the type mask includes no types other than whitelist-only types, we
335 let substr = candidates[i]; 377 // can skip the blacklist.
336 let result = this.whitelist._checkEntryMatch( 378 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0)
337 substr, location, typeMask, docDomain, thirdParty, sitekey 379 {
338 ); 380 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++)
339 if (result)
340 return result;
341 if (blacklistHit === null)
342 { 381 {
343 blacklistHit = this.blacklist._checkEntryMatch( 382 blacklistHit = this._blacklist.checkEntryMatch(candidates[i], location,
344 substr, location, typeMask, docDomain, thirdParty, sitekey, 383 typeMask, docDomain,
345 specificOnly 384 thirdParty, sitekey,
346 ); 385 specificOnly);
347 } 386 }
348 } 387 }
349 return blacklistHit; 388
389 // If the type mask includes any whitelist-only types, we need to check the
390 // whitelist.
391 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0)
392 {
393 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++)
394 {
395 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location,
396 typeMask, docDomain,
397 thirdParty, sitekey);
398 }
399 }
400
401 return whitelistHit || blacklistHit;
350 } 402 }
351 403
352 /** 404 /**
353 * @see Matcher#matchesAny 405 * @see Matcher#matchesAny
354 * @inheritdoc 406 * @inheritdoc
355 */ 407 */
356 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 408 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
357 { 409 {
358 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 410 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
359 " " + sitekey + " " + specificOnly; 411 " " + sitekey + " " + specificOnly;
360 412
361 let result = this.resultCache.get(key); 413 let result = this._resultCache.get(key);
362 if (typeof result != "undefined") 414 if (typeof result != "undefined")
363 return result; 415 return result;
364 416
365 result = this.matchesAnyInternal(location, typeMask, docDomain, 417 result = this._matchesAnyInternal(location, typeMask, docDomain,
366 thirdParty, sitekey, specificOnly); 418 thirdParty, sitekey, specificOnly);
367 419
368 if (this.resultCache.size >= this.maxCacheEntries) 420 if (this._resultCache.size >= this.maxCacheEntries)
369 this.resultCache.clear(); 421 this._resultCache.clear();
370 422
371 this.resultCache.set(key, result); 423 this._resultCache.set(key, result);
372 424
373 return result; 425 return result;
374 } 426 }
375 } 427 }
376 428
377 exports.CombinedMatcher = CombinedMatcher; 429 exports.CombinedMatcher = CombinedMatcher;
378 430
379 /** 431 /**
380 * Shared {@link CombinedMatcher} instance that should usually be used. 432 * Shared {@link CombinedMatcher} instance that should usually be used.
381 * @type {CombinedMatcher} 433 * @type {CombinedMatcher}
382 */ 434 */
383 let defaultMatcher = new CombinedMatcher(); 435 let defaultMatcher = new CombinedMatcher();
384 436
385 exports.defaultMatcher = defaultMatcher; 437 exports.defaultMatcher = defaultMatcher;
LEFTRIGHT

Powered by Google App Engine
This is Rietveld