Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/matcher.js

Issue 29897555: Issue 6940 - Use underscore prefixes lib/matcher.js (Closed)
Left Patch Set: Created Oct. 1, 2018, 5:46 a.m.
Right Patch Set: Address PS6 comment Created Oct. 24, 2018, 6:47 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | test/filterListener.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26
27 /**
28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp}
30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32
33 /**
34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp}
36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Bitmask for "types" that are for exception rules only, like
41 * <code>$document</code>, <code>$elemhide</code>, and so on.
42 * @type {number}
43 */
44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
45 RegExpFilter.typeMap.ELEMHIDE |
46 RegExpFilter.typeMap.GENERICHIDE |
47 RegExpFilter.typeMap.GENERICBLOCK;
48
49 /**
50 * Checks whether a particular filter is slow.
51 * @param {RegExpFilter} filter
52 * @returns {boolean}
53 */
54 function isSlowFilter(filter)
55 {
56 return !filter.pattern || !keywordRegExp.test(filter.pattern);
57 }
58
59 exports.isSlowFilter = isSlowFilter;
26 60
27 /** 61 /**
28 * Blacklist/whitelist filter matching 62 * Blacklist/whitelist filter matching
29 */ 63 */
30 class Matcher 64 class Matcher
31 { 65 {
32 constructor() 66 constructor()
33 { 67 {
34 /** 68 /**
35 * Lookup table for filters by their associated keyword 69 * Lookup table for filters by their associated keyword
36 * @type {Map.<string,(Filter|Filter[])>} 70 * @type {Map.<string,(Filter|Set.<Filter>)>}
71 * @private
37 */ 72 */
38 this.filterByKeyword = new Map(); 73 this._filterByKeyword = new Map();
39
40 /**
41 * Lookup table for keywords by the filter
42 * @type {Map.<Filter,string>}
43 */
44 this._keywordByFilter = new Map();
Manish Jethani 2018/10/01 15:08:59 I think we should add the `@private` tag here. Ot…
Manish Jethani 2018/10/01 15:10:00 I'm wondering where exactly it should be added. I…
Jon Sonesen 2018/10/21 03:07:07 Done.
45 } 74 }
46 75
47 /** 76 /**
48 * Removes all known filters 77 * Removes all known filters
49 */ 78 */
50 clear() 79 clear()
51 { 80 {
52 this.filterByKeyword.clear(); 81 this._filterByKeyword.clear();
53 this._keywordByFilter.clear();
54 } 82 }
55 83
56 /** 84 /**
57 * Adds a filter to the matcher 85 * Adds a filter to the matcher
58 * @param {RegExpFilter} filter 86 * @param {RegExpFilter} filter
59 */ 87 */
60 add(filter) 88 add(filter)
61 { 89 {
62 if (this._keywordByFilter.has(filter))
63 return;
64
65 // Look for a suitable keyword 90 // Look for a suitable keyword
66 let keyword = this.findKeyword(filter); 91 let keyword = this.findKeyword(filter);
67 let oldEntry = this.filterByKeyword.get(keyword); 92 let set = this._filterByKeyword.get(keyword);
68 if (typeof oldEntry == "undefined") 93 if (typeof set == "undefined")
69 this.filterByKeyword.set(keyword, filter); 94 {
70 else if (oldEntry.length == 1) 95 this._filterByKeyword.set(keyword, filter);
71 this.filterByKeyword.set(keyword, [oldEntry, filter]); 96 }
97 else if (set.size == 1)
98 {
99 if (filter != set)
100 this._filterByKeyword.set(keyword, new Set([set, filter]));
101 }
72 else 102 else
73 oldEntry.push(filter); 103 {
74 this._keywordByFilter.set(filter, keyword); 104 set.add(filter);
105 }
75 } 106 }
76 107
77 /** 108 /**
78 * Removes a filter from the matcher 109 * Removes a filter from the matcher
79 * @param {RegExpFilter} filter 110 * @param {RegExpFilter} filter
80 */ 111 */
81 remove(filter) 112 remove(filter)
82 { 113 {
83 let keyword = this._keywordByFilter.get(filter); 114 let keyword = this.findKeyword(filter);
84 if (typeof keyword == "undefined") 115 let set = this._filterByKeyword.get(keyword);
116 if (typeof set == "undefined")
85 return; 117 return;
86 118
87 let list = this.filterByKeyword.get(keyword); 119 if (set.size == 1)
88 if (list.length <= 1) 120 {
89 this.filterByKeyword.delete(keyword); 121 if (filter == set)
122 this._filterByKeyword.delete(keyword);
123 }
90 else 124 else
91 { 125 {
92 let index = list.indexOf(filter); 126 set.delete(filter);
93 if (index >= 0) 127
94 { 128 if (set.size == 1)
95 list.splice(index, 1); 129 this._filterByKeyword.set(keyword, [...set][0]);
96 if (list.length == 1) 130 }
97 this.filterByKeyword.set(keyword, list[0]);
98 }
99 }
100
101 this._keywordByFilter.delete(filter);
102 } 131 }
103 132
104 /** 133 /**
105 * Chooses a keyword to be associated with the filter 134 * Chooses a keyword to be associated with the filter
106 * @param {Filter} filter 135 * @param {Filter} filter
107 * @returns {string} keyword or an empty string if no keyword could be found 136 * @returns {string} keyword or an empty string if no keyword could be found
137 * @protected
108 */ 138 */
109 findKeyword(filter) 139 findKeyword(filter)
110 { 140 {
111 let result = ""; 141 let result = "";
112 let {pattern} = filter; 142 let {pattern} = filter;
113 if (pattern == null) 143 if (pattern == null)
114 return result; 144 return result;
115 145
116 let candidates = pattern.toLowerCase().match( 146 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
117 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
118 );
119 if (!candidates) 147 if (!candidates)
120 return result; 148 return result;
121 149
122 let hash = this.filterByKeyword; 150 let hash = this._filterByKeyword;
123 let resultCount = 0xFFFFFF; 151 let resultCount = 0xFFFFFF;
124 let resultLength = 0; 152 let resultLength = 0;
125 for (let i = 0, l = candidates.length; i < l; i++) 153 for (let i = 0, l = candidates.length; i < l; i++)
126 { 154 {
127 let candidate = candidates[i].substr(1); 155 let candidate = candidates[i].substr(1);
128 let filters = hash.get(candidate); 156 let filters = hash.get(candidate);
129 let count = typeof filters != "undefined" ? filters.length : 0; 157 let count = typeof filters != "undefined" ? filters.size : 0;
130 if (count < resultCount || 158 if (count < resultCount ||
131 (count == resultCount && candidate.length > resultLength)) 159 (count == resultCount && candidate.length > resultLength))
132 { 160 {
133 result = candidate; 161 result = candidate;
134 resultCount = count; 162 resultCount = count;
135 resultLength = candidate.length; 163 resultLength = candidate.length;
136 } 164 }
137 } 165 }
138 return result; 166 return result;
139 }
140
141 /**
142 * Checks whether a particular filter is being matched against.
143 * @param {RegExpFilter} filter
144 * @returns {boolean}
145 */
146 hasFilter(filter)
147 {
148 return this._keywordByFilter.has(filter);
149 }
150
151 /**
152 * Returns the keyword used for a filter, <code>null</code>
153 * for unknown filters.
154 * @param {RegExpFilter} filter
155 * @returns {?string}
156 */
157 getKeywordForFilter(filter)
158 {
159 let keyword = this._keywordByFilter.get(filter);
160 return typeof keyword != "undefined" ? keyword : null;
161 } 167 }
162 168
163 /** 169 /**
164 * Checks whether the entries for a particular keyword match a URL 170 * Checks whether the entries for a particular keyword match a URL
165 * @param {string} keyword 171 * @param {string} keyword
166 * @param {string} location 172 * @param {string} location
167 * @param {number} typeMask 173 * @param {number} typeMask
168 * @param {string} [docDomain] 174 * @param {string} [docDomain]
169 * @param {boolean} [thirdParty] 175 * @param {boolean} [thirdParty]
170 * @param {string} [sitekey] 176 * @param {string} [sitekey]
171 * @param {boolean} [specificOnly] 177 * @param {boolean} [specificOnly]
172 * @returns {?Filter} 178 * @returns {?Filter}
173 */ 179 * @protected
174 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 180 */
175 specificOnly) 181 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
176 { 182 specificOnly)
177 let list = this.filterByKeyword.get(keyword); 183 {
178 if (typeof list == "undefined") 184 let set = this._filterByKeyword.get(keyword);
185 if (typeof set == "undefined")
179 return null; 186 return null;
180 for (let i = 0; i < list.length; i++) 187
181 { 188 for (let filter of set)
182 let filter = list[i]; 189 {
183
184 if (specificOnly && filter.isGeneric() && 190 if (specificOnly && filter.isGeneric() &&
185 !(filter instanceof WhitelistFilter)) 191 !(filter instanceof WhitelistFilter))
186 continue; 192 continue;
187 193
188 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 194 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
189 return filter; 195 return filter;
190 } 196 }
191 return null; 197 return null;
192 } 198 }
193 199
(...skipping 15 matching lines...) Expand all
209 * matching filter or <code>null</code> 215 * matching filter or <code>null</code>
210 */ 216 */
211 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 217 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
212 { 218 {
213 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 219 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
214 if (candidates === null) 220 if (candidates === null)
215 candidates = []; 221 candidates = [];
216 candidates.push(""); 222 candidates.push("");
217 for (let i = 0, l = candidates.length; i < l; i++) 223 for (let i = 0, l = candidates.length; i < l; i++)
218 { 224 {
219 let result = this._checkEntryMatch(candidates[i], location, typeMask, 225 let result = this.checkEntryMatch(candidates[i], location, typeMask,
220 docDomain, thirdParty, sitekey, 226 docDomain, thirdParty, sitekey,
221 specificOnly); 227 specificOnly);
222 if (result) 228 if (result)
223 return result; 229 return result;
224 } 230 }
225 231
226 return null; 232 return null;
227 } 233 }
228 } 234 }
229 235
230 exports.Matcher = Matcher; 236 exports.Matcher = Matcher;
231 237
232 /** 238 /**
233 * Combines a matcher for blocking and exception rules, automatically sorts 239 * Combines a matcher for blocking and exception rules, automatically sorts
234 * rules into two {@link Matcher} instances. 240 * rules into two {@link Matcher} instances.
235 */ 241 */
236 class CombinedMatcher 242 class CombinedMatcher
237 { 243 {
238 constructor() 244 constructor()
239 { 245 {
240 /** 246 /**
241 * Maximal number of matching cache entries to be kept 247 * Maximal number of matching cache entries to be kept
242 * @type {number} 248 * @type {number}
243 */ 249 */
244 this.maxCacheEntries = 1000; 250 this.maxCacheEntries = 1000;
245 251
246 /** 252 /**
247 * Matcher for blocking rules. 253 * Matcher for blocking rules.
248 * @type {Matcher} 254 * @type {Matcher}
255 * @private
249 */ 256 */
250 this.blacklist = new Matcher(); 257 this._blacklist = new Matcher();
251 258
252 /** 259 /**
253 * Matcher for exception rules. 260 * Matcher for exception rules.
254 * @type {Matcher} 261 * @type {Matcher}
262 * @private
255 */ 263 */
256 this.whitelist = new Matcher(); 264 this._whitelist = new Matcher();
257 265
258 /** 266 /**
259 * Lookup table of previous {@link Matcher#matchesAny} results 267 * Lookup table of previous {@link Matcher#matchesAny} results
260 * @type {Map.<string,Filter>} 268 * @type {Map.<string,Filter>}
269 * @private
261 */ 270 */
262 this.resultCache = new Map(); 271 this._resultCache = new Map();
263 } 272 }
264 273
265 /** 274 /**
266 * @see Matcher#clear 275 * @see Matcher#clear
267 */ 276 */
268 clear() 277 clear()
269 { 278 {
270 this.blacklist.clear(); 279 this._blacklist.clear();
271 this.whitelist.clear(); 280 this._whitelist.clear();
272 this.resultCache.clear(); 281 this._resultCache.clear();
273 } 282 }
274 283
275 /** 284 /**
276 * @see Matcher#add 285 * @see Matcher#add
277 * @param {Filter} filter 286 * @param {Filter} filter
278 */ 287 */
279 add(filter) 288 add(filter)
280 { 289 {
281 if (filter instanceof WhitelistFilter) 290 if (filter instanceof WhitelistFilter)
282 this.whitelist.add(filter); 291 this._whitelist.add(filter);
283 else 292 else
284 this.blacklist.add(filter); 293 this._blacklist.add(filter);
285 294
286 this.resultCache.clear(); 295 this._resultCache.clear();
287 } 296 }
288 297
289 /** 298 /**
290 * @see Matcher#remove 299 * @see Matcher#remove
291 * @param {Filter} filter 300 * @param {Filter} filter
292 */ 301 */
293 remove(filter) 302 remove(filter)
294 { 303 {
295 if (filter instanceof WhitelistFilter) 304 if (filter instanceof WhitelistFilter)
296 this.whitelist.remove(filter); 305 this._whitelist.remove(filter);
297 else 306 else
298 this.blacklist.remove(filter); 307 this._blacklist.remove(filter);
299 308
300 this.resultCache.clear(); 309 this._resultCache.clear();
301 } 310 }
302 311
303 /** 312 /**
304 * @see Matcher#findKeyword 313 * @see Matcher#findKeyword
305 * @param {Filter} filter 314 * @param {Filter} filter
306 * @returns {string} keyword 315 * @returns {string} keyword
316 * @protected
307 */ 317 */
308 findKeyword(filter) 318 findKeyword(filter)
309 { 319 {
310 if (filter instanceof WhitelistFilter) 320 if (filter instanceof WhitelistFilter)
311 return this.whitelist.findKeyword(filter); 321 return this._whitelist.findKeyword(filter);
312 return this.blacklist.findKeyword(filter); 322 return this._blacklist.findKeyword(filter);
313 }
314
315 /**
316 * @see Matcher#hasFilter
317 * @param {Filter} filter
318 * @returns {boolean}
319 */
320 hasFilter(filter)
321 {
322 if (filter instanceof WhitelistFilter)
323 return this.whitelist.hasFilter(filter);
324 return this.blacklist.hasFilter(filter);
325 }
326
327 /**
328 * @see Matcher#getKeywordForFilter
329 * @param {Filter} filter
330 * @returns {string} keyword
331 */
332 getKeywordForFilter(filter)
333 {
334 if (filter instanceof WhitelistFilter)
335 return this.whitelist.getKeywordForFilter(filter);
336 return this.blacklist.getKeywordForFilter(filter);
337 }
338
339 /**
340 * Checks whether a particular filter is slow
341 * @param {RegExpFilter} filter
342 * @returns {boolean}
343 */
344 isSlowFilter(filter)
345 {
346 let matcher = (
347 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
348 );
349 let keyword = matcher.getKeywordForFilter(filter);
350 if (keyword != null)
351 return !keyword;
352 return !matcher.findKeyword(filter);
353 } 323 }
354 324
355 /** 325 /**
356 * Optimized filter matching testing both whitelist and blacklist matchers 326 * Optimized filter matching testing both whitelist and blacklist matchers
357 * simultaneously. For parameters see 327 * simultaneously. For parameters see
358 {@link Matcher#matchesAny Matcher.matchesAny()}. 328 {@link Matcher#matchesAny Matcher.matchesAny()}.
359 * @see Matcher#matchesAny 329 * @see Matcher#matchesAny
360 * @inheritdoc 330 * @inheritdoc
361 */ 331 * @private
362 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 332 */
363 specificOnly) 333 _matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
334 specificOnly)
364 { 335 {
365 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 336 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
366 if (candidates === null) 337 if (candidates === null)
367 candidates = []; 338 candidates = [];
368 candidates.push(""); 339 candidates.push("");
369 340
341 let whitelistHit = null;
370 let blacklistHit = null; 342 let blacklistHit = null;
371 for (let i = 0, l = candidates.length; i < l; i++) 343
372 { 344 // If the type mask includes no types other than whitelist-only types, we
373 let substr = candidates[i]; 345 // can skip the blacklist.
374 let result = this.whitelist._checkEntryMatch( 346 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0)
375 substr, location, typeMask, docDomain, thirdParty, sitekey 347 {
376 ); 348 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++)
377 if (result)
378 return result;
379 if (blacklistHit === null)
380 { 349 {
381 blacklistHit = this.blacklist._checkEntryMatch( 350 blacklistHit = this._blacklist.checkEntryMatch(candidates[i], location,
382 substr, location, typeMask, docDomain, thirdParty, sitekey, 351 typeMask, docDomain,
383 specificOnly 352 thirdParty, sitekey,
384 ); 353 specificOnly);
385 } 354 }
386 } 355 }
387 return blacklistHit; 356
357 // If the type mask includes any whitelist-only types, we need to check the
358 // whitelist.
359 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0)
360 {
361 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++)
362 {
363 whitelistHit = this._whitelist.checkEntryMatch(candidates[i], location,
364 typeMask, docDomain,
365 thirdParty, sitekey);
366 }
367 }
368
369 return whitelistHit || blacklistHit;
388 } 370 }
389 371
390 /** 372 /**
391 * @see Matcher#matchesAny 373 * @see Matcher#matchesAny
392 * @inheritdoc 374 * @inheritdoc
393 */ 375 */
394 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 376 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
395 { 377 {
396 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 378 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
397 " " + sitekey + " " + specificOnly; 379 " " + sitekey + " " + specificOnly;
398 380
399 let result = this.resultCache.get(key); 381 let result = this._resultCache.get(key);
400 if (typeof result != "undefined") 382 if (typeof result != "undefined")
401 return result; 383 return result;
402 384
403 result = this.matchesAnyInternal(location, typeMask, docDomain, 385 result = this._matchesAnyInternal(location, typeMask, docDomain,
404 thirdParty, sitekey, specificOnly); 386 thirdParty, sitekey, specificOnly);
405 387
406 if (this.resultCache.size >= this.maxCacheEntries) 388 if (this._resultCache.size >= this.maxCacheEntries)
407 this.resultCache.clear(); 389 this._resultCache.clear();
408 390
409 this.resultCache.set(key, result); 391 this._resultCache.set(key, result);
410 392
411 return result; 393 return result;
412 } 394 }
413 } 395 }
414 396
415 exports.CombinedMatcher = CombinedMatcher; 397 exports.CombinedMatcher = CombinedMatcher;
416 398
417 /** 399 /**
418 * Shared {@link CombinedMatcher} instance that should usually be used. 400 * Shared {@link CombinedMatcher} instance that should usually be used.
419 * @type {CombinedMatcher} 401 * @type {CombinedMatcher}
420 */ 402 */
421 let defaultMatcher = new CombinedMatcher(); 403 let defaultMatcher = new CombinedMatcher();
422 404
423 exports.defaultMatcher = defaultMatcher; 405 exports.defaultMatcher = defaultMatcher;
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld