Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/matcher.js

Issue 29869571: Issue 6741 - Use ES2015 classes in lib/matcher.js (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Actually address ps1 comments Created Sept. 2, 2018, 4:42 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {Filter, WhitelistFilter} = require("./filterClasses"); 25 const {Filter, WhitelistFilter} = require("./filterClasses");
26 26
27 /** 27 /**
28 * Blacklist/whitelist filter matching 28 * Blacklist/whitelist filter matching
29 * @constructor
30 */ 29 */
31 function Matcher() 30 class Matcher
32 { 31 {
33 this.clear(); 32 constructor()
34 } 33 {
35 exports.Matcher = Matcher; 34 /**
35 * Lookup table for filters by their associated keyword
36 * @type {Map.<string,(Filter|Filter[])>}
37 */
38 this.filterByKeyword = new Map();
36 39
37 Matcher.prototype = { 40 /**
38 /** 41 * Lookup table for keywords by the filter
39 * Lookup table for filters by their associated keyword 42 * @type {Map.<Filter,string>}
40 * @type {Map.<string,(Filter|Filter[])>} 43 */
41 */ 44 this.keywordByFilter = new Map();
42 filterByKeyword: null, 45 }
43
44 /**
45 * Lookup table for keywords by the filter
46 * @type {Map.<Filter,string>}
47 */
48 keywordByFilter: null,
49 46
50 /** 47 /**
51 * Removes all known filters 48 * Removes all known filters
52 */ 49 */
53 clear() 50 clear()
54 { 51 {
55 this.filterByKeyword = new Map(); 52 this.filterByKeyword.clear();
56 this.keywordByFilter = new Map(); 53 this.keywordByFilter.clear();
57 }, 54 }
58 55
59 /** 56 /**
60 * Adds a filter to the matcher 57 * Adds a filter to the matcher
61 * @param {RegExpFilter} filter 58 * @param {RegExpFilter} filter
62 */ 59 */
63 add(filter) 60 add(filter)
64 { 61 {
65 if (this.keywordByFilter.has(filter)) 62 if (this.keywordByFilter.has(filter))
66 return; 63 return;
67 64
68 // Look for a suitable keyword 65 // Look for a suitable keyword
69 let keyword = this.findKeyword(filter); 66 let keyword = this.findKeyword(filter);
70 let oldEntry = this.filterByKeyword.get(keyword); 67 let oldEntry = this.filterByKeyword.get(keyword);
71 if (typeof oldEntry == "undefined") 68 if (typeof oldEntry == "undefined")
72 this.filterByKeyword.set(keyword, filter); 69 this.filterByKeyword.set(keyword, filter);
73 else if (oldEntry.length == 1) 70 else if (oldEntry.length == 1)
74 this.filterByKeyword.set(keyword, [oldEntry, filter]); 71 this.filterByKeyword.set(keyword, [oldEntry, filter]);
75 else 72 else
76 oldEntry.push(filter); 73 oldEntry.push(filter);
77 this.keywordByFilter.set(filter, keyword); 74 this.keywordByFilter.set(filter, keyword);
78 }, 75 }
79 76
80 /** 77 /**
81 * Removes a filter from the matcher 78 * Removes a filter from the matcher
82 * @param {RegExpFilter} filter 79 * @param {RegExpFilter} filter
83 */ 80 */
84 remove(filter) 81 remove(filter)
85 { 82 {
86 let keyword = this.keywordByFilter.get(filter); 83 let keyword = this.keywordByFilter.get(filter);
87 if (typeof keyword == "undefined") 84 if (typeof keyword == "undefined")
88 return; 85 return;
89 86
90 let list = this.filterByKeyword.get(keyword); 87 let list = this.filterByKeyword.get(keyword);
91 if (list.length <= 1) 88 if (list.length <= 1)
92 this.filterByKeyword.delete(keyword); 89 this.filterByKeyword.delete(keyword);
93 else 90 else
94 { 91 {
95 let index = list.indexOf(filter); 92 let index = list.indexOf(filter);
96 if (index >= 0) 93 if (index >= 0)
97 { 94 {
98 list.splice(index, 1); 95 list.splice(index, 1);
99 if (list.length == 1) 96 if (list.length == 1)
100 this.filterByKeyword.set(keyword, list[0]); 97 this.filterByKeyword.set(keyword, list[0]);
101 } 98 }
102 } 99 }
103 100
104 this.keywordByFilter.delete(filter); 101 this.keywordByFilter.delete(filter);
105 }, 102 }
106 103
107 /** 104 /**
108 * Chooses a keyword to be associated with the filter 105 * Chooses a keyword to be associated with the filter
109 * @param {Filter} filter 106 * @param {Filter} filter
110 * @return {string} keyword or an empty string if no keyword could be found 107 * @returns {string} keyword or an empty string if no keyword could be found
111 */ 108 */
112 findKeyword(filter) 109 findKeyword(filter)
113 { 110 {
114 let result = ""; 111 let result = "";
115 let {text} = filter; 112 let {text} = filter;
116 if (Filter.regexpRegExp.test(text)) 113 if (Filter.regexpRegExp.test(text))
117 return result; 114 return result;
118 115
119 // Remove options 116 // Remove options
120 let match = Filter.optionsRegExp.exec(text); 117 let match = Filter.optionsRegExp.exec(text);
(...skipping 20 matching lines...) Expand all
141 let count = typeof filters != "undefined" ? filters.length : 0; 138 let count = typeof filters != "undefined" ? filters.length : 0;
142 if (count < resultCount || 139 if (count < resultCount ||
143 (count == resultCount && candidate.length > resultLength)) 140 (count == resultCount && candidate.length > resultLength))
144 { 141 {
145 result = candidate; 142 result = candidate;
146 resultCount = count; 143 resultCount = count;
147 resultLength = candidate.length; 144 resultLength = candidate.length;
148 } 145 }
149 } 146 }
150 return result; 147 return result;
151 }, 148 }
152 149
153 /** 150 /**
154 * Checks whether a particular filter is being matched against. 151 * Checks whether a particular filter is being matched against.
155 * @param {RegExpFilter} filter 152 * @param {RegExpFilter} filter
156 * @return {boolean} 153 * @returns {boolean}
157 */ 154 */
158 hasFilter(filter) 155 hasFilter(filter)
159 { 156 {
160 return this.keywordByFilter.has(filter); 157 return this.keywordByFilter.has(filter);
161 }, 158 }
162 159
163 /** 160 /**
164 * Returns the keyword used for a filter, null for unknown filters. 161 * Returns the keyword used for a filter, null for unknown filters.
Manish Jethani 2018/09/03 18:46:49 Let's make this `<code>null</code>` as well while […comment truncated]
Jon Sonesen 2018/09/05 14:05:27 Done.
165 * @param {RegExpFilter} filter 162 * @param {RegExpFilter} filter
166 * @return {?string} 163 * @returns {?string}
167 */ 164 */
168 getKeywordForFilter(filter) 165 getKeywordForFilter(filter)
169 { 166 {
170 let keyword = this.keywordByFilter.get(filter); 167 let keyword = this.keywordByFilter.get(filter);
171 return typeof keyword != "undefined" ? keyword : null; 168 return typeof keyword != "undefined" ? keyword : null;
172 }, 169 }
173 170
174 /** 171 /**
175 * Checks whether the entries for a particular keyword match a URL 172 * Checks whether the entries for a particular keyword match a URL
176 * @param {string} keyword 173 * @param {string} keyword
177 * @param {string} location 174 * @param {string} location
178 * @param {number} typeMask 175 * @param {number} typeMask
179 * @param {string} docDomain 176 * @param {string} [docDomain]
180 * @param {boolean} thirdParty 177 * @param {boolean} [thirdParty]
181 * @param {string} sitekey 178 * @param {string} [sitekey]
182 * @param {boolean} specificOnly 179 * @param {boolean} [specificOnly]
183 * @return {?Filter} 180 * @returns {?Filter}
184 */ 181 */
185 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 182 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
186 specificOnly) 183 specificOnly)
187 { 184 {
188 let list = this.filterByKeyword.get(keyword); 185 let list = this.filterByKeyword.get(keyword);
189 if (typeof list == "undefined") 186 if (typeof list == "undefined")
190 return null; 187 return null;
191 for (let i = 0; i < list.length; i++) 188 for (let i = 0; i < list.length; i++)
192 { 189 {
193 let filter = list[i]; 190 let filter = list[i];
194 191
195 if (specificOnly && filter.isGeneric() && 192 if (specificOnly && filter.isGeneric() &&
196 !(filter instanceof WhitelistFilter)) 193 !(filter instanceof WhitelistFilter))
197 continue; 194 continue;
198 195
199 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 196 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
200 return filter; 197 return filter;
201 } 198 }
202 return null; 199 return null;
203 }, 200 }
204 201
205 /** 202 /**
206 * Tests whether the URL matches any of the known filters 203 * Tests whether the URL matches any of the known filters
207 * @param {string} location 204 * @param {string} location
208 * URL to be tested 205 * URL to be tested
209 * @param {number} typeMask 206 * @param {number} typeMask
210 * bitmask of content / request types to match 207 * bitmask of content / request types to match
211 * @param {string} docDomain 208 * @param {string} [docDomain]
212 * domain name of the document that loads the URL 209 * domain name of the document that loads the URL
213 * @param {boolean} thirdParty 210 * @param {boolean} [thirdParty]
214 * should be true if the URL is a third-party request 211 * should be true if the URL is a third-party request
215 * @param {string} sitekey 212 * @param {string} [sitekey]
216 * public key provided by the document 213 * public key provided by the document
217 * @param {boolean} specificOnly 214 * @param {boolean} [specificOnly]
218 * should be true if generic matches should be ignored 215 * should be <code>true</code> if generic matches should be ignored
219 * @return {?RegExpFilter} 216 * @returns {?RegExpFilter}
220 * matching filter or null 217 * matching filter or <code>null</code>
221 */ 218 */
222 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 219 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
223 { 220 {
224 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 221 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
225 if (candidates === null) 222 if (candidates === null)
226 candidates = []; 223 candidates = [];
227 candidates.push(""); 224 candidates.push("");
228 for (let i = 0, l = candidates.length; i < l; i++) 225 for (let i = 0, l = candidates.length; i < l; i++)
229 { 226 {
230 let result = this._checkEntryMatch(candidates[i], location, typeMask, 227 let result = this._checkEntryMatch(candidates[i], location, typeMask,
231 docDomain, thirdParty, sitekey, 228 docDomain, thirdParty, sitekey,
232 specificOnly); 229 specificOnly);
233 if (result) 230 if (result)
234 return result; 231 return result;
235 } 232 }
236 233
237 return null; 234 return null;
238 } 235 }
239 }; 236 }
237
238 exports.Matcher = Matcher;
239
240 240
241 /** 241 /**
242 * Combines a matcher for blocking and exception rules, automatically sorts 242 * Combines a matcher for blocking and exception rules, automatically sorts
243 * rules into two Matcher instances. 243 * rules into two {@link Matcher} instances.
244 * @constructor
245 * @augments Matcher
246 */ 244 */
247 function CombinedMatcher() 245 class CombinedMatcher
248 { 246 {
249 this.blacklist = new Matcher(); 247 constructor()
250 this.whitelist = new Matcher(); 248 {
251 this.resultCache = new Map(); 249 /**
252 } 250 * Maximal number of matching cache entries to be kept
253 exports.CombinedMatcher = CombinedMatcher; 251 * @type {number}
252 */
253 this.maxCacheEntries = 1000;
254 254
255 /** 255 /**
256 * Maximal number of matching cache entries to be kept 256 * Matcher for blocking rules.
257 * @type {number} 257 * @type {Matcher}
258 */ 258 */
259 CombinedMatcher.maxCacheEntries = 1000; 259 this.blacklist = new Matcher();
260 260
261 CombinedMatcher.prototype = 261 /**
262 { 262 * Matcher for exception rules.
263 /** 263 * @type {Matcher}
264 * Matcher for blocking rules. 264 */
265 * @type {Matcher} 265 this.whitelist = new Matcher();
266 */
267 blacklist: null,
268 266
269 /** 267 /**
270 * Matcher for exception rules. 268 * Lookup table of previous {@link Matcher#matchesAny} results
271 * @type {Matcher} 269 * @type {Map.<string,Filter>}
272 */ 270 */
273 whitelist: null, 271 this.resultCache = new Map();
274 272 }
275 /**
276 * Lookup table of previous matchesAny results
277 * @type {Map.<string,Filter>}
278 */
279 resultCache: null,
280 273
281 /** 274 /**
282 * @see Matcher#clear 275 * @see Matcher#clear
283 */ 276 */
284 clear() 277 clear()
285 { 278 {
286 this.blacklist.clear(); 279 this.blacklist.clear();
287 this.whitelist.clear(); 280 this.whitelist.clear();
288 this.resultCache.clear(); 281 this.resultCache.clear();
289 }, 282 }
290 283
291 /** 284 /**
292 * @see Matcher#add 285 * @see Matcher#add
293 * @param {Filter} filter 286 * @param {Filter} filter
Manish Jethani 2018/09/03 18:46:49 When we use @see, we're not being consistent in ad[…comment truncated]
Jon Sonesen 2018/09/05 14:05:27 The reason for this is that matchesAny does not ha[…comment truncated]
Manish Jethani 2018/09/05 19:33:57 Oh damn. OK, no problem then.
294 */ 287 */
295 add(filter) 288 add(filter)
296 { 289 {
297 if (filter instanceof WhitelistFilter) 290 if (filter instanceof WhitelistFilter)
298 this.whitelist.add(filter); 291 this.whitelist.add(filter);
299 else 292 else
300 this.blacklist.add(filter); 293 this.blacklist.add(filter);
301 294
302 this.resultCache.clear(); 295 this.resultCache.clear();
303 }, 296 }
304 297
305 /** 298 /**
306 * @see Matcher#remove 299 * @see Matcher#remove
307 * @param {Filter} filter 300 * @param {Filter} filter
308 */ 301 */
309 remove(filter) 302 remove(filter)
310 { 303 {
311 if (filter instanceof WhitelistFilter) 304 if (filter instanceof WhitelistFilter)
312 this.whitelist.remove(filter); 305 this.whitelist.remove(filter);
313 else 306 else
314 this.blacklist.remove(filter); 307 this.blacklist.remove(filter);
315 308
316 this.resultCache.clear(); 309 this.resultCache.clear();
317 }, 310 }
318 311
319 /** 312 /**
320 * @see Matcher#findKeyword 313 * @see Matcher#findKeyword
321 * @param {Filter} filter 314 * @param {Filter} filter
322 * @return {string} keyword 315 * @returns {string} keyword
323 */ 316 */
324 findKeyword(filter) 317 findKeyword(filter)
325 { 318 {
326 if (filter instanceof WhitelistFilter) 319 if (filter instanceof WhitelistFilter)
327 return this.whitelist.findKeyword(filter); 320 return this.whitelist.findKeyword(filter);
328 return this.blacklist.findKeyword(filter); 321 return this.blacklist.findKeyword(filter);
329 }, 322 }
330 323
331 /** 324 /**
332 * @see Matcher#hasFilter 325 * @see Matcher#hasFilter
333 * @param {Filter} filter 326 * @param {Filter} filter
334 * @return {boolean} 327 * @returns {boolean}
335 */ 328 */
336 hasFilter(filter) 329 hasFilter(filter)
337 { 330 {
338 if (filter instanceof WhitelistFilter) 331 if (filter instanceof WhitelistFilter)
339 return this.whitelist.hasFilter(filter); 332 return this.whitelist.hasFilter(filter);
340 return this.blacklist.hasFilter(filter); 333 return this.blacklist.hasFilter(filter);
341 }, 334 }
342 335
343 /** 336 /**
344 * @see Matcher#getKeywordForFilter 337 * @see Matcher#getKeywordForFilter
345 * @param {Filter} filter 338 * @param {Filter} filter
346 * @return {string} keyword 339 * @returns {string} keyword
347 */ 340 */
348 getKeywordForFilter(filter) 341 getKeywordForFilter(filter)
349 { 342 {
350 if (filter instanceof WhitelistFilter) 343 if (filter instanceof WhitelistFilter)
351 return this.whitelist.getKeywordForFilter(filter); 344 return this.whitelist.getKeywordForFilter(filter);
352 return this.blacklist.getKeywordForFilter(filter); 345 return this.blacklist.getKeywordForFilter(filter);
353 }, 346 }
354 347
355 /** 348 /**
356 * Checks whether a particular filter is slow 349 * Checks whether a particular filter is slow
357 * @param {RegExpFilter} filter 350 * @param {RegExpFilter} filter
358 * @return {boolean} 351 * @returns {boolean}
359 */ 352 */
360 isSlowFilter(filter) 353 isSlowFilter(filter)
361 { 354 {
362 let matcher = ( 355 let matcher = (
363 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist 356 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
364 ); 357 );
365 if (matcher.hasFilter(filter)) 358 if (matcher.hasFilter(filter))
366 return !matcher.getKeywordForFilter(filter); 359 return !matcher.getKeywordForFilter(filter);
367 return !matcher.findKeyword(filter); 360 return !matcher.findKeyword(filter);
368 }, 361 }
369 362
370 /** 363 /**
371 * Optimized filter matching testing both whitelist and blacklist matchers 364 * Optimized filter matching testing both whitelist and blacklist matchers
372 * simultaneously. For parameters see 365 * simultaneously. For parameters see
373 {@link Matcher#matchesAny Matcher.matchesAny()}. 366 {@link Matcher#matchesAny Matcher.matchesAny()}.
374 * @see Matcher#matchesAny 367 * @see Matcher#matchesAny
375 * @inheritdoc 368 * @inheritdoc
Manish Jethani 2018/09/03 18:46:49 I don't think @inheritdoc makes sense here (also b[…comment truncated]
Jon Sonesen 2018/09/05 14:05:27 Similarly to above, eslint complains here without […comment truncated]
376 */ 369 */
377 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 370 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
378 specificOnly) 371 specificOnly)
379 { 372 {
380 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 373 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
381 if (candidates === null) 374 if (candidates === null)
382 candidates = []; 375 candidates = [];
383 candidates.push(""); 376 candidates.push("");
384 377
385 let blacklistHit = null; 378 let blacklistHit = null;
386 for (let i = 0, l = candidates.length; i < l; i++) 379 for (let i = 0, l = candidates.length; i < l; i++)
387 { 380 {
388 let substr = candidates[i]; 381 let substr = candidates[i];
389 let result = this.whitelist._checkEntryMatch( 382 let result = this.whitelist._checkEntryMatch(
390 substr, location, typeMask, docDomain, thirdParty, sitekey 383 substr, location, typeMask, docDomain, thirdParty, sitekey
391 ); 384 );
392 if (result) 385 if (result)
393 return result; 386 return result;
394 if (blacklistHit === null) 387 if (blacklistHit === null)
395 { 388 {
396 blacklistHit = this.blacklist._checkEntryMatch( 389 blacklistHit = this.blacklist._checkEntryMatch(
397 substr, location, typeMask, docDomain, thirdParty, sitekey, 390 substr, location, typeMask, docDomain, thirdParty, sitekey,
398 specificOnly 391 specificOnly
399 ); 392 );
400 } 393 }
401 } 394 }
402 return blacklistHit; 395 return blacklistHit;
403 }, 396 }
404 397
405 /** 398 /**
406 * @see Matcher#matchesAny 399 * @see Matcher#matchesAny
407 * @inheritdoc 400 * @inheritdoc
408 */ 401 */
409 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) 402 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
410 { 403 {
411 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + 404 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
412 " " + sitekey + " " + specificOnly; 405 " " + sitekey + " " + specificOnly;
413 406
414 let result = this.resultCache.get(key); 407 let result = this.resultCache.get(key);
415 if (typeof result != "undefined") 408 if (typeof result != "undefined")
416 return result; 409 return result;
417 410
418 result = this.matchesAnyInternal(location, typeMask, docDomain, 411 result = this.matchesAnyInternal(location, typeMask, docDomain,
419 thirdParty, sitekey, specificOnly); 412 thirdParty, sitekey, specificOnly);
420 413
421 if (this.resultCache.size >= CombinedMatcher.maxCacheEntries) 414 if (this.resultCache.size >= CombinedMatcher.maxCacheEntries)
422 this.resultCache.clear(); 415 this.resultCache.clear();
423 416
424 this.resultCache.set(key, result); 417 this.resultCache.set(key, result);
425 418
426 return result; 419 return result;
427 } 420 }
428 }; 421 }
422
423 exports.CombinedMatcher = CombinedMatcher;
429 424
430 /** 425 /**
431 * Shared CombinedMatcher instance that should usually be used. 426 * Shared {@link CombinedMatcher} instance that should usually be used.
432 * @type {CombinedMatcher} 427 * @type {CombinedMatcher}
433 */ 428 */
434 exports.defaultMatcher = new CombinedMatcher(); 429 exports.defaultMatcher = new CombinedMatcher();
Manish Jethani 2018/09/03 18:46:49 I think we should follow the convention of first d[…comment truncated]
Jon Sonesen 2018/09/05 14:05:27 Done.
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld