Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/matcher.js

Issue 30000586: Issue 7265 - Organize request blocking filters by domain (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Created Feb. 6, 2019, 3:19 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterClasses.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26 const {suffixes} = require("./domain");
26 27
27 /** 28 /**
28 * Regular expression for matching a keyword in a filter. 29 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp} 30 * @type {RegExp}
30 */ 31 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; 32 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32 33
33 /** 34 /**
34 * Regular expression for matching all keywords in a filter. 35 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp} 36 * @type {RegExp}
(...skipping 20 matching lines...) Expand all
56 * Bitmask for "types" that are for exception rules only, like 57 * Bitmask for "types" that are for exception rules only, like
57 * <code>$document</code>, <code>$elemhide</code>, and so on. 58 * <code>$document</code>, <code>$elemhide</code>, and so on.
58 * @type {number} 59 * @type {number}
59 */ 60 */
60 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT | 61 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
61 RegExpFilter.typeMap.ELEMHIDE | 62 RegExpFilter.typeMap.ELEMHIDE |
62 RegExpFilter.typeMap.GENERICHIDE | 63 RegExpFilter.typeMap.GENERICHIDE |
63 RegExpFilter.typeMap.GENERICBLOCK; 64 RegExpFilter.typeMap.GENERICBLOCK;
64 65
65 /** 66 /**
67 * Map to be used instead when a filter has a blank <code>domains</code>
68 * property.
69 * @type {Map.<string, boolean>}
70 */
71 let defaultDomains = new Map([["", true]]);
72
73 /**
66 * Yields individual non-default types from a filter's type mask. 74 * Yields individual non-default types from a filter's type mask.
67 * @param {number} contentType A filter's type mask. 75 * @param {number} contentType A filter's type mask.
68 * @yields {number} 76 * @yields {number}
69 */ 77 */
70 function* nonDefaultTypes(contentType) 78 function* nonDefaultTypes(contentType)
71 { 79 {
72 for (let mask = contentType & NON_DEFAULT_TYPES, bitIndex = 0; 80 for (let mask = contentType & NON_DEFAULT_TYPES, bitIndex = 0;
73 mask != 0; mask >>>= 1, bitIndex++) 81 mask != 0; mask >>>= 1, bitIndex++)
74 { 82 {
75 if ((mask & 1) != 0) 83 if ((mask & 1) != 0)
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 this._simpleFiltersByKeyword = new Map(); 173 this._simpleFiltersByKeyword = new Map();
166 174
167 /** 175 /**
168 * Lookup table for complex filters by their associated keyword 176 * Lookup table for complex filters by their associated keyword
169 * @type {Map.<string,(RegExpFilter|Set.<RegExpFilter>)>} 177 * @type {Map.<string,(RegExpFilter|Set.<RegExpFilter>)>}
170 * @private 178 * @private
171 */ 179 */
172 this._complexFiltersByKeyword = new Map(); 180 this._complexFiltersByKeyword = new Map();
173 181
174 /** 182 /**
183 * Lookup table of domain maps for complex filters by their associated
184 * keyword
185 * @type {Map.<string,Map.<string,(RegExpFilter|
186 * Map.<RegExpFilter,boolean>)>>}
187 * @private
188 */
189 this._filterDomainMapsByKeyword = new Map();
190
191 /**
175 * Lookup table of type-specific lookup tables for complex filters by their 192 * Lookup table of type-specific lookup tables for complex filters by their
176 * associated keyword 193 * associated keyword
177 * @type {Map.<string,Map.<string,(RegExpFilter|Set.<RegExpFilter>)>>} 194 * @type {Map.<string,Map.<string,(RegExpFilter|Set.<RegExpFilter>)>>}
178 * @private 195 * @private
179 */ 196 */
180 this._filterMapsByType = new Map(); 197 this._filterMapsByType = new Map();
181 } 198 }
182 199
183 /** 200 /**
184 * Removes all known filters 201 * Removes all known filters
185 */ 202 */
186 clear() 203 clear()
187 { 204 {
188 this._keywordByFilter.clear(); 205 this._keywordByFilter.clear();
189 this._simpleFiltersByKeyword.clear(); 206 this._simpleFiltersByKeyword.clear();
190 this._complexFiltersByKeyword.clear(); 207 this._complexFiltersByKeyword.clear();
208 this._filterDomainMapsByKeyword.clear();
191 this._filterMapsByType.clear(); 209 this._filterMapsByType.clear();
192 } 210 }
193 211
194 /** 212 /**
195 * Adds a filter to the matcher 213 * Adds a filter to the matcher
196 * @param {RegExpFilter} filter 214 * @param {RegExpFilter} filter
197 */ 215 */
198 add(filter) 216 add(filter)
199 { 217 {
200 if (this._keywordByFilter.has(filter)) 218 if (this._keywordByFilter.has(filter))
(...skipping 13 matching lines...) Expand all
214 return; 232 return;
215 233
216 for (let type of nonDefaultTypes(filter.contentType)) 234 for (let type of nonDefaultTypes(filter.contentType))
217 { 235 {
218 let map = this._filterMapsByType.get(type); 236 let map = this._filterMapsByType.get(type);
219 if (!map) 237 if (!map)
220 this._filterMapsByType.set(type, map = new Map()); 238 this._filterMapsByType.set(type, map = new Map());
221 239
222 addFilterByKeyword(filter, keyword, map); 240 addFilterByKeyword(filter, keyword, map);
223 } 241 }
242
243 let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword);
244 if (!filtersByDomain)
245 this._filterDomainMapsByKeyword.set(keyword, filtersByDomain = new Map());
246
247 for (let [domain, include] of filter.domains || defaultDomains)
248 {
249 if (!include && domain == "")
250 continue;
251
252 let map = filtersByDomain.get(domain);
253 if (!map)
254 {
255 filtersByDomain.set(domain, include ? filter :
256 map = new Map([[filter, false]]));
257 }
258 else if (map.size == 1 && !(map instanceof Map))
259 {
260 if (filter != map)
261 {
262 filtersByDomain.set(domain, new Map([[map, true],
263 [filter, include]]));
264 }
265 }
266 else
267 {
268 map.set(filter, include);
269 }
270 }
224 } 271 }
225 272
226 /** 273 /**
227 * Removes a filter from the matcher 274 * Removes a filter from the matcher
228 * @param {RegExpFilter} filter 275 * @param {RegExpFilter} filter
229 */ 276 */
230 remove(filter) 277 remove(filter)
231 { 278 {
232 let keyword = this._keywordByFilter.get(filter); 279 let keyword = this._keywordByFilter.get(filter);
233 if (typeof keyword == "undefined") 280 if (typeof keyword == "undefined")
234 return; 281 return;
235 282
236 let locationOnly = filter.isLocationOnly(); 283 let locationOnly = filter.isLocationOnly();
237 284
238 removeFilterByKeyword(filter, keyword, 285 removeFilterByKeyword(filter, keyword,
239 locationOnly ? this._simpleFiltersByKeyword : 286 locationOnly ? this._simpleFiltersByKeyword :
240 this._complexFiltersByKeyword); 287 this._complexFiltersByKeyword);
241 288
242 this._keywordByFilter.delete(filter); 289 this._keywordByFilter.delete(filter);
243 290
244 if (locationOnly) 291 if (locationOnly)
245 return; 292 return;
246 293
247 for (let type of nonDefaultTypes(filter.contentType)) 294 for (let type of nonDefaultTypes(filter.contentType))
248 { 295 {
249 let map = this._filterMapsByType.get(type); 296 let map = this._filterMapsByType.get(type);
250 if (map) 297 if (map)
251 removeFilterByKeyword(filter, keyword, map); 298 removeFilterByKeyword(filter, keyword, map);
252 } 299 }
300
301 let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword);
302 if (filtersByDomain)
303 {
304 let domains = filter.domains || defaultDomains;
305 for (let domain of domains.keys())
306 {
307 let map = filtersByDomain.get(domain);
308 if (map)
309 {
310 if (map.size > 1 || map instanceof Map)
Sebastian Noack 2019/02/06 16:30:58 In which scenario would "map" not be a Map object,
Manish Jethani 2019/02/06 19:25:05 It's a hack. The filter object doubles up as a fa
311 {
312 map.delete(filter);
313
314 if (map.size == 0)
315 filtersByDomain.delete(domain);
316 }
317 else if (filter == map)
318 {
319 filtersByDomain.delete(domain);
320 }
321 }
322 }
323 }
253 } 324 }
254 325
255 /** 326 /**
256 * Chooses a keyword to be associated with the filter 327 * Chooses a keyword to be associated with the filter
257 * @param {Filter} filter 328 * @param {Filter} filter
258 * @returns {string} keyword or an empty string if no keyword could be found 329 * @returns {string} keyword or an empty string if no keyword could be found
259 * @protected 330 * @protected
260 */ 331 */
261 findKeyword(filter) 332 findKeyword(filter)
262 { 333 {
(...skipping 21 matching lines...) Expand all
284 (count == resultCount && candidate.length > resultLength)) 355 (count == resultCount && candidate.length > resultLength))
285 { 356 {
286 result = candidate; 357 result = candidate;
287 resultCount = count; 358 resultCount = count;
288 resultLength = candidate.length; 359 resultLength = candidate.length;
289 } 360 }
290 } 361 }
291 return result; 362 return result;
292 } 363 }
293 364
365 _checkEntryMatchSimple(keyword, location, typeMask, docDomain, thirdParty,
366 sitekey, specificOnly, collection)
367 {
368 let filters = this._simpleFiltersByKeyword.get(keyword);
369 if (filters)
370 {
371 let lowerCaseLocation = location.toLowerCase();
372
373 for (let filter of filters)
374 {
375 if (specificOnly && !(filter instanceof WhitelistFilter))
376 continue;
377
378 if (filter.matchesLocation(location, lowerCaseLocation))
379 {
380 if (!collection)
381 return filter;
382
383 collection.push(filter);
384 }
385 }
386 }
387
388 return null;
389 }
390
391 _checkEntryMatchForType(keyword, location, typeMask, docDomain, thirdParty,
392 sitekey, specificOnly, collection)
393 {
394 let filtersForType = this._filterMapsByType.get(typeMask);
395 if (filtersForType)
396 {
397 let filters = filtersForType.get(keyword);
398 if (filters)
399 {
400 for (let filter of filters)
401 {
402 if (specificOnly && filter.isGeneric() &&
403 !(filter instanceof WhitelistFilter))
404 continue;
405
406 if (filter.matches(location, typeMask, docDomain, thirdParty,
407 sitekey))
408 {
409 if (!collection)
410 return filter;
411
412 collection.push(filter);
413 }
414 }
415 }
416 }
417
418 return null;
419 }
420
421 _checkEntryMatchByDomain(keyword, location, typeMask, docDomain, thirdParty,
422 sitekey, specificOnly, collection)
423 {
424 let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword);
425 if (filtersByDomain)
426 {
427 // The code in this block is similar to the generateStyleSheetForDomain
428 // function in lib/elemHide.js.
429
430 if (docDomain)
431 {
432 if (docDomain[docDomain.length - 1] == ".")
433 docDomain = docDomain.replace(/\.+$/, "");
434
435 docDomain = docDomain.toLowerCase();
436 }
437
438 let excluded = new Set();
439
440 for (let suffix of suffixes(docDomain || "", !specificOnly))
441 {
442 let filters = filtersByDomain.get(suffix);
443 if (filters)
444 {
445 for (let [filter, include] of filters.entries())
446 {
447 if (!include)
448 {
449 excluded.add(filter);
450 }
451 else if ((excluded.size == 0 || !excluded.has(filter)) &&
452 filter.matchesWithoutDomain(location, typeMask,
453 thirdParty, sitekey))
454 {
455 if (!collection)
456 return filter;
457
458 collection.push(filter);
459 }
460 }
461 }
462 }
463 }
464
465 return null;
466 }
467
294 /** 468 /**
295 * Checks whether the entries for a particular keyword match a URL 469 * Checks whether the entries for a particular keyword match a URL
296 * @param {string} keyword 470 * @param {string} keyword
297 * @param {string} location 471 * @param {string} location
298 * @param {number} typeMask 472 * @param {number} typeMask
299 * @param {string} [docDomain] 473 * @param {string} [docDomain]
300 * @param {boolean} [thirdParty] 474 * @param {boolean} [thirdParty]
301 * @param {string} [sitekey] 475 * @param {string} [sitekey]
302 * @param {boolean} [specificOnly] 476 * @param {boolean} [specificOnly]
303 * @param {?Array.<Filter>} [collection] An optional list of filters to which 477 * @param {?Array.<Filter>} [collection] An optional list of filters to which
304 * to append any results. If specified, the function adds <em>all</em> 478 * to append any results. If specified, the function adds <em>all</em>
305 * matching filters to the list; if omitted, the function directly returns 479 * matching filters to the list; if omitted, the function directly returns
306 * the first matching filter. 480 * the first matching filter.
307 * @returns {?Filter} 481 * @returns {?Filter}
308 * @protected 482 * @protected
309 */ 483 */
310 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 484 checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
311 specificOnly, collection) 485 specificOnly, collection)
312 { 486 {
313 // We need to skip the simple (location-only) filters if the type mask does 487 // We need to skip the simple (location-only) filters if the type mask does
314 // not contain any default content types. 488 // not contain any default content types.
315 if ((typeMask & DEFAULT_TYPES) != 0) 489 if ((typeMask & DEFAULT_TYPES) != 0)
316 { 490 {
317 let simpleSet = this._simpleFiltersByKeyword.get(keyword); 491 let filter = this._checkEntryMatchSimple(keyword, location, typeMask,
318 if (simpleSet) 492 docDomain, thirdParty, sitekey,
319 { 493 specificOnly, collection);
320 let lowerCaseLocation = location.toLowerCase(); 494 if (filter)
321 495 return filter;
322 for (let filter of simpleSet)
323 {
324 if (specificOnly && !(filter instanceof WhitelistFilter))
325 continue;
326
327 if (filter.matchesLocation(location, lowerCaseLocation))
328 {
329 if (!collection)
330 return filter;
331
332 collection.push(filter);
333 }
334 }
335 }
336 } 496 }
337 497
338 let complexSet = null;
339
340 // If the type mask contains a non-default type (first condition) and it is 498 // If the type mask contains a non-default type (first condition) and it is
341 // the only type in the mask (second condition), we can use the 499 // the only type in the mask (second condition), we can use the
342 // type-specific map, which typically contains a lot fewer filters. This 500 // type-specific map, which typically contains a lot fewer filters. This
343 // enables faster lookups for whitelisting types like $document, $elemhide, 501 // enables faster lookups for whitelisting types like $document, $elemhide,
344 // and so on, as well as other special types like $csp. 502 // and so on, as well as other special types like $csp.
345 if ((typeMask & NON_DEFAULT_TYPES) != 0 && (typeMask & typeMask - 1) == 0) 503 if ((typeMask & NON_DEFAULT_TYPES) != 0 && (typeMask & typeMask - 1) == 0)
346 { 504 {
347 let map = this._filterMapsByType.get(typeMask); 505 return this._checkEntryMatchForType(keyword, location, typeMask,
348 if (map) 506 docDomain, thirdParty, sitekey,
349 complexSet = map.get(keyword); 507 specificOnly, collection);
350 }
351 else
352 {
353 complexSet = this._complexFiltersByKeyword.get(keyword);
354 } 508 }
355 509
356 if (complexSet) 510 return this._checkEntryMatchByDomain(keyword, location, typeMask,
357 { 511 docDomain, thirdParty, sitekey,
358 for (let filter of complexSet) 512 specificOnly, collection);
359 {
360 if (specificOnly && filter.isGeneric() &&
361 !(filter instanceof WhitelistFilter))
362 continue;
363
364 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
365 {
366 if (!collection)
367 return filter;
368
369 collection.push(filter);
370 }
371 }
372 }
373
374 return null;
375 } 513 }
376 514
377 /** 515 /**
378 * Tests whether the URL matches any of the known filters 516 * Tests whether the URL matches any of the known filters
379 * @param {string} location 517 * @param {string} location
380 * URL to be tested 518 * URL to be tested
381 * @param {number} typeMask 519 * @param {number} typeMask
382 * bitmask of content / request types to match 520 * bitmask of content / request types to match
383 * @param {string} [docDomain] 521 * @param {string} [docDomain]
384 * domain name of the document that loads the URL 522 * domain name of the document that loads the URL
(...skipping 299 matching lines...) Expand 10 before | Expand all | Expand 10 after
684 822
685 exports.CombinedMatcher = CombinedMatcher; 823 exports.CombinedMatcher = CombinedMatcher;
686 824
687 /** 825 /**
688 * Shared {@link CombinedMatcher} instance that should usually be used. 826 * Shared {@link CombinedMatcher} instance that should usually be used.
689 * @type {CombinedMatcher} 827 * @type {CombinedMatcher}
690 */ 828 */
691 let defaultMatcher = new CombinedMatcher(); 829 let defaultMatcher = new CombinedMatcher();
692 830
693 exports.defaultMatcher = defaultMatcher; 831 exports.defaultMatcher = defaultMatcher;
OLDNEW
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterClasses.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld