Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/matcher.js

Issue 29896562: Issue 7003 - Look up whitelist filter only if URL is blocked (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Left Patch Set: Created Sept. 30, 2018, 8:55 a.m.
Right Patch Set: Add comments Created Oct. 22, 2018, 9:14 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 /**
21 * @fileOverview Matcher class implementing matching addresses against 21 * @fileOverview Matcher class implementing matching addresses against
22 * a list of filters. 22 * a list of filters.
23 */ 23 */
24 24
25 const {WhitelistFilter} = require("./filterClasses"); 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
26
27 /**
28 * Regular expression for matching a keyword in a filter.
29 * @type {RegExp}
30 */
31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
32
33 /**
34 * Regular expression for matching all keywords in a filter.
35 * @type {RegExp}
36 */
37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g");
38
39 /**
40 * Bitmask for "types" that are for exception rules only, like
41 * <code>$document</code>, <code>$elemhide</code>, and so on.
42 * @type {number}
43 */
44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
45 RegExpFilter.typeMap.ELEMHIDE |
46 RegExpFilter.typeMap.GENERICHIDE |
47 RegExpFilter.typeMap.GENERICBLOCK;
48
49 /**
50 * Checks whether a particular filter is slow.
51 * @param {RegExpFilter} filter
52 * @returns {boolean}
53 */
54 function isSlowFilter(filter)
55 {
56 return !filter.pattern || !keywordRegExp.test(filter.pattern);
57 }
58
59 exports.isSlowFilter = isSlowFilter;
26 60
27 /** 61 /**
28 * Blacklist/whitelist filter matching 62 * Blacklist/whitelist filter matching
29 */ 63 */
30 class Matcher 64 class Matcher
31 { 65 {
32 constructor() 66 constructor()
33 { 67 {
34 /** 68 /**
35 * Lookup table for filters by their associated keyword 69 * Lookup table for filters by their associated keyword
36 * @type {Map.<string,(Filter|Filter[])>} 70 * @type {Map.<string,(Filter|Set.<Filter>)>}
37 */ 71 */
38 this.filterByKeyword = new Map(); 72 this.filterByKeyword = new Map();
39
40 /**
41 * Lookup table for keywords by the filter
42 * @type {Map.<Filter,string>}
43 */
44 this.keywordByFilter = new Map();
45 } 73 }
46 74
47 /** 75 /**
48 * Removes all known filters 76 * Removes all known filters
49 */ 77 */
50 clear() 78 clear()
51 { 79 {
52 this.filterByKeyword.clear(); 80 this.filterByKeyword.clear();
53 this.keywordByFilter.clear();
54 } 81 }
55 82
56 /** 83 /**
57 * Adds a filter to the matcher 84 * Adds a filter to the matcher
58 * @param {RegExpFilter} filter 85 * @param {RegExpFilter} filter
59 */ 86 */
60 add(filter) 87 add(filter)
61 { 88 {
62 if (this.keywordByFilter.has(filter))
63 return;
64
65 // Look for a suitable keyword 89 // Look for a suitable keyword
66 let keyword = this.findKeyword(filter); 90 let keyword = this.findKeyword(filter);
67 let oldEntry = this.filterByKeyword.get(keyword); 91 let set = this.filterByKeyword.get(keyword);
68 if (typeof oldEntry == "undefined") 92 if (typeof set == "undefined")
93 {
69 this.filterByKeyword.set(keyword, filter); 94 this.filterByKeyword.set(keyword, filter);
70 else if (oldEntry.length == 1) 95 }
71 this.filterByKeyword.set(keyword, [oldEntry, filter]); 96 else if (set.size == 1)
97 {
98 if (filter != set)
99 this.filterByKeyword.set(keyword, new Set([set, filter]));
100 }
72 else 101 else
73 oldEntry.push(filter); 102 {
74 this.keywordByFilter.set(filter, keyword); 103 set.add(filter);
104 }
75 } 105 }
76 106
77 /** 107 /**
78 * Removes a filter from the matcher 108 * Removes a filter from the matcher
79 * @param {RegExpFilter} filter 109 * @param {RegExpFilter} filter
80 */ 110 */
81 remove(filter) 111 remove(filter)
82 { 112 {
83 let keyword = this.keywordByFilter.get(filter); 113 let keyword = this.findKeyword(filter);
84 if (typeof keyword == "undefined") 114 let set = this.filterByKeyword.get(keyword);
115 if (typeof set == "undefined")
85 return; 116 return;
86 117
87 let list = this.filterByKeyword.get(keyword); 118 if (set.size == 1)
88 if (list.length <= 1) 119 {
89 this.filterByKeyword.delete(keyword); 120 if (filter == set)
121 this.filterByKeyword.delete(keyword);
122 }
90 else 123 else
91 { 124 {
92 let index = list.indexOf(filter); 125 set.delete(filter);
93 if (index >= 0) 126
94 { 127 if (set.size == 1)
95 list.splice(index, 1); 128 this.filterByKeyword.set(keyword, [...set][0]);
96 if (list.length == 1) 129 }
97 this.filterByKeyword.set(keyword, list[0]);
98 }
99 }
100
101 this.keywordByFilter.delete(filter);
102 } 130 }
103 131
104 /** 132 /**
105 * Chooses a keyword to be associated with the filter 133 * Chooses a keyword to be associated with the filter
106 * @param {Filter} filter 134 * @param {Filter} filter
107 * @returns {string} keyword or an empty string if no keyword could be found 135 * @returns {string} keyword or an empty string if no keyword could be found
108 */ 136 */
109 findKeyword(filter) 137 findKeyword(filter)
110 { 138 {
111 let result = ""; 139 let result = "";
112 let {pattern} = filter; 140 let {pattern} = filter;
113 if (pattern == null) 141 if (pattern == null)
114 return result; 142 return result;
115 143
116 let candidates = pattern.toLowerCase().match( 144 let candidates = pattern.toLowerCase().match(allKeywordsRegExp);
117 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
118 );
119 if (!candidates) 145 if (!candidates)
120 return result; 146 return result;
121 147
122 let hash = this.filterByKeyword; 148 let hash = this.filterByKeyword;
123 let resultCount = 0xFFFFFF; 149 let resultCount = 0xFFFFFF;
124 let resultLength = 0; 150 let resultLength = 0;
125 for (let i = 0, l = candidates.length; i < l; i++) 151 for (let i = 0, l = candidates.length; i < l; i++)
126 { 152 {
127 let candidate = candidates[i].substr(1); 153 let candidate = candidates[i].substr(1);
128 let filters = hash.get(candidate); 154 let filters = hash.get(candidate);
129 let count = typeof filters != "undefined" ? filters.length : 0; 155 let count = typeof filters != "undefined" ? filters.size : 0;
130 if (count < resultCount || 156 if (count < resultCount ||
131 (count == resultCount && candidate.length > resultLength)) 157 (count == resultCount && candidate.length > resultLength))
132 { 158 {
133 result = candidate; 159 result = candidate;
134 resultCount = count; 160 resultCount = count;
135 resultLength = candidate.length; 161 resultLength = candidate.length;
136 } 162 }
137 } 163 }
138 return result; 164 return result;
139 }
140
141 /**
142 * Checks whether a particular filter is being matched against.
143 * @param {RegExpFilter} filter
144 * @returns {boolean}
145 */
146 hasFilter(filter)
147 {
148 return this.keywordByFilter.has(filter);
149 }
150
151 /**
152 * Returns the keyword used for a filter, <code>null</code>
153 * for unknown filters.
154 * @param {RegExpFilter} filter
155 * @returns {?string}
156 */
157 getKeywordForFilter(filter)
158 {
159 let keyword = this.keywordByFilter.get(filter);
160 return typeof keyword != "undefined" ? keyword : null;
161 } 165 }
162 166
163 /** 167 /**
164 * Checks whether the entries for a particular keyword match a URL 168 * Checks whether the entries for a particular keyword match a URL
165 * @param {string} keyword 169 * @param {string} keyword
166 * @param {string} location 170 * @param {string} location
167 * @param {number} typeMask 171 * @param {number} typeMask
168 * @param {string} [docDomain] 172 * @param {string} [docDomain]
169 * @param {boolean} [thirdParty] 173 * @param {boolean} [thirdParty]
170 * @param {string} [sitekey] 174 * @param {string} [sitekey]
171 * @param {boolean} [specificOnly] 175 * @param {boolean} [specificOnly]
172 * @returns {?Filter} 176 * @returns {?Filter}
173 */ 177 */
174 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, 178 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
175 specificOnly) 179 specificOnly)
176 { 180 {
177 let list = this.filterByKeyword.get(keyword); 181 let set = this.filterByKeyword.get(keyword);
178 if (typeof list == "undefined") 182 if (typeof set == "undefined")
179 return null; 183 return null;
180 for (let i = 0; i < list.length; i++) 184
181 { 185 for (let filter of set)
182 let filter = list[i]; 186 {
183
184 if (specificOnly && filter.isGeneric() && 187 if (specificOnly && filter.isGeneric() &&
185 !(filter instanceof WhitelistFilter)) 188 !(filter instanceof WhitelistFilter))
186 continue; 189 continue;
187 190
188 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) 191 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
189 return filter; 192 return filter;
190 } 193 }
191 return null; 194 return null;
192 } 195 }
193 196
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
306 * @returns {string} keyword 309 * @returns {string} keyword
307 */ 310 */
308 findKeyword(filter) 311 findKeyword(filter)
309 { 312 {
310 if (filter instanceof WhitelistFilter) 313 if (filter instanceof WhitelistFilter)
311 return this.whitelist.findKeyword(filter); 314 return this.whitelist.findKeyword(filter);
312 return this.blacklist.findKeyword(filter); 315 return this.blacklist.findKeyword(filter);
313 } 316 }
314 317
315 /** 318 /**
316 * @see Matcher#hasFilter
317 * @param {Filter} filter
318 * @returns {boolean}
319 */
320 hasFilter(filter)
321 {
322 if (filter instanceof WhitelistFilter)
323 return this.whitelist.hasFilter(filter);
324 return this.blacklist.hasFilter(filter);
325 }
326
327 /**
328 * @see Matcher#getKeywordForFilter
329 * @param {Filter} filter
330 * @returns {string} keyword
331 */
332 getKeywordForFilter(filter)
333 {
334 if (filter instanceof WhitelistFilter)
335 return this.whitelist.getKeywordForFilter(filter);
336 return this.blacklist.getKeywordForFilter(filter);
337 }
338
339 /**
340 * Checks whether a particular filter is slow
341 * @param {RegExpFilter} filter
342 * @returns {boolean}
343 */
344 isSlowFilter(filter)
345 {
346 let matcher = (
347 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
348 );
349 let keyword = matcher.getKeywordForFilter(filter);
350 if (keyword != null)
351 return !keyword;
352 return !matcher.findKeyword(filter);
353 }
354
355 /**
356 * Optimized filter matching testing both whitelist and blacklist matchers 319 * Optimized filter matching testing both whitelist and blacklist matchers
357 * simultaneously. For parameters see 320 * simultaneously. For parameters see
358 {@link Matcher#matchesAny Matcher.matchesAny()}. 321 {@link Matcher#matchesAny Matcher.matchesAny()}.
359 * @see Matcher#matchesAny 322 * @see Matcher#matchesAny
360 * @inheritdoc 323 * @inheritdoc
361 */ 324 */
362 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, 325 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
363 specificOnly) 326 specificOnly)
364 { 327 {
365 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); 328 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
366 if (candidates === null) 329 if (candidates === null)
367 candidates = []; 330 candidates = [];
368 candidates.push(""); 331 candidates.push("");
369 332
370 let whitelistHit = null; 333 let whitelistHit = null;
371 let blacklistHit = null; 334 let blacklistHit = null;
372 335
373 for (let i = 0, l = candidates.length; i < l; i++) 336 // If the type mask includes no types other than whitelist-only types, we
374 { 337 // can skip the blacklist.
375 let substr = candidates[i]; 338 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0)
376 blacklistHit = this.blacklist._checkEntryMatch( 339 {
377 substr, location, typeMask, docDomain, thirdParty, sitekey, 340 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++)
378 specificOnly
379 );
380 if (blacklistHit)
381 break;
382 }
383
384 if (blacklistHit)
385 {
386 for (let i = 0, l = candidates.length; i < l; i++)
387 { 341 {
388 let substr = candidates[i]; 342 blacklistHit = this.blacklist._checkEntryMatch(candidates[i], location,
389 whitelistHit = this.whitelist._checkEntryMatch( 343 typeMask, docDomain,
390 substr, location, typeMask, docDomain, thirdParty, sitekey 344 thirdParty, sitekey,
391 ); 345 specificOnly);
392 if (whitelistHit) 346 }
393 break; 347 }
348
349 // If the type mask includes any whitelist-only types, we need to check the
350 // whitelist.
351 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0)
352 {
353 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++)
354 {
355 whitelistHit = this.whitelist._checkEntryMatch(candidates[i], location,
356 typeMask, docDomain,
357 thirdParty, sitekey);
394 } 358 }
395 } 359 }
396 360
397 return whitelistHit || blacklistHit; 361 return whitelistHit || blacklistHit;
398 } 362 }
399 363
400 /** 364 /**
401 * @see Matcher#matchesAny 365 * @see Matcher#matchesAny
402 * @inheritdoc 366 * @inheritdoc
403 */ 367 */
(...skipping 20 matching lines...) Expand all
424 388
425 exports.CombinedMatcher = CombinedMatcher; 389 exports.CombinedMatcher = CombinedMatcher;
426 390
427 /** 391 /**
428 * Shared {@link CombinedMatcher} instance that should usually be used. 392 * Shared {@link CombinedMatcher} instance that should usually be used.
429 * @type {CombinedMatcher} 393 * @type {CombinedMatcher}
430 */ 394 */
431 let defaultMatcher = new CombinedMatcher(); 395 let defaultMatcher = new CombinedMatcher();
432 396
433 exports.defaultMatcher = defaultMatcher; 397 exports.defaultMatcher = defaultMatcher;
LEFTRIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

Powered by Google App Engine
This is Rietveld