OLD | NEW |
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 "use strict"; | 18 "use strict"; |
19 | 19 |
20 /** | 20 /** |
21 * @fileOverview Matcher class implementing matching addresses against | 21 * @fileOverview Matcher class implementing matching addresses against |
22 * a list of filters. | 22 * a list of filters. |
23 */ | 23 */ |
24 | 24 |
25 const {WhitelistFilter} = require("./filterClasses"); | 25 const {WhitelistFilter} = require("./filterClasses"); |
26 | 26 |
27 /** | 27 /** |
| 28 * Regular expression for matching a keyword in a filter. |
| 29 * @type {RegExp} |
| 30 */ |
| 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
| 32 |
| 33 /** |
| 34 * Regular expression for matching all keywords in a filter. |
| 35 * @type {RegExp} |
| 36 */ |
| 37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g"); |
| 38 |
| 39 /** |
| 40 * Checks whether a particular filter is slow. |
| 41 * @param {RegExpFilter} filter |
| 42 * @returns {boolean} |
| 43 */ |
| 44 function isSlowFilter(filter) |
| 45 { |
| 46 return !filter.pattern || !keywordRegExp.test(filter.pattern); |
| 47 } |
| 48 |
| 49 exports.isSlowFilter = isSlowFilter; |
| 50 |
| 51 /** |
28 * Blacklist/whitelist filter matching | 52 * Blacklist/whitelist filter matching |
29 */ | 53 */ |
30 class Matcher | 54 class Matcher |
31 { | 55 { |
32 constructor() | 56 constructor() |
33 { | 57 { |
34 /** | 58 /** |
35 * Lookup table for filters by their associated keyword | 59 * Lookup table for filters by their associated keyword |
36 * @type {Map.<string,(Filter|Filter[])>} | 60 * @type {Map.<string,(Filter|Set.<Filter>)>} |
37 */ | 61 */ |
38 this.filterByKeyword = new Map(); | 62 this.filterByKeyword = new Map(); |
39 | |
40 /** | |
41 * Lookup table for keywords by the filter | |
42 * @type {Map.<Filter,string>} | |
43 */ | |
44 this.keywordByFilter = new Map(); | |
45 } | 63 } |
46 | 64 |
47 /** | 65 /** |
48 * Removes all known filters | 66 * Removes all known filters |
49 */ | 67 */ |
50 clear() | 68 clear() |
51 { | 69 { |
52 this.filterByKeyword.clear(); | 70 this.filterByKeyword.clear(); |
53 this.keywordByFilter.clear(); | |
54 } | 71 } |
55 | 72 |
56 /** | 73 /** |
57 * Adds a filter to the matcher | 74 * Adds a filter to the matcher |
58 * @param {RegExpFilter} filter | 75 * @param {RegExpFilter} filter |
59 */ | 76 */ |
60 add(filter) | 77 add(filter) |
61 { | 78 { |
62 if (this.keywordByFilter.has(filter)) | |
63 return; | |
64 | |
65 // Look for a suitable keyword | 79 // Look for a suitable keyword |
66 let keyword = this.findKeyword(filter); | 80 let keyword = this.findKeyword(filter); |
67 let oldEntry = this.filterByKeyword.get(keyword); | 81 let set = this.filterByKeyword.get(keyword); |
68 if (typeof oldEntry == "undefined") | 82 if (typeof set == "undefined") |
| 83 { |
69 this.filterByKeyword.set(keyword, filter); | 84 this.filterByKeyword.set(keyword, filter); |
70 else if (oldEntry.length == 1) | 85 } |
71 this.filterByKeyword.set(keyword, [oldEntry, filter]); | 86 else if (set.size == 1) |
| 87 { |
| 88 if (filter != set) |
| 89 this.filterByKeyword.set(keyword, new Set([set, filter])); |
| 90 } |
72 else | 91 else |
73 oldEntry.push(filter); | 92 { |
74 this.keywordByFilter.set(filter, keyword); | 93 set.add(filter); |
| 94 } |
75 } | 95 } |
76 | 96 |
77 /** | 97 /** |
78 * Removes a filter from the matcher | 98 * Removes a filter from the matcher |
79 * @param {RegExpFilter} filter | 99 * @param {RegExpFilter} filter |
80 */ | 100 */ |
81 remove(filter) | 101 remove(filter) |
82 { | 102 { |
83 let keyword = this.keywordByFilter.get(filter); | 103 let keyword = this.findKeyword(filter); |
84 if (typeof keyword == "undefined") | 104 let set = this.filterByKeyword.get(keyword); |
| 105 if (typeof set == "undefined") |
85 return; | 106 return; |
86 | 107 |
87 let list = this.filterByKeyword.get(keyword); | 108 if (set.size == 1) |
88 if (list.length <= 1) | 109 { |
89 this.filterByKeyword.delete(keyword); | 110 if (filter == set) |
| 111 this.filterByKeyword.delete(keyword); |
| 112 } |
90 else | 113 else |
91 { | 114 { |
92 let index = list.indexOf(filter); | 115 set.delete(filter); |
93 if (index >= 0) | 116 |
94 { | 117 if (set.size == 1) |
95 list.splice(index, 1); | 118 this.filterByKeyword.set(keyword, [...set][0]); |
96 if (list.length == 1) | |
97 this.filterByKeyword.set(keyword, list[0]); | |
98 } | |
99 } | 119 } |
100 | |
101 this.keywordByFilter.delete(filter); | |
102 } | 120 } |
103 | 121 |
104 /** | 122 /** |
105 * Chooses a keyword to be associated with the filter | 123 * Chooses a keyword to be associated with the filter |
106 * @param {Filter} filter | 124 * @param {Filter} filter |
107 * @returns {string} keyword or an empty string if no keyword could be found | 125 * @returns {string} keyword or an empty string if no keyword could be found |
108 */ | 126 */ |
109 findKeyword(filter) | 127 findKeyword(filter) |
110 { | 128 { |
111 let result = ""; | 129 let result = ""; |
112 let {pattern} = filter; | 130 let {pattern} = filter; |
113 if (pattern == null) | 131 if (pattern == null) |
114 return result; | 132 return result; |
115 | 133 |
116 let candidates = pattern.toLowerCase().match( | 134 let candidates = pattern.toLowerCase().match(allKeywordsRegExp); |
117 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g | |
118 ); | |
119 if (!candidates) | 135 if (!candidates) |
120 return result; | 136 return result; |
121 | 137 |
122 let hash = this.filterByKeyword; | 138 let hash = this.filterByKeyword; |
123 let resultCount = 0xFFFFFF; | 139 let resultCount = 0xFFFFFF; |
124 let resultLength = 0; | 140 let resultLength = 0; |
125 for (let i = 0, l = candidates.length; i < l; i++) | 141 for (let i = 0, l = candidates.length; i < l; i++) |
126 { | 142 { |
127 let candidate = candidates[i].substr(1); | 143 let candidate = candidates[i].substr(1); |
128 let filters = hash.get(candidate); | 144 let filters = hash.get(candidate); |
129 let count = typeof filters != "undefined" ? filters.length : 0; | 145 let count = typeof filters != "undefined" ? filters.size : 0; |
130 if (count < resultCount || | 146 if (count < resultCount || |
131 (count == resultCount && candidate.length > resultLength)) | 147 (count == resultCount && candidate.length > resultLength)) |
132 { | 148 { |
133 result = candidate; | 149 result = candidate; |
134 resultCount = count; | 150 resultCount = count; |
135 resultLength = candidate.length; | 151 resultLength = candidate.length; |
136 } | 152 } |
137 } | 153 } |
138 return result; | 154 return result; |
139 } | 155 } |
140 | 156 |
141 /** | 157 /** |
142 * Checks whether a particular filter is being matched against. | |
143 * @param {RegExpFilter} filter | |
144 * @returns {boolean} | |
145 */ | |
146 hasFilter(filter) | |
147 { | |
148 return this.keywordByFilter.has(filter); | |
149 } | |
150 | |
151 /** | |
152 * Returns the keyword used for a filter, <code>null</code> | |
153 * for unknown filters. | |
154 * @param {RegExpFilter} filter | |
155 * @returns {?string} | |
156 */ | |
157 getKeywordForFilter(filter) | |
158 { | |
159 let keyword = this.keywordByFilter.get(filter); | |
160 return typeof keyword != "undefined" ? keyword : null; | |
161 } | |
162 | |
163 /** | |
164 * Checks whether the entries for a particular keyword match a URL | 158 * Checks whether the entries for a particular keyword match a URL |
165 * @param {string} keyword | 159 * @param {string} keyword |
166 * @param {string} location | 160 * @param {string} location |
167 * @param {number} typeMask | 161 * @param {number} typeMask |
168 * @param {string} [docDomain] | 162 * @param {string} [docDomain] |
169 * @param {boolean} [thirdParty] | 163 * @param {boolean} [thirdParty] |
170 * @param {string} [sitekey] | 164 * @param {string} [sitekey] |
171 * @param {boolean} [specificOnly] | 165 * @param {boolean} [specificOnly] |
172 * @returns {?Filter} | 166 * @returns {?Filter} |
173 */ | 167 */ |
174 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, | 168 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, |
175 specificOnly) | 169 specificOnly) |
176 { | 170 { |
177 let list = this.filterByKeyword.get(keyword); | 171 let set = this.filterByKeyword.get(keyword); |
178 if (typeof list == "undefined") | 172 if (typeof set == "undefined") |
179 return null; | 173 return null; |
180 for (let i = 0; i < list.length; i++) | 174 |
| 175 for (let filter of set) |
181 { | 176 { |
182 let filter = list[i]; | |
183 | |
184 if (specificOnly && filter.isGeneric() && | 177 if (specificOnly && filter.isGeneric() && |
185 !(filter instanceof WhitelistFilter)) | 178 !(filter instanceof WhitelistFilter)) |
186 continue; | 179 continue; |
187 | 180 |
188 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) | 181 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) |
189 return filter; | 182 return filter; |
190 } | 183 } |
191 return null; | 184 return null; |
192 } | 185 } |
193 | 186 |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
306 * @returns {string} keyword | 299 * @returns {string} keyword |
307 */ | 300 */ |
308 findKeyword(filter) | 301 findKeyword(filter) |
309 { | 302 { |
310 if (filter instanceof WhitelistFilter) | 303 if (filter instanceof WhitelistFilter) |
311 return this.whitelist.findKeyword(filter); | 304 return this.whitelist.findKeyword(filter); |
312 return this.blacklist.findKeyword(filter); | 305 return this.blacklist.findKeyword(filter); |
313 } | 306 } |
314 | 307 |
315 /** | 308 /** |
316 * @see Matcher#hasFilter | |
317 * @param {Filter} filter | |
318 * @returns {boolean} | |
319 */ | |
320 hasFilter(filter) | |
321 { | |
322 if (filter instanceof WhitelistFilter) | |
323 return this.whitelist.hasFilter(filter); | |
324 return this.blacklist.hasFilter(filter); | |
325 } | |
326 | |
327 /** | |
328 * @see Matcher#getKeywordForFilter | |
329 * @param {Filter} filter | |
330 * @returns {string} keyword | |
331 */ | |
332 getKeywordForFilter(filter) | |
333 { | |
334 if (filter instanceof WhitelistFilter) | |
335 return this.whitelist.getKeywordForFilter(filter); | |
336 return this.blacklist.getKeywordForFilter(filter); | |
337 } | |
338 | |
339 /** | |
340 * Checks whether a particular filter is slow | |
341 * @param {RegExpFilter} filter | |
342 * @returns {boolean} | |
343 */ | |
344 isSlowFilter(filter) | |
345 { | |
346 let matcher = ( | |
347 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist | |
348 ); | |
349 let keyword = matcher.getKeywordForFilter(filter); | |
350 if (keyword != null) | |
351 return !keyword; | |
352 return !matcher.findKeyword(filter); | |
353 } | |
354 | |
355 /** | |
356 * Optimized filter matching testing both whitelist and blacklist matchers | 309 * Optimized filter matching testing both whitelist and blacklist matchers |
357 * simultaneously. For parameters see | 310 * simultaneously. For parameters see |
358 {@link Matcher#matchesAny Matcher.matchesAny()}. | 311 {@link Matcher#matchesAny Matcher.matchesAny()}. |
359 * @see Matcher#matchesAny | 312 * @see Matcher#matchesAny |
360 * @inheritdoc | 313 * @inheritdoc |
361 */ | 314 */ |
362 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, | 315 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, |
363 specificOnly) | 316 specificOnly) |
364 { | 317 { |
365 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 318 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
414 | 367 |
415 exports.CombinedMatcher = CombinedMatcher; | 368 exports.CombinedMatcher = CombinedMatcher; |
416 | 369 |
417 /** | 370 /** |
418 * Shared {@link CombinedMatcher} instance that should usually be used. | 371 * Shared {@link CombinedMatcher} instance that should usually be used. |
419 * @type {CombinedMatcher} | 372 * @type {CombinedMatcher} |
420 */ | 373 */ |
421 let defaultMatcher = new CombinedMatcher(); | 374 let defaultMatcher = new CombinedMatcher(); |
422 | 375 |
423 exports.defaultMatcher = defaultMatcher; | 376 exports.defaultMatcher = defaultMatcher; |
OLD | NEW |