OLD | NEW |
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 "use strict"; | 18 "use strict"; |
19 | 19 |
20 /** | 20 const compiled = require("compiled"); |
21 * @fileOverview Matcher class implementing matching addresses against | 21 for (let cls of ["Matcher", "CombinedMatcher"]) |
22 * a list of filters. | |
23 */ | |
24 | |
25 const {Filter, WhitelistFilter} = require("filterClasses"); | |
26 | |
27 /** | |
28 * Blacklist/whitelist filter matching | |
29 * @constructor | |
30 */ | |
31 function Matcher() | |
32 { | 22 { |
33 this.clear(); | 23 exports[cls] = compiled[cls]; |
34 } | 24 } |
35 exports.Matcher = Matcher; | 25 exports["defaultMatcher"] = new exports["CombinedMatcher"](); |
36 | |
37 Matcher.prototype = { | |
38 /** | |
39 * Lookup table for filters by their associated keyword | |
40 * @type {Object} | |
41 */ | |
42 filterByKeyword: null, | |
43 | |
44 /** | |
45 * Lookup table for keywords by the filter text | |
46 * @type {Object} | |
47 */ | |
48 keywordByFilter: null, | |
49 | |
50 /** | |
51 * Removes all known filters | |
52 */ | |
53 clear() | |
54 { | |
55 this.filterByKeyword = Object.create(null); | |
56 this.keywordByFilter = Object.create(null); | |
57 }, | |
58 | |
59 /** | |
60 * Adds a filter to the matcher | |
61 * @param {RegExpFilter} filter | |
62 */ | |
63 add(filter) | |
64 { | |
65 if (filter.text in this.keywordByFilter) | |
66 return; | |
67 | |
68 // Look for a suitable keyword | |
69 let keyword = this.findKeyword(filter); | |
70 let oldEntry = this.filterByKeyword[keyword]; | |
71 if (typeof oldEntry == "undefined") | |
72 this.filterByKeyword[keyword] = filter; | |
73 else if (oldEntry.length == 1) | |
74 this.filterByKeyword[keyword] = [oldEntry, filter]; | |
75 else | |
76 oldEntry.push(filter); | |
77 this.keywordByFilter[filter.text] = keyword; | |
78 }, | |
79 | |
80 /** | |
81 * Removes a filter from the matcher | |
82 * @param {RegExpFilter} filter | |
83 */ | |
84 remove(filter) | |
85 { | |
86 if (!(filter.text in this.keywordByFilter)) | |
87 return; | |
88 | |
89 let keyword = this.keywordByFilter[filter.text]; | |
90 let list = this.filterByKeyword[keyword]; | |
91 if (list.length <= 1) | |
92 delete this.filterByKeyword[keyword]; | |
93 else | |
94 { | |
95 let index = list.indexOf(filter); | |
96 if (index >= 0) | |
97 { | |
98 list.splice(index, 1); | |
99 if (list.length == 1) | |
100 this.filterByKeyword[keyword] = list[0]; | |
101 } | |
102 } | |
103 | |
104 delete this.keywordByFilter[filter.text]; | |
105 }, | |
106 | |
107 /** | |
108 * Chooses a keyword to be associated with the filter | |
109 * @param {Filter} filter | |
110 * @return {string} keyword or an empty string if no keyword could be found | |
111 */ | |
112 findKeyword(filter) | |
113 { | |
114 let result = ""; | |
115 let {text} = filter; | |
116 if (Filter.regexpRegExp.test(text)) | |
117 return result; | |
118 | |
119 // Remove options | |
120 let match = Filter.optionsRegExp.exec(text); | |
121 if (match) | |
122 text = match.input.substr(0, match.index); | |
123 | |
124 // Remove whitelist marker | |
125 if (text.substr(0, 2) == "@@") | |
126 text = text.substr(2); | |
127 | |
128 let candidates = text.toLowerCase().match( | |
129 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g | |
130 ); | |
131 if (!candidates) | |
132 return result; | |
133 | |
134 let hash = this.filterByKeyword; | |
135 let resultCount = 0xFFFFFF; | |
136 let resultLength = 0; | |
137 for (let i = 0, l = candidates.length; i < l; i++) | |
138 { | |
139 let candidate = candidates[i].substr(1); | |
140 let count = (candidate in hash ? hash[candidate].length : 0); | |
141 if (count < resultCount || | |
142 (count == resultCount && candidate.length > resultLength)) | |
143 { | |
144 result = candidate; | |
145 resultCount = count; | |
146 resultLength = candidate.length; | |
147 } | |
148 } | |
149 return result; | |
150 }, | |
151 | |
152 /** | |
153 * Checks whether a particular filter is being matched against. | |
154 * @param {RegExpFilter} filter | |
155 * @return {boolean} | |
156 */ | |
157 hasFilter(filter) | |
158 { | |
159 return (filter.text in this.keywordByFilter); | |
160 }, | |
161 | |
162 /** | |
163 * Returns the keyword used for a filter, null for unknown filters. | |
164 * @param {RegExpFilter} filter | |
165 * @return {string} | |
166 */ | |
167 getKeywordForFilter(filter) | |
168 { | |
169 if (filter.text in this.keywordByFilter) | |
170 return this.keywordByFilter[filter.text]; | |
171 return null; | |
172 }, | |
173 | |
174 /** | |
175 * Checks whether the entries for a particular keyword match a URL | |
176 * @param {string} keyword | |
177 * @param {string} location | |
178 * @param {number} typeMask | |
179 * @param {string} docDomain | |
180 * @param {boolean} thirdParty | |
181 * @param {string} sitekey | |
182 * @param {boolean} specificOnly | |
183 * @return {?Filter} | |
184 */ | |
185 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, | |
186 specificOnly) | |
187 { | |
188 let list = this.filterByKeyword[keyword]; | |
189 for (let i = 0; i < list.length; i++) | |
190 { | |
191 let filter = list[i]; | |
192 | |
193 if (specificOnly && filter.isGeneric() && | |
194 !(filter instanceof WhitelistFilter)) | |
195 continue; | |
196 | |
197 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
198 return filter; | |
199 } | |
200 return null; | |
201 }, | |
202 | |
203 /** | |
204 * Tests whether the URL matches any of the known filters | |
205 * @param {string} location | |
206 * URL to be tested | |
207 * @param {number} typeMask | |
208 * bitmask of content / request types to match | |
209 * @param {string} docDomain | |
210 * domain name of the document that loads the URL | |
211 * @param {boolean} thirdParty | |
212 * should be true if the URL is a third-party request | |
213 * @param {string} sitekey | |
214 * public key provided by the document | |
215 * @param {boolean} specificOnly | |
216 * should be true if generic matches should be ignored | |
217 * @return {?RegExpFilter} | |
218 * matching filter or null | |
219 */ | |
220 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | |
221 { | |
222 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | |
223 if (candidates === null) | |
224 candidates = []; | |
225 candidates.push(""); | |
226 for (let i = 0, l = candidates.length; i < l; i++) | |
227 { | |
228 let substr = candidates[i]; | |
229 if (substr in this.filterByKeyword) | |
230 { | |
231 let result = this._checkEntryMatch(substr, location, typeMask, | |
232 docDomain, thirdParty, sitekey, | |
233 specificOnly); | |
234 if (result) | |
235 return result; | |
236 } | |
237 } | |
238 | |
239 return null; | |
240 } | |
241 }; | |
242 | |
243 /** | |
244 * Combines a matcher for blocking and exception rules, automatically sorts | |
245 * rules into two Matcher instances. | |
246 * @constructor | |
247 * @augments Matcher | |
248 */ | |
249 function CombinedMatcher() | |
250 { | |
251 this.blacklist = new Matcher(); | |
252 this.whitelist = new Matcher(); | |
253 this.resultCache = Object.create(null); | |
254 } | |
255 exports.CombinedMatcher = CombinedMatcher; | |
256 | |
257 /** | |
258 * Maximal number of matching cache entries to be kept | |
259 * @type {number} | |
260 */ | |
261 CombinedMatcher.maxCacheEntries = 1000; | |
262 | |
263 CombinedMatcher.prototype = | |
264 { | |
265 /** | |
266 * Matcher for blocking rules. | |
267 * @type {Matcher} | |
268 */ | |
269 blacklist: null, | |
270 | |
271 /** | |
272 * Matcher for exception rules. | |
273 * @type {Matcher} | |
274 */ | |
275 whitelist: null, | |
276 | |
277 /** | |
278 * Lookup table of previous matchesAny results | |
279 * @type {Object} | |
280 */ | |
281 resultCache: null, | |
282 | |
283 /** | |
284 * Number of entries in resultCache | |
285 * @type {number} | |
286 */ | |
287 cacheEntries: 0, | |
288 | |
289 /** | |
290 * @see Matcher#clear | |
291 */ | |
292 clear() | |
293 { | |
294 this.blacklist.clear(); | |
295 this.whitelist.clear(); | |
296 this.resultCache = Object.create(null); | |
297 this.cacheEntries = 0; | |
298 }, | |
299 | |
300 /** | |
301 * @see Matcher#add | |
302 * @param {Filter} filter | |
303 */ | |
304 add(filter) | |
305 { | |
306 if (filter instanceof WhitelistFilter) | |
307 this.whitelist.add(filter); | |
308 else | |
309 this.blacklist.add(filter); | |
310 | |
311 if (this.cacheEntries > 0) | |
312 { | |
313 this.resultCache = Object.create(null); | |
314 this.cacheEntries = 0; | |
315 } | |
316 }, | |
317 | |
318 /** | |
319 * @see Matcher#remove | |
320 * @param {Filter} filter | |
321 */ | |
322 remove(filter) | |
323 { | |
324 if (filter instanceof WhitelistFilter) | |
325 this.whitelist.remove(filter); | |
326 else | |
327 this.blacklist.remove(filter); | |
328 | |
329 if (this.cacheEntries > 0) | |
330 { | |
331 this.resultCache = Object.create(null); | |
332 this.cacheEntries = 0; | |
333 } | |
334 }, | |
335 | |
336 /** | |
337 * @see Matcher#findKeyword | |
338 * @param {Filter} filter | |
339 * @return {string} keyword | |
340 */ | |
341 findKeyword(filter) | |
342 { | |
343 if (filter instanceof WhitelistFilter) | |
344 return this.whitelist.findKeyword(filter); | |
345 return this.blacklist.findKeyword(filter); | |
346 }, | |
347 | |
348 /** | |
349 * @see Matcher#hasFilter | |
350 * @param {Filter} filter | |
351 * @return {boolean} | |
352 */ | |
353 hasFilter(filter) | |
354 { | |
355 if (filter instanceof WhitelistFilter) | |
356 return this.whitelist.hasFilter(filter); | |
357 return this.blacklist.hasFilter(filter); | |
358 }, | |
359 | |
360 /** | |
361 * @see Matcher#getKeywordForFilter | |
362 * @param {Filter} filter | |
363 * @return {string} keyword | |
364 */ | |
365 getKeywordForFilter(filter) | |
366 { | |
367 if (filter instanceof WhitelistFilter) | |
368 return this.whitelist.getKeywordForFilter(filter); | |
369 return this.blacklist.getKeywordForFilter(filter); | |
370 }, | |
371 | |
372 /** | |
373 * Checks whether a particular filter is slow | |
374 * @param {RegExpFilter} filter | |
375 * @return {boolean} | |
376 */ | |
377 isSlowFilter(filter) | |
378 { | |
379 let matcher = ( | |
380 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist | |
381 ); | |
382 if (matcher.hasFilter(filter)) | |
383 return !matcher.getKeywordForFilter(filter); | |
384 return !matcher.findKeyword(filter); | |
385 }, | |
386 | |
387 /** | |
388 * Optimized filter matching testing both whitelist and blacklist matchers | |
389 * simultaneously. For parameters see Matcher.matchesAny(). | |
390 * @see Matcher#matchesAny | |
391 * @inheritdoc | |
392 */ | |
393 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, | |
394 specificOnly) | |
395 { | |
396 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | |
397 if (candidates === null) | |
398 candidates = []; | |
399 candidates.push(""); | |
400 | |
401 let blacklistHit = null; | |
402 for (let i = 0, l = candidates.length; i < l; i++) | |
403 { | |
404 let substr = candidates[i]; | |
405 if (substr in this.whitelist.filterByKeyword) | |
406 { | |
407 let result = this.whitelist._checkEntryMatch( | |
408 substr, location, typeMask, docDomain, thirdParty, sitekey | |
409 ); | |
410 if (result) | |
411 return result; | |
412 } | |
413 if (substr in this.blacklist.filterByKeyword && blacklistHit === null) | |
414 { | |
415 blacklistHit = this.blacklist._checkEntryMatch( | |
416 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
417 specificOnly | |
418 ); | |
419 } | |
420 } | |
421 return blacklistHit; | |
422 }, | |
423 | |
424 /** | |
425 * @see Matcher#matchesAny | |
426 * @inheritdoc | |
427 */ | |
428 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | |
429 { | |
430 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + | |
431 " " + sitekey + " " + specificOnly; | |
432 if (key in this.resultCache) | |
433 return this.resultCache[key]; | |
434 | |
435 let result = this.matchesAnyInternal(location, typeMask, docDomain, | |
436 thirdParty, sitekey, specificOnly); | |
437 | |
438 if (this.cacheEntries >= CombinedMatcher.maxCacheEntries) | |
439 { | |
440 this.resultCache = Object.create(null); | |
441 this.cacheEntries = 0; | |
442 } | |
443 | |
444 this.resultCache[key] = result; | |
445 this.cacheEntries++; | |
446 | |
447 return result; | |
448 } | |
449 }; | |
450 | |
451 /** | |
452 * Shared CombinedMatcher instance that should usually be used. | |
453 * @type {CombinedMatcher} | |
454 */ | |
455 exports.defaultMatcher = new CombinedMatcher(); | |
OLD | NEW |