OLD | NEW |
| (Empty) |
1 /* | |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
3 * Copyright (C) 2006-2016 Eyeo GmbH | |
4 * | |
5 * Adblock Plus is free software: you can redistribute it and/or modify | |
6 * it under the terms of the GNU General Public License version 3 as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * Adblock Plus is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU General Public License | |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
16 */ | |
17 | |
/**
 * @fileOverview Matcher class implementing matching of addresses against a
 * list of filters.
 */
21 | |
22 let {Filter, RegExpFilter, WhitelistFilter} = require("filterClasses"); | |
23 | |
24 /** | |
25 * Blacklist/whitelist filter matching | |
26 * @constructor | |
27 */ | |
28 function Matcher() | |
29 { | |
30 this.clear(); | |
31 } | |
32 exports.Matcher = Matcher; | |
33 | |
Matcher.prototype = {
  /**
   * Lookup table for filters by their associated keyword. A keyword used by
   * a single filter maps directly to that filter; once a keyword is shared
   * the value becomes an array of filters.
   * @type Object
   */
  filterByKeyword: null,

  /**
   * Lookup table for keywords by the filter text
   * @type Object
   */
  keywordByFilter: null,

  /**
   * Removes all known filters
   */
  clear: function()
  {
    // Prototype-less objects so that arbitrary keywords / filter texts
    // cannot collide with inherited Object.prototype properties.
    this.filterByKeyword = Object.create(null);
    this.keywordByFilter = Object.create(null);
  },

  /**
   * Adds a filter to the matcher. Filters whose text is already known are
   * ignored.
   * @param {RegExpFilter} filter
   */
  add: function(filter)
  {
    if (filter.text in this.keywordByFilter)
      return;

    // Look for a suitable keyword
    let keyword = this.findKeyword(filter);
    let oldEntry = this.filterByKeyword[keyword];
    if (typeof oldEntry === "undefined")
      this.filterByKeyword[keyword] = filter;
    else if (oldEntry.length === 1)
    {
      // A lone filter is stored directly and is told apart from an array by
      // its length of 1 (arrays stored here always hold two or more
      // entries). NOTE(review): this relies on filter objects exposing
      // `length == 1` - presumably provided by RegExpFilter; confirm in
      // filterClasses.
      this.filterByKeyword[keyword] = [oldEntry, filter];
    }
    else
      oldEntry.push(filter);
    this.keywordByFilter[filter.text] = keyword;
  },

  /**
   * Removes a filter from the matcher. Unknown filters are ignored.
   * @param {RegExpFilter} filter
   */
  remove: function(filter)
  {
    if (!(filter.text in this.keywordByFilter))
      return;

    let keyword = this.keywordByFilter[filter.text];
    let list = this.filterByKeyword[keyword];
    if (list.length <= 1)
      delete this.filterByKeyword[keyword];
    else
    {
      let index = list.indexOf(filter);
      if (index < 0)
      {
        // add(), hasFilter() and the single-entry branch above identify
        // filters by their text, so do the same here. Otherwise removing an
        // equal-text instance would drop the keywordByFilter entry while the
        // filter stayed in the bucket and kept matching.
        for (let i = 0; i < list.length; i++)
        {
          if (list[i].text === filter.text)
          {
            index = i;
            break;
          }
        }
      }

      if (index >= 0)
      {
        list.splice(index, 1);
        // Collapse a one-element array back to the bare filter so that the
        // "array means two or more entries" invariant used by add() holds.
        if (list.length === 1)
          this.filterByKeyword[keyword] = list[0];
      }
    }

    delete this.keywordByFilter[filter.text];
  },

  /**
   * Chooses a keyword to be associated with the filter
   * @param {Filter} filter filter whose text is inspected
   * @return {String} keyword (might be empty string)
   */
  findKeyword: function(filter)
  {
    let result = "";
    let text = filter.text;
    if (Filter.regexpRegExp.test(text))
      return result;

    // Remove options
    let match = Filter.optionsRegExp.exec(text);
    if (match)
      text = match.input.substr(0, match.index);

    // Remove whitelist marker
    if (text.substr(0, 2) === "@@")
      text = text.substr(2);

    // A candidate is a run of three or more [a-z0-9%] characters delimited
    // on both sides by a character that is neither alphanumeric, "%" nor
    // the "*" wildcard. The leading delimiter is part of each match.
    let candidates = text.toLowerCase().match(/[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g);
    if (!candidates)
      return result;

    // Prefer the keyword with the fewest filters already attached; break
    // ties in favour of the longer keyword.
    let hash = this.filterByKeyword;
    let resultCount = 0xFFFFFF;
    let resultLength = 0;
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      // Strip the leading delimiter character
      let candidate = candidates[i].substr(1);
      let count = (candidate in hash ? hash[candidate].length : 0);
      if (count < resultCount || (count === resultCount && candidate.length > resultLength))
      {
        result = candidate;
        resultCount = count;
        resultLength = candidate.length;
      }
    }
    return result;
  },

  /**
   * Checks whether a particular filter is being matched against.
   */
  hasFilter: function(/**RegExpFilter*/ filter) /**Boolean*/
  {
    return (filter.text in this.keywordByFilter);
  },

  /**
   * Returns the keyword used for a filter, null for unknown filters.
   */
  getKeywordForFilter: function(/**RegExpFilter*/ filter) /**String*/
  {
    if (filter.text in this.keywordByFilter)
      return this.keywordByFilter[filter.text];
    else
      return null;
  },

  /**
   * Checks whether the entries for a particular keyword match a URL. The
   * keyword must be present in filterByKeyword. With specificOnly set,
   * generic filters are skipped unless they are WhitelistFilter instances.
   * @return {RegExpFilter} first matching filter or null
   */
  _checkEntryMatch: function(keyword, location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    // The entry is either an array or a single filter; the latter is
    // iterated through its `length` and `[0]` properties.
    // NOTE(review): presumably RegExpFilter provides both - confirm.
    let list = this.filterByKeyword[keyword];
    for (let i = 0; i < list.length; i++)
    {
      let filter = list[i];

      if (specificOnly && filter.isGeneric() &&
          !(filter instanceof WhitelistFilter))
        continue;

      if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
        return filter;
    }
    return null;
  },

  /**
   * Tests whether the URL matches any of the known filters
   * @param {String} location URL to be tested
   * @param {Number} typeMask bitmask of content / request types to match
   * @param {String} docDomain domain name of the document that loads the URL
   * @param {Boolean} thirdParty should be true if the URL is a third-party request
   * @param {String} sitekey public key provided by the document
   * @param {Boolean} specificOnly should be true if generic matches should be ignored
   * @return {RegExpFilter} matching filter or null
   */
  matchesAny: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
    if (candidates === null)
      candidates = [];
    // Filters for which findKeyword() found nothing are stored under the
    // empty keyword, so that bucket has to be checked for every URL.
    candidates.push("");
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let substr = candidates[i];
      if (substr in this.filterByKeyword)
      {
        let result = this._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
        if (result)
          return result;
      }
    }

    return null;
  }
};
215 | |
216 /** | |
217 * Combines a matcher for blocking and exception rules, automatically sorts | |
218 * rules into two Matcher instances. | |
219 * @constructor | |
220 */ | |
221 function CombinedMatcher() | |
222 { | |
223 this.blacklist = new Matcher(); | |
224 this.whitelist = new Matcher(); | |
225 this.resultCache = Object.create(null); | |
226 } | |
227 exports.CombinedMatcher = CombinedMatcher; | |
228 | |
/**
 * Maximal number of matching cache entries to be kept
 * @type Number
 */
CombinedMatcher.maxCacheEntries = 1000;
234 | |
CombinedMatcher.prototype =
{
  /**
   * Matcher for blocking rules.
   * @type Matcher
   */
  blacklist: null,

  /**
   * Matcher for exception rules.
   * @type Matcher
   */
  whitelist: null,

  /**
   * Lookup table of previous matchesAny results
   * @type Object
   */
  resultCache: null,

  /**
   * Number of entries in resultCache
   * @type Number
   */
  cacheEntries: 0,

  /**
   * @see Matcher#clear
   */
  clear: function()
  {
    this.blacklist.clear();
    this.whitelist.clear();
    this.resultCache = Object.create(null);
    this.cacheEntries = 0;
  },

  /**
   * @see Matcher#add
   */
  add: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.add(filter);
    else
      this.blacklist.add(filter);

    // Cached results may be stale once the filter set changes
    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * @see Matcher#remove
   */
  remove: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.remove(filter);
    else
      this.blacklist.remove(filter);

    // Cached results may be stale once the filter set changes
    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * @see Matcher#findKeyword
   */
  findKeyword: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.findKeyword(filter);
    else
      return this.blacklist.findKeyword(filter);
  },

  /**
   * @see Matcher#hasFilter
   */
  hasFilter: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.hasFilter(filter);
    else
      return this.blacklist.hasFilter(filter);
  },

  /**
   * @see Matcher#getKeywordForFilter
   */
  getKeywordForFilter: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.getKeywordForFilter(filter);
    else
      return this.blacklist.getKeywordForFilter(filter);
  },

  /**
   * Checks whether a particular filter is slow, i.e. whether it has (or
   * would get) an empty keyword.
   */
  isSlowFilter: function(/**RegExpFilter*/ filter) /**Boolean*/
  {
    let matcher = (filter instanceof WhitelistFilter ? this.whitelist : this.blacklist);
    if (matcher.hasFilter(filter))
      return !matcher.getKeywordForFilter(filter);
    else
      return !matcher.findKeyword(filter);
  },

  /**
   * Optimized filter matching testing both whitelist and blacklist matchers
   * simultaneously. For parameters see Matcher.matchesAny().
   * A whitelist match is returned as soon as it is found; otherwise the
   * first blacklist match (or null) is returned after all keywords were
   * checked.
   * @see Matcher#matchesAny
   */
  matchesAnyInternal: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
    if (candidates === null)
      candidates = [];
    // The empty keyword has to be looked up for every URL
    candidates.push("");

    let blacklistHit = null;
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let substr = candidates[i];
      if (substr in this.whitelist.filterByKeyword)
      {
        // specificOnly is deliberately not passed on: it only ever causes
        // non-whitelist filters to be skipped (see Matcher#_checkEntryMatch).
        let result = this.whitelist._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey);
        if (result)
          return result;
      }
      // Keep the first blacklist hit but continue, a later keyword might
      // still produce a whitelist match that overrides it.
      if (substr in this.blacklist.filterByKeyword && blacklistHit === null)
        blacklistHit = this.blacklist._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
    }
    return blacklistHit;
  },

  /**
   * Cached variant of matchesAnyInternal(). Results, including null for
   * "no match", are stored under a key built from all arguments; the whole
   * cache is dropped once CombinedMatcher.maxCacheEntries is reached.
   * @see Matcher#matchesAny
   */
  matchesAny: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + " " + sitekey + " " + specificOnly;
    // `in` rather than a truthiness test so that cached null results count
    if (key in this.resultCache)
      return this.resultCache[key];

    let result = this.matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, specificOnly);

    if (this.cacheEntries >= CombinedMatcher.maxCacheEntries)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }

    this.resultCache[key] = result;
    this.cacheEntries++;

    return result;
  }
};
402 | |
/**
 * Shared CombinedMatcher instance that should usually be used.
 * @type CombinedMatcher
 */
let defaultMatcher = exports.defaultMatcher = new CombinedMatcher();
OLD | NEW |