Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/matcher.js

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Addressed most of the comment. Fixed some issues. Created Oct. 3, 2017, 7:31 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 "use strict"; 18 "use strict";
19 19
20 /** 20 const compiled = require("compiled");
21 * @fileOverview Matcher class implementing matching addresses against 21 for (let cls of ["Matcher", "CombinedMatcher"])
22 * a list of filters.
23 */
24
25 const {Filter, WhitelistFilter} = require("filterClasses");
26
27 /**
28 * Blacklist/whitelist filter matching
29 * @constructor
30 */
31 function Matcher()
32 { 22 {
33 this.clear(); 23 exports[cls] = compiled[cls];
34 } 24 }
35 exports.Matcher = Matcher; 25 exports["defaultMatcher"] = new exports["CombinedMatcher"]();
36
37 Matcher.prototype = {
38 /**
39 * Lookup table for filters by their associated keyword
40 * @type {Object}
41 */
42 filterByKeyword: null,
43
44 /**
45 * Lookup table for keywords by the filter text
46 * @type {Object}
47 */
48 keywordByFilter: null,
49
50 /**
51 * Removes all known filters
52 */
53 clear()
54 {
55 this.filterByKeyword = Object.create(null);
56 this.keywordByFilter = Object.create(null);
57 },
58
59 /**
60 * Adds a filter to the matcher
61 * @param {RegExpFilter} filter
62 */
63 add(filter)
64 {
65 if (filter.text in this.keywordByFilter)
66 return;
67
68 // Look for a suitable keyword
69 let keyword = this.findKeyword(filter);
70 let oldEntry = this.filterByKeyword[keyword];
71 if (typeof oldEntry == "undefined")
72 this.filterByKeyword[keyword] = filter;
73 else if (oldEntry.length == 1)
74 this.filterByKeyword[keyword] = [oldEntry, filter];
75 else
76 oldEntry.push(filter);
77 this.keywordByFilter[filter.text] = keyword;
78 },
79
80 /**
81 * Removes a filter from the matcher
82 * @param {RegExpFilter} filter
83 */
84 remove(filter)
85 {
86 if (!(filter.text in this.keywordByFilter))
87 return;
88
89 let keyword = this.keywordByFilter[filter.text];
90 let list = this.filterByKeyword[keyword];
91 if (list.length <= 1)
92 delete this.filterByKeyword[keyword];
93 else
94 {
95 let index = list.indexOf(filter);
96 if (index >= 0)
97 {
98 list.splice(index, 1);
99 if (list.length == 1)
100 this.filterByKeyword[keyword] = list[0];
101 }
102 }
103
104 delete this.keywordByFilter[filter.text];
105 },
106
107 /**
108 * Chooses a keyword to be associated with the filter
109 * @param {Filter} filter
110 * @return {string} keyword or an empty string if no keyword could be found
111 */
112 findKeyword(filter)
113 {
114 let result = "";
115 let {text} = filter;
116 if (Filter.regexpRegExp.test(text))
117 return result;
118
119 // Remove options
120 let match = Filter.optionsRegExp.exec(text);
121 if (match)
122 text = match.input.substr(0, match.index);
123
124 // Remove whitelist marker
125 if (text.substr(0, 2) == "@@")
126 text = text.substr(2);
127
128 let candidates = text.toLowerCase().match(
129 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
130 );
131 if (!candidates)
132 return result;
133
134 let hash = this.filterByKeyword;
135 let resultCount = 0xFFFFFF;
136 let resultLength = 0;
137 for (let i = 0, l = candidates.length; i < l; i++)
138 {
139 let candidate = candidates[i].substr(1);
140 let count = (candidate in hash ? hash[candidate].length : 0);
141 if (count < resultCount ||
142 (count == resultCount && candidate.length > resultLength))
143 {
144 result = candidate;
145 resultCount = count;
146 resultLength = candidate.length;
147 }
148 }
149 return result;
150 },
151
152 /**
153 * Checks whether a particular filter is being matched against.
154 * @param {RegExpFilter} filter
155 * @return {boolean}
156 */
157 hasFilter(filter)
158 {
159 return (filter.text in this.keywordByFilter);
160 },
161
162 /**
163 * Returns the keyword used for a filter, null for unknown filters.
164 * @param {RegExpFilter} filter
165 * @return {string}
166 */
167 getKeywordForFilter(filter)
168 {
169 if (filter.text in this.keywordByFilter)
170 return this.keywordByFilter[filter.text];
171 return null;
172 },
173
174 /**
175 * Checks whether the entries for a particular keyword match a URL
176 * @param {string} keyword
177 * @param {string} location
178 * @param {number} typeMask
179 * @param {string} docDomain
180 * @param {boolean} thirdParty
181 * @param {string} sitekey
182 * @param {boolean} specificOnly
183 * @return {?Filter}
184 */
185 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
186 specificOnly)
187 {
188 let list = this.filterByKeyword[keyword];
189 for (let i = 0; i < list.length; i++)
190 {
191 let filter = list[i];
192
193 if (specificOnly && filter.isGeneric() &&
194 !(filter instanceof WhitelistFilter))
195 continue;
196
197 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
198 return filter;
199 }
200 return null;
201 },
202
203 /**
204 * Tests whether the URL matches any of the known filters
205 * @param {string} location
206 * URL to be tested
207 * @param {number} typeMask
208 * bitmask of content / request types to match
209 * @param {string} docDomain
210 * domain name of the document that loads the URL
211 * @param {boolean} thirdParty
212 * should be true if the URL is a third-party request
213 * @param {string} sitekey
214 * public key provided by the document
215 * @param {boolean} specificOnly
216 * should be true if generic matches should be ignored
217 * @return {?RegExpFilter}
218 * matching filter or null
219 */
220 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
221 {
222 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
223 if (candidates === null)
224 candidates = [];
225 candidates.push("");
226 for (let i = 0, l = candidates.length; i < l; i++)
227 {
228 let substr = candidates[i];
229 if (substr in this.filterByKeyword)
230 {
231 let result = this._checkEntryMatch(substr, location, typeMask,
232 docDomain, thirdParty, sitekey,
233 specificOnly);
234 if (result)
235 return result;
236 }
237 }
238
239 return null;
240 }
241 };
242
243 /**
244 * Combines a matcher for blocking and exception rules, automatically sorts
245 * rules into two Matcher instances.
246 * @constructor
247 * @augments Matcher
248 */
249 function CombinedMatcher()
250 {
251 this.blacklist = new Matcher();
252 this.whitelist = new Matcher();
253 this.resultCache = Object.create(null);
254 }
255 exports.CombinedMatcher = CombinedMatcher;
256
257 /**
258 * Maximal number of matching cache entries to be kept
259 * @type {number}
260 */
261 CombinedMatcher.maxCacheEntries = 1000;
262
263 CombinedMatcher.prototype =
264 {
265 /**
266 * Matcher for blocking rules.
267 * @type {Matcher}
268 */
269 blacklist: null,
270
271 /**
272 * Matcher for exception rules.
273 * @type {Matcher}
274 */
275 whitelist: null,
276
277 /**
278 * Lookup table of previous matchesAny results
279 * @type {Object}
280 */
281 resultCache: null,
282
283 /**
284 * Number of entries in resultCache
285 * @type {number}
286 */
287 cacheEntries: 0,
288
289 /**
290 * @see Matcher#clear
291 */
292 clear()
293 {
294 this.blacklist.clear();
295 this.whitelist.clear();
296 this.resultCache = Object.create(null);
297 this.cacheEntries = 0;
298 },
299
300 /**
301 * @see Matcher#add
302 * @param {Filter} filter
303 */
304 add(filter)
305 {
306 if (filter instanceof WhitelistFilter)
307 this.whitelist.add(filter);
308 else
309 this.blacklist.add(filter);
310
311 if (this.cacheEntries > 0)
312 {
313 this.resultCache = Object.create(null);
314 this.cacheEntries = 0;
315 }
316 },
317
318 /**
319 * @see Matcher#remove
320 * @param {Filter} filter
321 */
322 remove(filter)
323 {
324 if (filter instanceof WhitelistFilter)
325 this.whitelist.remove(filter);
326 else
327 this.blacklist.remove(filter);
328
329 if (this.cacheEntries > 0)
330 {
331 this.resultCache = Object.create(null);
332 this.cacheEntries = 0;
333 }
334 },
335
336 /**
337 * @see Matcher#findKeyword
338 * @param {Filter} filter
339 * @return {string} keyword
340 */
341 findKeyword(filter)
342 {
343 if (filter instanceof WhitelistFilter)
344 return this.whitelist.findKeyword(filter);
345 return this.blacklist.findKeyword(filter);
346 },
347
348 /**
349 * @see Matcher#hasFilter
350 * @param {Filter} filter
351 * @return {boolean}
352 */
353 hasFilter(filter)
354 {
355 if (filter instanceof WhitelistFilter)
356 return this.whitelist.hasFilter(filter);
357 return this.blacklist.hasFilter(filter);
358 },
359
360 /**
361 * @see Matcher#getKeywordForFilter
362 * @param {Filter} filter
363 * @return {string} keyword
364 */
365 getKeywordForFilter(filter)
366 {
367 if (filter instanceof WhitelistFilter)
368 return this.whitelist.getKeywordForFilter(filter);
369 return this.blacklist.getKeywordForFilter(filter);
370 },
371
372 /**
373 * Checks whether a particular filter is slow
374 * @param {RegExpFilter} filter
375 * @return {boolean}
376 */
377 isSlowFilter(filter)
378 {
379 let matcher = (
380 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
381 );
382 if (matcher.hasFilter(filter))
383 return !matcher.getKeywordForFilter(filter);
384 return !matcher.findKeyword(filter);
385 },
386
387 /**
388 * Optimized filter matching testing both whitelist and blacklist matchers
389 * simultaneously. For parameters see Matcher.matchesAny().
390 * @see Matcher#matchesAny
391 * @inheritdoc
392 */
393 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
394 specificOnly)
395 {
396 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
397 if (candidates === null)
398 candidates = [];
399 candidates.push("");
400
401 let blacklistHit = null;
402 for (let i = 0, l = candidates.length; i < l; i++)
403 {
404 let substr = candidates[i];
405 if (substr in this.whitelist.filterByKeyword)
406 {
407 let result = this.whitelist._checkEntryMatch(
408 substr, location, typeMask, docDomain, thirdParty, sitekey
409 );
410 if (result)
411 return result;
412 }
413 if (substr in this.blacklist.filterByKeyword && blacklistHit === null)
414 {
415 blacklistHit = this.blacklist._checkEntryMatch(
416 substr, location, typeMask, docDomain, thirdParty, sitekey,
417 specificOnly
418 );
419 }
420 }
421 return blacklistHit;
422 },
423
424 /**
425 * @see Matcher#matchesAny
426 * @inheritdoc
427 */
428 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
429 {
430 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
431 " " + sitekey + " " + specificOnly;
432 if (key in this.resultCache)
433 return this.resultCache[key];
434
435 let result = this.matchesAnyInternal(location, typeMask, docDomain,
436 thirdParty, sitekey, specificOnly);
437
438 if (this.cacheEntries >= CombinedMatcher.maxCacheEntries)
439 {
440 this.resultCache = Object.create(null);
441 this.cacheEntries = 0;
442 }
443
444 this.resultCache[key] = result;
445 this.cacheEntries++;
446
447 return result;
448 }
449 };
450
451 /**
452 * Shared CombinedMatcher instance that should usually be used.
453 * @type {CombinedMatcher}
454 */
455 exports.defaultMatcher = new CombinedMatcher();
OLDNEW
« compiled/filter/Matcher.cpp ('K') | « compiled/library.js ('k') | test/matcher.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld