Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/matcher.js

Issue 29335650: Issue 2595 - Use the core code from adblockpluscore (Closed)
Patch Set: Created Feb. 4, 2016, 6:35 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/filterStorage.js ('k') | lib/notification.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2016 Eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /**
19 * @fileOverview Matcher class implementing matching addresses against a list of filters.
20 */
21
22 let {Filter, RegExpFilter, WhitelistFilter} = require("filterClasses");
23
/**
 * Blacklist/whitelist filter matching. A new instance starts out empty:
 * the constructor delegates to clear(), which creates the two lookup tables.
 * @constructor
 */
function Matcher()
{
  this.clear();
}
32 exports.Matcher = Matcher;
33
Matcher.prototype = {
  /**
   * Lookup table for filters by their associated keyword. Each value is
   * either a single filter (one filter for the keyword) or an array of
   * filters (two or more filters for the keyword).
   * @type Object
   */
  filterByKeyword: null,

  /**
   * Lookup table for keywords by the filter text
   * @type Object
   */
  keywordByFilter: null,

  /**
   * Removes all known filters
   */
  clear: function()
  {
    // Prototype-less objects, so that arbitrary keywords / filter texts
    // (e.g. "constructor") never collide with inherited properties.
    this.filterByKeyword = Object.create(null);
    this.keywordByFilter = Object.create(null);
  },

  /**
   * Adds a filter to the matcher. Adding a filter whose text is already
   * known is a no-op.
   * @param {RegExpFilter} filter
   */
  add: function(filter)
  {
    if (filter.text in this.keywordByFilter)
      return;

    // Look for a suitable keyword
    let keyword = this.findKeyword(filter);
    let oldEntry = this.filterByKeyword[keyword];
    if (typeof oldEntry == "undefined")
      this.filterByKeyword[keyword] = filter;
    else if (oldEntry.length == 1)
    {
      // NOTE(review): a lone filter is told apart from an array by
      // `length == 1`; this assumes filter objects expose `length` (and
      // index 0) like a one-element list - confirm against filterClasses.
      this.filterByKeyword[keyword] = [oldEntry, filter];
    }
    else
      oldEntry.push(filter);
    this.keywordByFilter[filter.text] = keyword;
  },

  /**
   * Removes a filter from the matcher. Unknown filters are ignored.
   * @param {RegExpFilter} filter
   */
  remove: function(filter)
  {
    if (!(filter.text in this.keywordByFilter))
      return;

    let keyword = this.keywordByFilter[filter.text];
    let list = this.filterByKeyword[keyword];
    if (list.length <= 1)
      delete this.filterByKeyword[keyword];
    else
    {
      let index = list.indexOf(filter);
      if (index >= 0)
      {
        list.splice(index, 1);
        // Collapse a one-element array back into the bare filter so the
        // "single filter or array of 2+" invariant used by add() holds.
        if (list.length == 1)
          this.filterByKeyword[keyword] = list[0];
      }
    }

    delete this.keywordByFilter[filter.text];
  },

  /**
   * Chooses a keyword to be associated with the filter. Among all candidate
   * substrings of the filter text, the one currently used by the fewest
   * filters wins; ties are broken in favor of the longer candidate.
   * @param {RegExpFilter} filter filter whose text should be inspected
   * @return {String} keyword (might be empty string)
   */
  findKeyword: function(filter)
  {
    let result = "";
    let text = filter.text;
    if (Filter.regexpRegExp.test(text))
      return result;

    // Remove options
    let match = Filter.optionsRegExp.exec(text);
    if (match)
      text = match.input.substr(0, match.index);

    // Remove whitelist marker
    if (text.substr(0, 2) == "@@")
      text = text.substr(2);

    // A candidate is a run of 3+ [a-z0-9%] characters that is delimited on
    // both sides by a character that is neither part of that set nor "*".
    let candidates = text.toLowerCase().match(/[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g);
    if (!candidates)
      return result;

    let hash = this.filterByKeyword;
    let resultCount = 0xFFFFFF;
    let resultLength = 0;
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      // Drop the leading delimiter character captured by the regexp
      let candidate = candidates[i].substr(1);
      let count = (candidate in hash ? hash[candidate].length : 0);
      if (count < resultCount || (count == resultCount && candidate.length > resultLength))
      {
        result = candidate;
        resultCount = count;
        resultLength = candidate.length;
      }
    }
    return result;
  },

  /**
   * Checks whether a particular filter is being matched against.
   */
  hasFilter: function(/**RegExpFilter*/ filter) /**Boolean*/
  {
    return (filter.text in this.keywordByFilter);
  },

  /**
   * Returns the keyword used for a filter, null for unknown filters.
   */
  getKeywordForFilter: function(/**RegExpFilter*/ filter) /**String*/
  {
    if (filter.text in this.keywordByFilter)
      return this.keywordByFilter[filter.text];
    else
      return null;
  },

  /**
   * Checks whether the entries for a particular keyword match a URL.
   * The keyword must be present in filterByKeyword. Returns the first
   * matching filter or null.
   */
  _checkEntryMatch: function(keyword, location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    // NOTE(review): the entry may be a bare filter rather than an array;
    // iterating it assumes filters expose `length` and index 0 - confirm.
    let list = this.filterByKeyword[keyword];
    for (let i = 0; i < list.length; i++)
    {
      let filter = list[i];

      // In specific-only mode generic filters are skipped, except for
      // whitelist filters which are always considered.
      if (specificOnly && filter.isGeneric() &&
          !(filter instanceof WhitelistFilter))
        continue;

      if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
        return filter;
    }
    return null;
  },

  /**
   * Tests whether the URL matches any of the known filters
   * @param {String} location URL to be tested
   * @param {Number} typeMask bitmask of content / request types to match
   * @param {String} docDomain domain name of the document that loads the URL
   * @param {Boolean} thirdParty should be true if the URL is a third-party request
   * @param {String} sitekey public key provided by the document
   * @param {Boolean} specificOnly should be true if generic matches should be ignored
   * @return {RegExpFilter} matching filter or null
   */
  matchesAny: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
    if (candidates === null)
      candidates = [];
    // The empty keyword holds the filters for which no keyword was found,
    // these have to be checked for every URL.
    candidates.push("");
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let substr = candidates[i];
      if (substr in this.filterByKeyword)
      {
        let result = this._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
        if (result)
          return result;
      }
    }

    return null;
  }
};
215
/**
 * Combines a matcher for blocking and exception rules, automatically sorts
 * rules into two Matcher instances. Also owns a result cache that starts
 * out as an empty prototype-less object.
 * @constructor
 */
function CombinedMatcher()
{
  this.blacklist = new Matcher();
  this.whitelist = new Matcher();
  this.resultCache = Object.create(null);
}
227 exports.CombinedMatcher = CombinedMatcher;
228
/**
 * Maximal number of matching cache entries to be kept
 * @type Number
 */
CombinedMatcher.maxCacheEntries = 1000;
234
CombinedMatcher.prototype =
{
  /**
   * Matcher for blocking rules.
   * @type Matcher
   */
  blacklist: null,

  /**
   * Matcher for exception rules.
   * @type Matcher
   */
  whitelist: null,

  /**
   * Lookup table of previous matchesAny results
   * @type Object
   */
  resultCache: null,

  /**
   * Number of entries in resultCache
   * @type Number
   */
  cacheEntries: 0,

  /**
   * Removes all filters from both matchers and drops all cached results.
   * @see Matcher#clear
   */
  clear: function()
  {
    this.blacklist.clear();
    this.whitelist.clear();
    this.resultCache = Object.create(null);
    this.cacheEntries = 0;
  },

  /**
   * Adds the filter to the whitelist matcher if it is a WhitelistFilter,
   * to the blacklist matcher otherwise.
   * @see Matcher#add
   */
  add: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.add(filter);
    else
      this.blacklist.add(filter);

    // Cached results may no longer be valid once the filter set changes
    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * Removes the filter from the matcher responsible for its type.
   * @see Matcher#remove
   */
  remove: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.remove(filter);
    else
      this.blacklist.remove(filter);

    // Cached results may no longer be valid once the filter set changes
    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * @see Matcher#findKeyword
   */
  findKeyword: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.findKeyword(filter);
    else
      return this.blacklist.findKeyword(filter);
  },

  /**
   * @see Matcher#hasFilter
   */
  hasFilter: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.hasFilter(filter);
    else
      return this.blacklist.hasFilter(filter);
  },

  /**
   * @see Matcher#getKeywordForFilter
   */
  getKeywordForFilter: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.getKeywordForFilter(filter);
    else
      return this.blacklist.getKeywordForFilter(filter);
  },

  /**
   * Checks whether a particular filter is slow, meaning that no (non-empty)
   * keyword is or would be associated with it.
   */
  isSlowFilter: function(/**RegExpFilter*/ filter) /**Boolean*/
  {
    let matcher = (filter instanceof WhitelistFilter ? this.whitelist : this.blacklist);
    if (matcher.hasFilter(filter))
      return !matcher.getKeywordForFilter(filter);
    else
      return !matcher.findKeyword(filter);
  },

  /**
   * Optimized filter matching testing both whitelist and blacklist matchers
   * simultaneously. For parameters see Matcher.matchesAny().
   * A whitelist match is returned as soon as it is found; otherwise the
   * first blacklist match (or null) is returned.
   * @see Matcher#matchesAny
   */
  matchesAnyInternal: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
    if (candidates === null)
      candidates = [];
    // The empty keyword has to be checked for every URL
    candidates.push("");

    let blacklistHit = null;
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let substr = candidates[i];
      if (substr in this.whitelist.filterByKeyword)
      {
        // specificOnly is deliberately not passed on here
        let result = this.whitelist._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey);
        if (result)
          return result;
      }
      // Keep the first blacklist hit but continue looping, a later keyword
      // might still produce a whitelist hit which takes precedence.
      if (substr in this.blacklist.filterByKeyword && blacklistHit === null)
        blacklistHit = this.blacklist._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
    }
    return blacklistHit;
  },

  /**
   * Cached variant of matchesAnyInternal(). The cache is emptied entirely
   * once CombinedMatcher.maxCacheEntries results have been stored.
   * @see Matcher#matchesAny
   */
  matchesAny: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + " " + sitekey + " " + specificOnly;
    if (key in this.resultCache)
      return this.resultCache[key];

    let result = this.matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, specificOnly);

    if (this.cacheEntries >= CombinedMatcher.maxCacheEntries)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }

    this.resultCache[key] = result;
    this.cacheEntries++;

    return result;
  }
};
402
/**
 * Shared CombinedMatcher instance that should usually be used.
 * @type CombinedMatcher
 */
let defaultMatcher = exports.defaultMatcher = new CombinedMatcher();
OLDNEW
« no previous file with comments | « lib/filterStorage.js ('k') | lib/notification.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld