| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | |
| 3 * Copyright (C) 2006-2016 Eyeo GmbH | |
| 4 * | |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | |
| 6 * it under the terms of the GNU General Public License version 3 as | |
| 7 * published by the Free Software Foundation. | |
| 8 * | |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 * GNU General Public License for more details. | |
| 13 * | |
| 14 * You should have received a copy of the GNU General Public License | |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 16 */ | |
| 17 | |
| 18 /** | |
| 19 * @fileOverview Matcher class implementing matching addresses against a list of
filters. | |
| 20 */ | |
| 21 | |
| 22 let {Filter, RegExpFilter, WhitelistFilter} = require("filterClasses"); | |
| 23 | |
/**
 * Blacklist/whitelist filter matching.
 *
 * A new instance starts out empty: construction simply delegates to clear(),
 * which installs fresh lookup tables on the instance.
 * @constructor
 */
function Matcher()
{
  this.clear();
}
exports.Matcher = Matcher;
| 33 | |
Matcher.prototype = {
  /**
   * Lookup table for filters by their associated keyword. A value is either
   * a single filter (stored directly) or an array of filters sharing the
   * keyword.
   * @type Object
   */
  filterByKeyword: null,

  /**
   * Lookup table for keywords by the filter text
   * @type Object
   */
  keywordByFilter: null,

  /**
   * Removes all known filters
   */
  clear: function()
  {
    // Prototype-less objects: the tables are queried with `in`, so inherited
    // properties must not be mistaken for entries.
    this.filterByKeyword = Object.create(null);
    this.keywordByFilter = Object.create(null);
  },

  /**
   * Adds a filter to the matcher. Does nothing if a filter with the same text
   * is already registered.
   * @param {RegExpFilter} filter
   */
  add: function(filter)
  {
    if (filter.text in this.keywordByFilter)
      return;

    // Look for a suitable keyword
    let keyword = this.findKeyword(filter);
    let oldEntry = this.filterByKeyword[keyword];
    if (typeof oldEntry == "undefined")
      this.filterByKeyword[keyword] = filter;
    else if (oldEntry.length == 1)
    {
      // A lone filter is stored directly; `length == 1` is how it is told
      // apart from an array of filters (arrays here always hold two or more).
      // NOTE(review): relies on filter objects exposing `length` - presumably
      // provided by the filter classes; confirm in filterClasses.
      this.filterByKeyword[keyword] = [oldEntry, filter];
    }
    else
      oldEntry.push(filter);
    this.keywordByFilter[filter.text] = keyword;
  },

  /**
   * Removes a filter from the matcher. Does nothing if the filter is unknown.
   * @param {RegExpFilter} filter
   */
  remove: function(filter)
  {
    if (!(filter.text in this.keywordByFilter))
      return;

    let keyword = this.keywordByFilter[filter.text];
    let list = this.filterByKeyword[keyword];
    if (list.length <= 1)
      delete this.filterByKeyword[keyword];
    else
    {
      let index = list.indexOf(filter);
      if (index >= 0)
      {
        list.splice(index, 1);
        // Collapse a one-element array back to the bare filter so that the
        // storage format stays consistent with add().
        if (list.length == 1)
          this.filterByKeyword[keyword] = list[0];
      }
    }

    delete this.keywordByFilter[filter.text];
  },

  /**
   * Chooses a keyword to be associated with the filter
   * @param {RegExpFilter} filter filter to choose a keyword for
   * @return {String} keyword (might be empty string)
   */
  findKeyword: function(filter)
  {
    let result = "";
    let text = filter.text;
    if (Filter.regexpRegExp.test(text))
      return result;

    // Remove options
    let match = Filter.optionsRegExp.exec(text);
    if (match)
      text = match.input.substring(0, match.index);

    // Remove whitelist marker
    if (text.substring(0, 2) == "@@")
      text = text.substring(2);

    // Each candidate is a run of 3+ keyword characters that is delimited on
    // both sides by a character that is neither a keyword character nor "*".
    // The leading delimiter is part of the match and is stripped below.
    let candidates = text.toLowerCase().match(/[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g);
    if (!candidates)
      return result;

    // Prefer the keyword with the fewest filters already attached to it;
    // among equally used keywords prefer the longest one.
    let hash = this.filterByKeyword;
    let resultCount = 0xFFFFFF;
    let resultLength = 0;
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let candidate = candidates[i].substring(1);
      let count = (candidate in hash ? hash[candidate].length : 0);
      if (count < resultCount || (count == resultCount && candidate.length > resultLength))
      {
        result = candidate;
        resultCount = count;
        resultLength = candidate.length;
      }
    }
    return result;
  },

  /**
   * Checks whether a particular filter is being matched against.
   */
  hasFilter: function(/**RegExpFilter*/ filter) /**Boolean*/
  {
    return (filter.text in this.keywordByFilter);
  },

  /**
   * Returns the keyword used for a filter, null for unknown filters.
   */
  getKeywordForFilter: function(/**RegExpFilter*/ filter) /**String*/
  {
    if (filter.text in this.keywordByFilter)
      return this.keywordByFilter[filter.text];
    else
      return null;
  },

  /**
   * Checks whether the entries for a particular keyword match a URL.
   * The keyword must be present in filterByKeyword. Parameters other than
   * keyword are the same as for matchesAny().
   * @return {RegExpFilter} first matching filter or null
   */
  _checkEntryMatch: function(keyword, location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    // The entry is either an array of filters or a single filter; the loop
    // treats both alike through `length` and index access.
    // NOTE(review): presumably a single filter exposes itself at index 0 -
    // confirm in filterClasses.
    let list = this.filterByKeyword[keyword];
    for (let i = 0; i < list.length; i++)
    {
      let filter = list[i];

      // With specificOnly set, generic filters are skipped unless they are
      // exception rules.
      if (specificOnly && filter.isGeneric() &&
          !(filter instanceof WhitelistFilter))
        continue;

      if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
        return filter;
    }
    return null;
  },

  /**
   * Tests whether the URL matches any of the known filters
   * @param {String} location URL to be tested
   * @param {Number} typeMask bitmask of content / request types to match
   * @param {String} docDomain domain name of the document that loads the URL
   * @param {Boolean} thirdParty should be true if the URL is a third-party request
   * @param {String} sitekey public key provided by the document
   * @param {Boolean} specificOnly should be true if generic matches should be ignored
   * @return {RegExpFilter} matching filter or null
   */
  matchesAny: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
    if (candidates === null)
      candidates = [];
    // Filters without a usable keyword are stored under the empty keyword,
    // so that entry has to be checked for every URL.
    candidates.push("");
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let substr = candidates[i];
      if (substr in this.filterByKeyword)
      {
        let result = this._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
        if (result)
          return result;
      }
    }

    return null;
  }
};
| 215 | |
/**
 * Combines a matcher for blocking rules with a matcher for exception rules;
 * filters are sorted into the appropriate Matcher instance automatically.
 * Each instance also owns a lookup table of earlier matchesAny() results.
 * @constructor
 */
function CombinedMatcher()
{
  this.blacklist = new Matcher();
  this.whitelist = new Matcher();
  this.resultCache = Object.create(null);
}
exports.CombinedMatcher = CombinedMatcher;

/**
 * Upper bound on the number of cached matching results kept at a time
 * @type Number
 */
CombinedMatcher.maxCacheEntries = 1000;
| 234 | |
CombinedMatcher.prototype =
{
  /**
   * Matcher for blocking rules.
   * @type Matcher
   */
  blacklist: null,

  /**
   * Matcher for exception rules.
   * @type Matcher
   */
  whitelist: null,

  /**
   * Lookup table of previous matchesAny results
   * @type Object
   */
  resultCache: null,

  /**
   * Number of entries in resultCache
   * @type Number
   */
  cacheEntries: 0,

  /**
   * @see Matcher#clear
   */
  clear: function()
  {
    this.blacklist.clear();
    this.whitelist.clear();
    this.resultCache = Object.create(null);
    this.cacheEntries = 0;
  },

  /**
   * @see Matcher#add
   */
  add: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.add(filter);
    else
      this.blacklist.add(filter);

    // Cached results may be stale once the filter set changes; drop them all.
    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * @see Matcher#remove
   */
  remove: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      this.whitelist.remove(filter);
    else
      this.blacklist.remove(filter);

    // Cached results may be stale once the filter set changes; drop them all.
    if (this.cacheEntries > 0)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }
  },

  /**
   * @see Matcher#findKeyword
   */
  findKeyword: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.findKeyword(filter);
    else
      return this.blacklist.findKeyword(filter);
  },

  /**
   * @see Matcher#hasFilter
   */
  hasFilter: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.hasFilter(filter);
    else
      return this.blacklist.hasFilter(filter);
  },

  /**
   * @see Matcher#getKeywordForFilter
   */
  getKeywordForFilter: function(filter)
  {
    if (filter instanceof WhitelistFilter)
      return this.whitelist.getKeywordForFilter(filter);
    else
      return this.blacklist.getKeywordForFilter(filter);
  },

  /**
   * Checks whether a particular filter is slow, meaning that no non-empty
   * keyword is (or would be) associated with it. Such filters end up under
   * the empty keyword, which is checked for every URL.
   */
  isSlowFilter: function(/**RegExpFilter*/ filter) /**Boolean*/
  {
    let matcher = (filter instanceof WhitelistFilter ? this.whitelist : this.blacklist);
    // Use the stored keyword for registered filters, otherwise compute the
    // keyword the filter would get if it were added now.
    if (matcher.hasFilter(filter))
      return !matcher.getKeywordForFilter(filter);
    else
      return !matcher.findKeyword(filter);
  },

  /**
   * Optimized filter matching testing both whitelist and blacklist matchers
   * simultaneously. For parameters see Matcher.matchesAny().
   * @see Matcher#matchesAny
   * @return {RegExpFilter} a matching exception rule if there is one,
   *                        otherwise the first matching blocking rule or null
   */
  matchesAnyInternal: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
    if (candidates === null)
      candidates = [];
    // Filters without a usable keyword live under the empty keyword.
    candidates.push("");

    let blacklistHit = null;
    for (let i = 0, l = candidates.length; i < l; i++)
    {
      let substr = candidates[i];
      if (substr in this.whitelist.filterByKeyword)
      {
        // specificOnly is deliberately not forwarded: _checkEntryMatch never
        // skips exception rules on its account.
        let result = this.whitelist._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey);
        if (result)
          return result;
      }
      // Remember only the first blocking hit, but keep iterating: an
      // exception rule under a later keyword still takes precedence.
      if (substr in this.blacklist.filterByKeyword && blacklistHit === null)
        blacklistHit = this.blacklist._checkEntryMatch(substr, location, typeMask, docDomain, thirdParty, sitekey, specificOnly);
    }
    return blacklistHit;
  },

  /**
   * Cached variant of matchesAnyInternal(). Results (including null) are
   * stored per parameter combination.
   * @see Matcher#matchesAny
   */
  matchesAny: function(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
  {
    let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + " " + sitekey + " " + specificOnly;
    if (key in this.resultCache)
      return this.resultCache[key];

    let result = this.matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, specificOnly);

    // No per-entry eviction: once the limit is reached the whole cache is
    // thrown away and rebuilt from scratch.
    if (this.cacheEntries >= CombinedMatcher.maxCacheEntries)
    {
      this.resultCache = Object.create(null);
      this.cacheEntries = 0;
    }

    this.resultCache[key] = result;
    this.cacheEntries++;

    return result;
  }
};
| 402 | |
/**
 * Shared CombinedMatcher instance that should usually be used; it is also
 * published as exports.defaultMatcher.
 * @type CombinedMatcher
 */
let defaultMatcher = new CombinedMatcher();
exports.defaultMatcher = defaultMatcher;
| OLD | NEW |