| OLD | NEW |
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 "use strict"; | 18 "use strict"; |
| 19 | 19 |
| 20 /** | 20 const compiled = require("compiled"); |
| 21 * @fileOverview Matcher class implementing matching addresses against | 21 for (let cls of ["Matcher", "CombinedMatcher"]) |
| 22 * a list of filters. | |
| 23 */ | |
| 24 | |
| 25 const {Filter, WhitelistFilter} = require("filterClasses"); | |
| 26 | |
| 27 /** | |
| 28 * Blacklist/whitelist filter matching | |
| 29 * @constructor | |
| 30 */ | |
| 31 function Matcher() | |
| 32 { | 22 { |
| 33 this.clear(); | 23 exports[cls] = compiled[cls]; |
| 34 } | 24 } |
| 35 exports.Matcher = Matcher; | 25 exports["defaultMatcher"] = exports["CombinedMatcher"].create(); |
| 36 | |
| 37 Matcher.prototype = { | |
| 38 /** | |
| 39 * Lookup table for filters by their associated keyword | |
| 40 * @type {Object} | |
| 41 */ | |
| 42 filterByKeyword: null, | |
| 43 | |
| 44 /** | |
| 45 * Lookup table for keywords by the filter text | |
| 46 * @type {Object} | |
| 47 */ | |
| 48 keywordByFilter: null, | |
| 49 | |
| 50 /** | |
| 51 * Removes all known filters | |
| 52 */ | |
| 53 clear() | |
| 54 { | |
| 55 this.filterByKeyword = Object.create(null); | |
| 56 this.keywordByFilter = Object.create(null); | |
| 57 }, | |
| 58 | |
| 59 /** | |
| 60 * Adds a filter to the matcher | |
| 61 * @param {RegExpFilter} filter | |
| 62 */ | |
| 63 add(filter) | |
| 64 { | |
| 65 if (filter.text in this.keywordByFilter) | |
| 66 return; | |
| 67 | |
| 68 // Look for a suitable keyword | |
| 69 let keyword = this.findKeyword(filter); | |
| 70 let oldEntry = this.filterByKeyword[keyword]; | |
| 71 if (typeof oldEntry == "undefined") | |
| 72 this.filterByKeyword[keyword] = filter; | |
| 73 else if (oldEntry.length == 1) | |
| 74 this.filterByKeyword[keyword] = [oldEntry, filter]; | |
| 75 else | |
| 76 oldEntry.push(filter); | |
| 77 this.keywordByFilter[filter.text] = keyword; | |
| 78 }, | |
| 79 | |
| 80 /** | |
| 81 * Removes a filter from the matcher | |
| 82 * @param {RegExpFilter} filter | |
| 83 */ | |
| 84 remove(filter) | |
| 85 { | |
| 86 if (!(filter.text in this.keywordByFilter)) | |
| 87 return; | |
| 88 | |
| 89 let keyword = this.keywordByFilter[filter.text]; | |
| 90 let list = this.filterByKeyword[keyword]; | |
| 91 if (list.length <= 1) | |
| 92 delete this.filterByKeyword[keyword]; | |
| 93 else | |
| 94 { | |
| 95 let index = list.indexOf(filter); | |
| 96 if (index >= 0) | |
| 97 { | |
| 98 list.splice(index, 1); | |
| 99 if (list.length == 1) | |
| 100 this.filterByKeyword[keyword] = list[0]; | |
| 101 } | |
| 102 } | |
| 103 | |
| 104 delete this.keywordByFilter[filter.text]; | |
| 105 }, | |
| 106 | |
| 107 /** | |
| 108 * Chooses a keyword to be associated with the filter | |
| 109 * @param {Filter} filter | |
| 110 * @return {string} keyword or an empty string if no keyword could be found | |
| 111 */ | |
| 112 findKeyword(filter) | |
| 113 { | |
| 114 let result = ""; | |
| 115 let {text} = filter; | |
| 116 if (Filter.regexpRegExp.test(text)) | |
| 117 return result; | |
| 118 | |
| 119 // Remove options | |
| 120 let match = Filter.optionsRegExp.exec(text); | |
| 121 if (match) | |
| 122 text = match.input.substr(0, match.index); | |
| 123 | |
| 124 // Remove whitelist marker | |
| 125 if (text.substr(0, 2) == "@@") | |
| 126 text = text.substr(2); | |
| 127 | |
| 128 let candidates = text.toLowerCase().match( | |
| 129 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g | |
| 130 ); | |
| 131 if (!candidates) | |
| 132 return result; | |
| 133 | |
| 134 let hash = this.filterByKeyword; | |
| 135 let resultCount = 0xFFFFFF; | |
| 136 let resultLength = 0; | |
| 137 for (let i = 0, l = candidates.length; i < l; i++) | |
| 138 { | |
| 139 let candidate = candidates[i].substr(1); | |
| 140 let count = (candidate in hash ? hash[candidate].length : 0); | |
| 141 if (count < resultCount || | |
| 142 (count == resultCount && candidate.length > resultLength)) | |
| 143 { | |
| 144 result = candidate; | |
| 145 resultCount = count; | |
| 146 resultLength = candidate.length; | |
| 147 } | |
| 148 } | |
| 149 return result; | |
| 150 }, | |
| 151 | |
| 152 /** | |
| 153 * Checks whether a particular filter is being matched against. | |
| 154 * @param {RegExpFilter} filter | |
| 155 * @return {boolean} | |
| 156 */ | |
| 157 hasFilter(filter) | |
| 158 { | |
| 159 return (filter.text in this.keywordByFilter); | |
| 160 }, | |
| 161 | |
| 162 /** | |
| 163 * Returns the keyword used for a filter, null for unknown filters. | |
| 164 * @param {RegExpFilter} filter | |
| 165 * @return {string} | |
| 166 */ | |
| 167 getKeywordForFilter(filter) | |
| 168 { | |
| 169 if (filter.text in this.keywordByFilter) | |
| 170 return this.keywordByFilter[filter.text]; | |
| 171 return null; | |
| 172 }, | |
| 173 | |
| 174 /** | |
| 175 * Checks whether the entries for a particular keyword match a URL | |
| 176 * @param {string} keyword | |
| 177 * @param {string} location | |
| 178 * @param {number} typeMask | |
| 179 * @param {string} docDomain | |
| 180 * @param {boolean} thirdParty | |
| 181 * @param {string} sitekey | |
| 182 * @param {boolean} specificOnly | |
| 183 * @return {?Filter} | |
| 184 */ | |
| 185 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, | |
| 186 specificOnly) | |
| 187 { | |
| 188 let list = this.filterByKeyword[keyword]; | |
| 189 for (let i = 0; i < list.length; i++) | |
| 190 { | |
| 191 let filter = list[i]; | |
| 192 | |
| 193 if (specificOnly && filter.isGeneric() && | |
| 194 !(filter instanceof WhitelistFilter)) | |
| 195 continue; | |
| 196 | |
| 197 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) | |
| 198 return filter; | |
| 199 } | |
| 200 return null; | |
| 201 }, | |
| 202 | |
| 203 /** | |
| 204 * Tests whether the URL matches any of the known filters | |
| 205 * @param {string} location | |
| 206 * URL to be tested | |
| 207 * @param {number} typeMask | |
| 208 * bitmask of content / request types to match | |
| 209 * @param {string} docDomain | |
| 210 * domain name of the document that loads the URL | |
| 211 * @param {boolean} thirdParty | |
| 212 * should be true if the URL is a third-party request | |
| 213 * @param {string} sitekey | |
| 214 * public key provided by the document | |
| 215 * @param {boolean} specificOnly | |
| 216 * should be true if generic matches should be ignored | |
| 217 * @return {?RegExpFilter} | |
| 218 * matching filter or null | |
| 219 */ | |
| 220 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | |
| 221 { | |
| 222 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | |
| 223 if (candidates === null) | |
| 224 candidates = []; | |
| 225 candidates.push(""); | |
| 226 for (let i = 0, l = candidates.length; i < l; i++) | |
| 227 { | |
| 228 let substr = candidates[i]; | |
| 229 if (substr in this.filterByKeyword) | |
| 230 { | |
| 231 let result = this._checkEntryMatch(substr, location, typeMask, | |
| 232 docDomain, thirdParty, sitekey, | |
| 233 specificOnly); | |
| 234 if (result) | |
| 235 return result; | |
| 236 } | |
| 237 } | |
| 238 | |
| 239 return null; | |
| 240 } | |
| 241 }; | |
| 242 | |
| 243 /** | |
| 244 * Combines a matcher for blocking and exception rules, automatically sorts | |
| 245 * rules into two Matcher instances. | |
| 246 * @constructor | |
| 247 * @augments Matcher | |
| 248 */ | |
| 249 function CombinedMatcher() | |
| 250 { | |
| 251 this.blacklist = new Matcher(); | |
| 252 this.whitelist = new Matcher(); | |
| 253 this.resultCache = Object.create(null); | |
| 254 } | |
| 255 exports.CombinedMatcher = CombinedMatcher; | |
| 256 | |
| 257 /** | |
| 258 * Maximal number of matching cache entries to be kept | |
| 259 * @type {number} | |
| 260 */ | |
| 261 CombinedMatcher.maxCacheEntries = 1000; | |
| 262 | |
| 263 CombinedMatcher.prototype = | |
| 264 { | |
| 265 /** | |
| 266 * Matcher for blocking rules. | |
| 267 * @type {Matcher} | |
| 268 */ | |
| 269 blacklist: null, | |
| 270 | |
| 271 /** | |
| 272 * Matcher for exception rules. | |
| 273 * @type {Matcher} | |
| 274 */ | |
| 275 whitelist: null, | |
| 276 | |
| 277 /** | |
| 278 * Lookup table of previous matchesAny results | |
| 279 * @type {Object} | |
| 280 */ | |
| 281 resultCache: null, | |
| 282 | |
| 283 /** | |
| 284 * Number of entries in resultCache | |
| 285 * @type {number} | |
| 286 */ | |
| 287 cacheEntries: 0, | |
| 288 | |
| 289 /** | |
| 290 * @see Matcher#clear | |
| 291 */ | |
| 292 clear() | |
| 293 { | |
| 294 this.blacklist.clear(); | |
| 295 this.whitelist.clear(); | |
| 296 this.resultCache = Object.create(null); | |
| 297 this.cacheEntries = 0; | |
| 298 }, | |
| 299 | |
| 300 /** | |
| 301 * @see Matcher#add | |
| 302 * @param {Filter} filter | |
| 303 */ | |
| 304 add(filter) | |
| 305 { | |
| 306 if (filter instanceof WhitelistFilter) | |
| 307 this.whitelist.add(filter); | |
| 308 else | |
| 309 this.blacklist.add(filter); | |
| 310 | |
| 311 if (this.cacheEntries > 0) | |
| 312 { | |
| 313 this.resultCache = Object.create(null); | |
| 314 this.cacheEntries = 0; | |
| 315 } | |
| 316 }, | |
| 317 | |
| 318 /** | |
| 319 * @see Matcher#remove | |
| 320 * @param {Filter} filter | |
| 321 */ | |
| 322 remove(filter) | |
| 323 { | |
| 324 if (filter instanceof WhitelistFilter) | |
| 325 this.whitelist.remove(filter); | |
| 326 else | |
| 327 this.blacklist.remove(filter); | |
| 328 | |
| 329 if (this.cacheEntries > 0) | |
| 330 { | |
| 331 this.resultCache = Object.create(null); | |
| 332 this.cacheEntries = 0; | |
| 333 } | |
| 334 }, | |
| 335 | |
| 336 /** | |
| 337 * @see Matcher#findKeyword | |
| 338 * @param {Filter} filter | |
| 339 * @return {string} keyword | |
| 340 */ | |
| 341 findKeyword(filter) | |
| 342 { | |
| 343 if (filter instanceof WhitelistFilter) | |
| 344 return this.whitelist.findKeyword(filter); | |
| 345 return this.blacklist.findKeyword(filter); | |
| 346 }, | |
| 347 | |
| 348 /** | |
| 349 * @see Matcher#hasFilter | |
| 350 * @param {Filter} filter | |
| 351 * @return {boolean} | |
| 352 */ | |
| 353 hasFilter(filter) | |
| 354 { | |
| 355 if (filter instanceof WhitelistFilter) | |
| 356 return this.whitelist.hasFilter(filter); | |
| 357 return this.blacklist.hasFilter(filter); | |
| 358 }, | |
| 359 | |
| 360 /** | |
| 361 * @see Matcher#getKeywordForFilter | |
| 362 * @param {Filter} filter | |
| 363 * @return {string} keyword | |
| 364 */ | |
| 365 getKeywordForFilter(filter) | |
| 366 { | |
| 367 if (filter instanceof WhitelistFilter) | |
| 368 return this.whitelist.getKeywordForFilter(filter); | |
| 369 return this.blacklist.getKeywordForFilter(filter); | |
| 370 }, | |
| 371 | |
| 372 /** | |
| 373 * Checks whether a particular filter is slow | |
| 374 * @param {RegExpFilter} filter | |
| 375 * @return {boolean} | |
| 376 */ | |
| 377 isSlowFilter(filter) | |
| 378 { | |
| 379 let matcher = ( | |
| 380 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist | |
| 381 ); | |
| 382 if (matcher.hasFilter(filter)) | |
| 383 return !matcher.getKeywordForFilter(filter); | |
| 384 return !matcher.findKeyword(filter); | |
| 385 }, | |
| 386 | |
| 387 /** | |
| 388 * Optimized filter matching testing both whitelist and blacklist matchers | |
| 389 * simultaneously. For parameters see Matcher.matchesAny(). | |
| 390 * @see Matcher#matchesAny | |
| 391 * @inheritdoc | |
| 392 */ | |
| 393 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, | |
| 394 specificOnly) | |
| 395 { | |
| 396 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | |
| 397 if (candidates === null) | |
| 398 candidates = []; | |
| 399 candidates.push(""); | |
| 400 | |
| 401 let blacklistHit = null; | |
| 402 for (let i = 0, l = candidates.length; i < l; i++) | |
| 403 { | |
| 404 let substr = candidates[i]; | |
| 405 if (substr in this.whitelist.filterByKeyword) | |
| 406 { | |
| 407 let result = this.whitelist._checkEntryMatch( | |
| 408 substr, location, typeMask, docDomain, thirdParty, sitekey | |
| 409 ); | |
| 410 if (result) | |
| 411 return result; | |
| 412 } | |
| 413 if (substr in this.blacklist.filterByKeyword && blacklistHit === null) | |
| 414 { | |
| 415 blacklistHit = this.blacklist._checkEntryMatch( | |
| 416 substr, location, typeMask, docDomain, thirdParty, sitekey, | |
| 417 specificOnly | |
| 418 ); | |
| 419 } | |
| 420 } | |
| 421 return blacklistHit; | |
| 422 }, | |
| 423 | |
| 424 /** | |
| 425 * @see Matcher#matchesAny | |
| 426 * @inheritdoc | |
| 427 */ | |
| 428 matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) | |
| 429 { | |
| 430 let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + | |
| 431 " " + sitekey + " " + specificOnly; | |
| 432 if (key in this.resultCache) | |
| 433 return this.resultCache[key]; | |
| 434 | |
| 435 let result = this.matchesAnyInternal(location, typeMask, docDomain, | |
| 436 thirdParty, sitekey, specificOnly); | |
| 437 | |
| 438 if (this.cacheEntries >= CombinedMatcher.maxCacheEntries) | |
| 439 { | |
| 440 this.resultCache = Object.create(null); | |
| 441 this.cacheEntries = 0; | |
| 442 } | |
| 443 | |
| 444 this.resultCache[key] = result; | |
| 445 this.cacheEntries++; | |
| 446 | |
| 447 return result; | |
| 448 } | |
| 449 }; | |
| 450 | |
| 451 /** | |
| 452 * Shared CombinedMatcher instance that should usually be used. | |
| 453 * @type {CombinedMatcher} | |
| 454 */ | |
| 455 exports.defaultMatcher = new CombinedMatcher(); | |
| OLD | NEW |