| OLD | NEW | 
|---|
| 1 /* | 1 /* | 
| 2  * This file is part of Adblock Plus <https://adblockplus.org/>, | 2  * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3  * Copyright (C) 2006-present eyeo GmbH | 3  * Copyright (C) 2006-present eyeo GmbH | 
| 4  * | 4  * | 
| 5  * Adblock Plus is free software: you can redistribute it and/or modify | 5  * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6  * it under the terms of the GNU General Public License version 3 as | 6  * it under the terms of the GNU General Public License version 3 as | 
| 7  * published by the Free Software Foundation. | 7  * published by the Free Software Foundation. | 
| 8  * | 8  * | 
| 9  * Adblock Plus is distributed in the hope that it will be useful, | 9  * Adblock Plus is distributed in the hope that it will be useful, | 
| 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 12  * GNU General Public License for more details. | 12  * GNU General Public License for more details. | 
| 13  * | 13  * | 
| 14  * You should have received a copy of the GNU General Public License | 14  * You should have received a copy of the GNU General Public License | 
| 15  * along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 15  * along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
| 16  */ | 16  */ | 
| 17 | 17 | 
| 18 "use strict"; | 18 "use strict"; | 
| 19 | 19 | 
| 20 /** | 20 const compiled = require("compiled"); | 
| 21  * @fileOverview Matcher class implementing matching addresses against | 21 for (let cls of ["Matcher"]) | 
| 22  *               a list of filters. |  | 
| 23  */ |  | 
| 24 |  | 
| 25 const {Filter, WhitelistFilter} = require("filterClasses"); |  | 
| 26 |  | 
| 27 /** |  | 
| 28  * Blacklist/whitelist filter matching |  | 
| 29  * @constructor |  | 
| 30  */ |  | 
| 31 function Matcher() |  | 
| 32 { | 22 { | 
| 33   this.clear(); | 23   exports[cls] = compiled[cls]; | 
| 34 } | 24 } | 
| 35 exports.Matcher = Matcher; |  | 
| 36 |  | 
| 37 Matcher.prototype = { |  | 
| 38   /** |  | 
| 39    * Lookup table for filters by their associated keyword |  | 
| 40    * @type {Object} |  | 
| 41    */ |  | 
| 42   filterByKeyword: null, |  | 
| 43 |  | 
| 44   /** |  | 
| 45    * Lookup table for keywords by the filter text |  | 
| 46    * @type {Object} |  | 
| 47    */ |  | 
| 48   keywordByFilter: null, |  | 
| 49 |  | 
| 50   /** |  | 
| 51    * Removes all known filters |  | 
| 52    */ |  | 
| 53   clear() |  | 
| 54   { |  | 
| 55     this.filterByKeyword = Object.create(null); |  | 
| 56     this.keywordByFilter = Object.create(null); |  | 
| 57   }, |  | 
| 58 |  | 
| 59   /** |  | 
| 60    * Adds a filter to the matcher |  | 
| 61    * @param {RegExpFilter} filter |  | 
| 62    */ |  | 
| 63   add(filter) |  | 
| 64   { |  | 
| 65     if (filter.text in this.keywordByFilter) |  | 
| 66       return; |  | 
| 67 |  | 
| 68     // Look for a suitable keyword |  | 
| 69     let keyword = this.findKeyword(filter); |  | 
| 70     let oldEntry = this.filterByKeyword[keyword]; |  | 
| 71     if (typeof oldEntry == "undefined") |  | 
| 72       this.filterByKeyword[keyword] = filter; |  | 
| 73     else if (oldEntry.length == 1) |  | 
| 74       this.filterByKeyword[keyword] = [oldEntry, filter]; |  | 
| 75     else |  | 
| 76       oldEntry.push(filter); |  | 
| 77     this.keywordByFilter[filter.text] = keyword; |  | 
| 78   }, |  | 
| 79 |  | 
| 80   /** |  | 
| 81    * Removes a filter from the matcher |  | 
| 82    * @param {RegExpFilter} filter |  | 
| 83    */ |  | 
| 84   remove(filter) |  | 
| 85   { |  | 
| 86     if (!(filter.text in this.keywordByFilter)) |  | 
| 87       return; |  | 
| 88 |  | 
| 89     let keyword = this.keywordByFilter[filter.text]; |  | 
| 90     let list = this.filterByKeyword[keyword]; |  | 
| 91     if (list.length <= 1) |  | 
| 92       delete this.filterByKeyword[keyword]; |  | 
| 93     else |  | 
| 94     { |  | 
| 95       let index = list.indexOf(filter); |  | 
| 96       if (index >= 0) |  | 
| 97       { |  | 
| 98         list.splice(index, 1); |  | 
| 99         if (list.length == 1) |  | 
| 100           this.filterByKeyword[keyword] = list[0]; |  | 
| 101       } |  | 
| 102     } |  | 
| 103 |  | 
| 104     delete this.keywordByFilter[filter.text]; |  | 
| 105   }, |  | 
| 106 |  | 
| 107   /** |  | 
| 108    * Chooses a keyword to be associated with the filter |  | 
| 109    * @param {Filter} filter |  | 
| 110    * @return {string} keyword or an empty string if no keyword could be found |  | 
| 111    */ |  | 
| 112   findKeyword(filter) |  | 
| 113   { |  | 
| 114     let result = ""; |  | 
| 115     let {text} = filter; |  | 
| 116     if (Filter.regexpRegExp.test(text)) |  | 
| 117       return result; |  | 
| 118 |  | 
| 119     // Remove options |  | 
| 120     let match = Filter.optionsRegExp.exec(text); |  | 
| 121     if (match) |  | 
| 122       text = match.input.substr(0, match.index); |  | 
| 123 |  | 
| 124     // Remove whitelist marker |  | 
| 125     if (text.substr(0, 2) == "@@") |  | 
| 126       text = text.substr(2); |  | 
| 127 |  | 
| 128     let candidates = text.toLowerCase().match( |  | 
| 129       /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g |  | 
| 130     ); |  | 
| 131     if (!candidates) |  | 
| 132       return result; |  | 
| 133 |  | 
| 134     let hash = this.filterByKeyword; |  | 
| 135     let resultCount = 0xFFFFFF; |  | 
| 136     let resultLength = 0; |  | 
| 137     for (let i = 0, l = candidates.length; i < l; i++) |  | 
| 138     { |  | 
| 139       let candidate = candidates[i].substr(1); |  | 
| 140       let count = (candidate in hash ? hash[candidate].length : 0); |  | 
| 141       if (count < resultCount || |  | 
| 142           (count == resultCount && candidate.length > resultLength)) |  | 
| 143       { |  | 
| 144         result = candidate; |  | 
| 145         resultCount = count; |  | 
| 146         resultLength = candidate.length; |  | 
| 147       } |  | 
| 148     } |  | 
| 149     return result; |  | 
| 150   }, |  | 
| 151 |  | 
| 152   /** |  | 
| 153    * Checks whether a particular filter is being matched against. |  | 
| 154    * @param {RegExpFilter} filter |  | 
| 155    * @return {boolean} |  | 
| 156    */ |  | 
| 157   hasFilter(filter) |  | 
| 158   { |  | 
| 159     return (filter.text in this.keywordByFilter); |  | 
| 160   }, |  | 
| 161 |  | 
| 162   /** |  | 
| 163    * Returns the keyword used for a filter, null for unknown filters. |  | 
| 164    * @param {RegExpFilter} filter |  | 
| 165    * @return {string} |  | 
| 166    */ |  | 
| 167   getKeywordForFilter(filter) |  | 
| 168   { |  | 
| 169     if (filter.text in this.keywordByFilter) |  | 
| 170       return this.keywordByFilter[filter.text]; |  | 
| 171     return null; |  | 
| 172   }, |  | 
| 173 |  | 
| 174   /** |  | 
| 175    * Checks whether the entries for a particular keyword match a URL |  | 
| 176    * @param {string} keyword |  | 
| 177    * @param {string} location |  | 
| 178    * @param {number} typeMask |  | 
| 179    * @param {string} docDomain |  | 
| 180    * @param {boolean} thirdParty |  | 
| 181    * @param {string} sitekey |  | 
| 182    * @param {boolean} specificOnly |  | 
| 183    * @return {?Filter} |  | 
| 184    */ |  | 
| 185   _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, |  | 
| 186                    specificOnly) |  | 
| 187   { |  | 
| 188     let list = this.filterByKeyword[keyword]; |  | 
| 189     for (let i = 0; i < list.length; i++) |  | 
| 190     { |  | 
| 191       let filter = list[i]; |  | 
| 192 |  | 
| 193       if (specificOnly && filter.isGeneric() && |  | 
| 194           !(filter instanceof WhitelistFilter)) |  | 
| 195         continue; |  | 
| 196 |  | 
| 197       if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) |  | 
| 198         return filter; |  | 
| 199     } |  | 
| 200     return null; |  | 
| 201   }, |  | 
| 202 |  | 
| 203   /** |  | 
| 204    * Tests whether the URL matches any of the known filters |  | 
| 205    * @param {string} location |  | 
| 206    *   URL to be tested |  | 
| 207    * @param {number} typeMask |  | 
| 208    *   bitmask of content / request types to match |  | 
| 209    * @param {string} docDomain |  | 
| 210    *   domain name of the document that loads the URL |  | 
| 211    * @param {boolean} thirdParty |  | 
| 212    *   should be true if the URL is a third-party request |  | 
| 213    * @param {string} sitekey |  | 
| 214    *   public key provided by the document |  | 
| 215    * @param {boolean} specificOnly |  | 
| 216    *   should be true if generic matches should be ignored |  | 
| 217    * @return {?RegExpFilter} |  | 
| 218    *   matching filter or null |  | 
| 219    */ |  | 
| 220   matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) |  | 
| 221   { |  | 
| 222     let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |  | 
| 223     if (candidates === null) |  | 
| 224       candidates = []; |  | 
| 225     candidates.push(""); |  | 
| 226     for (let i = 0, l = candidates.length; i < l; i++) |  | 
| 227     { |  | 
| 228       let substr = candidates[i]; |  | 
| 229       if (substr in this.filterByKeyword) |  | 
| 230       { |  | 
| 231         let result = this._checkEntryMatch(substr, location, typeMask, |  | 
| 232                                            docDomain, thirdParty, sitekey, |  | 
| 233                                            specificOnly); |  | 
| 234         if (result) |  | 
| 235           return result; |  | 
| 236       } |  | 
| 237     } |  | 
| 238 |  | 
| 239     return null; |  | 
| 240   } |  | 
| 241 }; |  | 
| 242 |  | 
| 243 /** |  | 
| 244  * Combines a matcher for blocking and exception rules, automatically sorts |  | 
| 245  * rules into two Matcher instances. |  | 
| 246  * @constructor |  | 
| 247  * @augments Matcher |  | 
| 248  */ |  | 
| 249 function CombinedMatcher() |  | 
| 250 { |  | 
| 251   this.blacklist = new Matcher(); |  | 
| 252   this.whitelist = new Matcher(); |  | 
| 253   this.resultCache = Object.create(null); |  | 
| 254 } |  | 
| 255 exports.CombinedMatcher = CombinedMatcher; |  | 
| 256 |  | 
| 257 /** |  | 
| 258  * Maximal number of matching cache entries to be kept |  | 
| 259  * @type {number} |  | 
| 260  */ |  | 
| 261 CombinedMatcher.maxCacheEntries = 1000; |  | 
| 262 |  | 
| 263 CombinedMatcher.prototype = |  | 
| 264 { |  | 
| 265   /** |  | 
| 266    * Matcher for blocking rules. |  | 
| 267    * @type {Matcher} |  | 
| 268    */ |  | 
| 269   blacklist: null, |  | 
| 270 |  | 
| 271   /** |  | 
| 272    * Matcher for exception rules. |  | 
| 273    * @type {Matcher} |  | 
| 274    */ |  | 
| 275   whitelist: null, |  | 
| 276 |  | 
| 277   /** |  | 
| 278    * Lookup table of previous matchesAny results |  | 
| 279    * @type {Object} |  | 
| 280    */ |  | 
| 281   resultCache: null, |  | 
| 282 |  | 
| 283   /** |  | 
| 284    * Number of entries in resultCache |  | 
| 285    * @type {number} |  | 
| 286    */ |  | 
| 287   cacheEntries: 0, |  | 
| 288 |  | 
| 289   /** |  | 
| 290    * @see Matcher#clear |  | 
| 291    */ |  | 
| 292   clear() |  | 
| 293   { |  | 
| 294     this.blacklist.clear(); |  | 
| 295     this.whitelist.clear(); |  | 
| 296     this.resultCache = Object.create(null); |  | 
| 297     this.cacheEntries = 0; |  | 
| 298   }, |  | 
| 299 |  | 
| 300   /** |  | 
| 301    * @see Matcher#add |  | 
| 302    * @param {Filter} filter |  | 
| 303    */ |  | 
| 304   add(filter) |  | 
| 305   { |  | 
| 306     if (filter instanceof WhitelistFilter) |  | 
| 307       this.whitelist.add(filter); |  | 
| 308     else |  | 
| 309       this.blacklist.add(filter); |  | 
| 310 |  | 
| 311     if (this.cacheEntries > 0) |  | 
| 312     { |  | 
| 313       this.resultCache = Object.create(null); |  | 
| 314       this.cacheEntries = 0; |  | 
| 315     } |  | 
| 316   }, |  | 
| 317 |  | 
| 318   /** |  | 
| 319    * @see Matcher#remove |  | 
| 320    * @param {Filter} filter |  | 
| 321    */ |  | 
| 322   remove(filter) |  | 
| 323   { |  | 
| 324     if (filter instanceof WhitelistFilter) |  | 
| 325       this.whitelist.remove(filter); |  | 
| 326     else |  | 
| 327       this.blacklist.remove(filter); |  | 
| 328 |  | 
| 329     if (this.cacheEntries > 0) |  | 
| 330     { |  | 
| 331       this.resultCache = Object.create(null); |  | 
| 332       this.cacheEntries = 0; |  | 
| 333     } |  | 
| 334   }, |  | 
| 335 |  | 
| 336   /** |  | 
| 337    * @see Matcher#findKeyword |  | 
| 338    * @param {Filter} filter |  | 
| 339    * @return {string} keyword |  | 
| 340    */ |  | 
| 341   findKeyword(filter) |  | 
| 342   { |  | 
| 343     if (filter instanceof WhitelistFilter) |  | 
| 344       return this.whitelist.findKeyword(filter); |  | 
| 345     return this.blacklist.findKeyword(filter); |  | 
| 346   }, |  | 
| 347 |  | 
| 348   /** |  | 
| 349    * @see Matcher#hasFilter |  | 
| 350    * @param {Filter} filter |  | 
| 351    * @return {boolean} |  | 
| 352    */ |  | 
| 353   hasFilter(filter) |  | 
| 354   { |  | 
| 355     if (filter instanceof WhitelistFilter) |  | 
| 356       return this.whitelist.hasFilter(filter); |  | 
| 357     return this.blacklist.hasFilter(filter); |  | 
| 358   }, |  | 
| 359 |  | 
| 360   /** |  | 
| 361    * @see Matcher#getKeywordForFilter |  | 
| 362    * @param {Filter} filter |  | 
| 363    * @return {string} keyword |  | 
| 364    */ |  | 
| 365   getKeywordForFilter(filter) |  | 
| 366   { |  | 
| 367     if (filter instanceof WhitelistFilter) |  | 
| 368       return this.whitelist.getKeywordForFilter(filter); |  | 
| 369     return this.blacklist.getKeywordForFilter(filter); |  | 
| 370   }, |  | 
| 371 |  | 
| 372   /** |  | 
| 373    * Checks whether a particular filter is slow |  | 
| 374    * @param {RegExpFilter} filter |  | 
| 375    * @return {boolean} |  | 
| 376    */ |  | 
| 377   isSlowFilter(filter) |  | 
| 378   { |  | 
| 379     let matcher = ( |  | 
| 380       filter instanceof WhitelistFilter ? this.whitelist : this.blacklist |  | 
| 381     ); |  | 
| 382     if (matcher.hasFilter(filter)) |  | 
| 383       return !matcher.getKeywordForFilter(filter); |  | 
| 384     return !matcher.findKeyword(filter); |  | 
| 385   }, |  | 
| 386 |  | 
| 387   /** |  | 
| 388    * Optimized filter matching testing both whitelist and blacklist matchers |  | 
| 389    * simultaneously. For parameters see Matcher.matchesAny(). |  | 
| 390    * @see Matcher#matchesAny |  | 
| 391    * @inheritdoc |  | 
| 392    */ |  | 
| 393   matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, |  | 
| 394                      specificOnly) |  | 
| 395   { |  | 
| 396     let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |  | 
| 397     if (candidates === null) |  | 
| 398       candidates = []; |  | 
| 399     candidates.push(""); |  | 
| 400 |  | 
| 401     let blacklistHit = null; |  | 
| 402     for (let i = 0, l = candidates.length; i < l; i++) |  | 
| 403     { |  | 
| 404       let substr = candidates[i]; |  | 
| 405       if (substr in this.whitelist.filterByKeyword) |  | 
| 406       { |  | 
| 407         let result = this.whitelist._checkEntryMatch( |  | 
| 408           substr, location, typeMask, docDomain, thirdParty, sitekey |  | 
| 409         ); |  | 
| 410         if (result) |  | 
| 411           return result; |  | 
| 412       } |  | 
| 413       if (substr in this.blacklist.filterByKeyword && blacklistHit === null) |  | 
| 414       { |  | 
| 415         blacklistHit = this.blacklist._checkEntryMatch( |  | 
| 416           substr, location, typeMask, docDomain, thirdParty, sitekey, |  | 
| 417           specificOnly |  | 
| 418         ); |  | 
| 419       } |  | 
| 420     } |  | 
| 421     return blacklistHit; |  | 
| 422   }, |  | 
| 423 |  | 
| 424   /** |  | 
| 425    * @see Matcher#matchesAny |  | 
| 426    * @inheritdoc |  | 
| 427    */ |  | 
| 428   matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly) |  | 
| 429   { |  | 
| 430     let key = location + " " + typeMask + " " + docDomain + " " + thirdParty + |  | 
| 431       " " + sitekey + " " + specificOnly; |  | 
| 432     if (key in this.resultCache) |  | 
| 433       return this.resultCache[key]; |  | 
| 434 |  | 
| 435     let result = this.matchesAnyInternal(location, typeMask, docDomain, |  | 
| 436                                          thirdParty, sitekey, specificOnly); |  | 
| 437 |  | 
| 438     if (this.cacheEntries >= CombinedMatcher.maxCacheEntries) |  | 
| 439     { |  | 
| 440       this.resultCache = Object.create(null); |  | 
| 441       this.cacheEntries = 0; |  | 
| 442     } |  | 
| 443 |  | 
| 444     this.resultCache[key] = result; |  | 
| 445     this.cacheEntries++; |  | 
| 446 |  | 
| 447     return result; |  | 
| 448   } |  | 
| 449 }; |  | 
| 450 |  | 
| 451 /** |  | 
| 452  * Shared CombinedMatcher instance that should usually be used. |  | 
| 453  * @type {CombinedMatcher} |  | 
| 454  */ |  | 
| 455 exports.defaultMatcher = new CombinedMatcher(); |  | 
| OLD | NEW | 
|---|