| LEFT | RIGHT |
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 "use strict"; | 18 "use strict"; |
| 19 | 19 |
| 20 /** | 20 /** |
| 21 * @fileOverview Matcher class implementing matching addresses against | 21 * @fileOverview Matcher class implementing matching addresses against |
| 22 * a list of filters. | 22 * a list of filters. |
| 23 */ | 23 */ |
| 24 | 24 |
| 25 const {WhitelistFilter} = require("./filterClasses"); | 25 const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); |
| 26 |
| 27 /** |
| 28 * Regular expression for matching a keyword in a filter. |
| 29 * @type {RegExp} |
| 30 */ |
| 31 const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
| 32 |
| 33 /** |
| 34 * Regular expression for matching all keywords in a filter. |
| 35 * @type {RegExp} |
| 36 */ |
| 37 const allKeywordsRegExp = new RegExp(keywordRegExp, "g"); |
| 38 |
| 39 /** |
| 40 * Bitmask for "types" that are for exception rules only, like |
| 41 * <code>$document</code>, <code>$elemhide</code>, and so on. |
| 42 * @type {number} |
| 43 */ |
| 44 const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT | |
| 45 RegExpFilter.typeMap.ELEMHIDE | |
| 46 RegExpFilter.typeMap.GENERICHIDE | |
| 47 RegExpFilter.typeMap.GENERICBLOCK; |
| 48 |
| 49 /** |
| 50 * Checks whether a particular filter is slow. |
| 51 * @param {RegExpFilter} filter |
| 52 * @returns {boolean} |
| 53 */ |
| 54 function isSlowFilter(filter) |
| 55 { |
| 56 return !filter.pattern || !keywordRegExp.test(filter.pattern); |
| 57 } |
| 58 |
| 59 exports.isSlowFilter = isSlowFilter; |
| 26 | 60 |
| 27 /** | 61 /** |
| 28 * Blacklist/whitelist filter matching | 62 * Blacklist/whitelist filter matching |
| 29 */ | 63 */ |
| 30 class Matcher | 64 class Matcher |
| 31 { | 65 { |
| 32 constructor() | 66 constructor() |
| 33 { | 67 { |
| 34 /** | 68 /** |
| 35 * Lookup table for filters by their associated keyword | 69 * Lookup table for filters by their associated keyword |
| 36 * @type {Map.<string,(Filter|Filter[])>} | 70 * @type {Map.<string,(Filter|Set.<Filter>)>} |
| 37 */ | 71 */ |
| 38 this.filterByKeyword = new Map(); | 72 this.filterByKeyword = new Map(); |
| 39 | |
| 40 /** | |
| 41 * Lookup table for keywords by the filter | |
| 42 * @type {Map.<Filter,string>} | |
| 43 */ | |
| 44 this.keywordByFilter = new Map(); | |
| 45 } | 73 } |
| 46 | 74 |
| 47 /** | 75 /** |
| 48 * Removes all known filters | 76 * Removes all known filters |
| 49 */ | 77 */ |
| 50 clear() | 78 clear() |
| 51 { | 79 { |
| 52 this.filterByKeyword.clear(); | 80 this.filterByKeyword.clear(); |
| 53 this.keywordByFilter.clear(); | |
| 54 } | 81 } |
| 55 | 82 |
| 56 /** | 83 /** |
| 57 * Adds a filter to the matcher | 84 * Adds a filter to the matcher |
| 58 * @param {RegExpFilter} filter | 85 * @param {RegExpFilter} filter |
| 59 */ | 86 */ |
| 60 add(filter) | 87 add(filter) |
| 61 { | 88 { |
| 62 if (this.keywordByFilter.has(filter)) | |
| 63 return; | |
| 64 | |
| 65 // Look for a suitable keyword | 89 // Look for a suitable keyword |
| 66 let keyword = this.findKeyword(filter); | 90 let keyword = this.findKeyword(filter); |
| 67 let oldEntry = this.filterByKeyword.get(keyword); | 91 let set = this.filterByKeyword.get(keyword); |
| 68 if (typeof oldEntry == "undefined") | 92 if (typeof set == "undefined") |
| 93 { |
| 69 this.filterByKeyword.set(keyword, filter); | 94 this.filterByKeyword.set(keyword, filter); |
| 70 else if (oldEntry.length == 1) | 95 } |
| 71 this.filterByKeyword.set(keyword, [oldEntry, filter]); | 96 else if (set.size == 1) |
| 97 { |
| 98 if (filter != set) |
| 99 this.filterByKeyword.set(keyword, new Set([set, filter])); |
| 100 } |
| 72 else | 101 else |
| 73 oldEntry.push(filter); | 102 { |
| 74 this.keywordByFilter.set(filter, keyword); | 103 set.add(filter); |
| 104 } |
| 75 } | 105 } |
| 76 | 106 |
| 77 /** | 107 /** |
| 78 * Removes a filter from the matcher | 108 * Removes a filter from the matcher |
| 79 * @param {RegExpFilter} filter | 109 * @param {RegExpFilter} filter |
| 80 */ | 110 */ |
| 81 remove(filter) | 111 remove(filter) |
| 82 { | 112 { |
| 83 let keyword = this.keywordByFilter.get(filter); | 113 let keyword = this.findKeyword(filter); |
| 84 if (typeof keyword == "undefined") | 114 let set = this.filterByKeyword.get(keyword); |
| 115 if (typeof set == "undefined") |
| 85 return; | 116 return; |
| 86 | 117 |
| 87 let list = this.filterByKeyword.get(keyword); | 118 if (set.size == 1) |
| 88 if (list.length <= 1) | 119 { |
| 89 this.filterByKeyword.delete(keyword); | 120 if (filter == set) |
| 121 this.filterByKeyword.delete(keyword); |
| 122 } |
| 90 else | 123 else |
| 91 { | 124 { |
| 92 let index = list.indexOf(filter); | 125 set.delete(filter); |
| 93 if (index >= 0) | 126 |
| 94 { | 127 if (set.size == 1) |
| 95 list.splice(index, 1); | 128 this.filterByKeyword.set(keyword, [...set][0]); |
| 96 if (list.length == 1) | 129 } |
| 97 this.filterByKeyword.set(keyword, list[0]); | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 this.keywordByFilter.delete(filter); | |
| 102 } | 130 } |
| 103 | 131 |
| 104 /** | 132 /** |
| 105 * Chooses a keyword to be associated with the filter | 133 * Chooses a keyword to be associated with the filter |
| 106 * @param {Filter} filter | 134 * @param {Filter} filter |
| 107 * @returns {string} keyword or an empty string if no keyword could be found | 135 * @returns {string} keyword or an empty string if no keyword could be found |
| 108 */ | 136 */ |
| 109 findKeyword(filter) | 137 findKeyword(filter) |
| 110 { | 138 { |
| 111 let result = ""; | 139 let result = ""; |
| 112 let {pattern} = filter; | 140 let {pattern} = filter; |
| 113 if (pattern == null) | 141 if (pattern == null) |
| 114 return result; | 142 return result; |
| 115 | 143 |
| 116 let candidates = pattern.toLowerCase().match( | 144 let candidates = pattern.toLowerCase().match(allKeywordsRegExp); |
| 117 /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g | |
| 118 ); | |
| 119 if (!candidates) | 145 if (!candidates) |
| 120 return result; | 146 return result; |
| 121 | 147 |
| 122 let hash = this.filterByKeyword; | 148 let hash = this.filterByKeyword; |
| 123 let resultCount = 0xFFFFFF; | 149 let resultCount = 0xFFFFFF; |
| 124 let resultLength = 0; | 150 let resultLength = 0; |
| 125 for (let i = 0, l = candidates.length; i < l; i++) | 151 for (let i = 0, l = candidates.length; i < l; i++) |
| 126 { | 152 { |
| 127 let candidate = candidates[i].substr(1); | 153 let candidate = candidates[i].substr(1); |
| 128 let filters = hash.get(candidate); | 154 let filters = hash.get(candidate); |
| 129 let count = typeof filters != "undefined" ? filters.length : 0; | 155 let count = typeof filters != "undefined" ? filters.size : 0; |
| 130 if (count < resultCount || | 156 if (count < resultCount || |
| 131 (count == resultCount && candidate.length > resultLength)) | 157 (count == resultCount && candidate.length > resultLength)) |
| 132 { | 158 { |
| 133 result = candidate; | 159 result = candidate; |
| 134 resultCount = count; | 160 resultCount = count; |
| 135 resultLength = candidate.length; | 161 resultLength = candidate.length; |
| 136 } | 162 } |
| 137 } | 163 } |
| 138 return result; | 164 return result; |
| 139 } | |
| 140 | |
| 141 /** | |
| 142 * Checks whether a particular filter is being matched against. | |
| 143 * @param {RegExpFilter} filter | |
| 144 * @returns {boolean} | |
| 145 */ | |
| 146 hasFilter(filter) | |
| 147 { | |
| 148 return this.keywordByFilter.has(filter); | |
| 149 } | |
| 150 | |
| 151 /** | |
| 152 * Returns the keyword used for a filter, <code>null</code> | |
| 153 * for unknown filters. | |
| 154 * @param {RegExpFilter} filter | |
| 155 * @returns {?string} | |
| 156 */ | |
| 157 getKeywordForFilter(filter) | |
| 158 { | |
| 159 let keyword = this.keywordByFilter.get(filter); | |
| 160 return typeof keyword != "undefined" ? keyword : null; | |
| 161 } | 165 } |
| 162 | 166 |
| 163 /** | 167 /** |
| 164 * Checks whether the entries for a particular keyword match a URL | 168 * Checks whether the entries for a particular keyword match a URL |
| 165 * @param {string} keyword | 169 * @param {string} keyword |
| 166 * @param {string} location | 170 * @param {string} location |
| 167 * @param {number} typeMask | 171 * @param {number} typeMask |
| 168 * @param {string} [docDomain] | 172 * @param {string} [docDomain] |
| 169 * @param {boolean} [thirdParty] | 173 * @param {boolean} [thirdParty] |
| 170 * @param {string} [sitekey] | 174 * @param {string} [sitekey] |
| 171 * @param {boolean} [specificOnly] | 175 * @param {boolean} [specificOnly] |
| 172 * @returns {?Filter} | 176 * @returns {?Filter} |
| 173 */ | 177 */ |
| 174 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, | 178 _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, |
| 175 specificOnly) | 179 specificOnly) |
| 176 { | 180 { |
| 177 let list = this.filterByKeyword.get(keyword); | 181 let set = this.filterByKeyword.get(keyword); |
| 178 if (typeof list == "undefined") | 182 if (typeof set == "undefined") |
| 179 return null; | 183 return null; |
| 180 for (let i = 0; i < list.length; i++) | 184 |
| 181 { | 185 for (let filter of set) |
| 182 let filter = list[i]; | 186 { |
| 183 | |
| 184 if (specificOnly && filter.isGeneric() && | 187 if (specificOnly && filter.isGeneric() && |
| 185 !(filter instanceof WhitelistFilter)) | 188 !(filter instanceof WhitelistFilter)) |
| 186 continue; | 189 continue; |
| 187 | 190 |
| 188 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) | 191 if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) |
| 189 return filter; | 192 return filter; |
| 190 } | 193 } |
| 191 return null; | 194 return null; |
| 192 } | 195 } |
| 193 | 196 |
| (...skipping 112 matching lines...) | (...skipping 112 matching lines...) |
| 306 * @returns {string} keyword | 309 * @returns {string} keyword |
| 307 */ | 310 */ |
| 308 findKeyword(filter) | 311 findKeyword(filter) |
| 309 { | 312 { |
| 310 if (filter instanceof WhitelistFilter) | 313 if (filter instanceof WhitelistFilter) |
| 311 return this.whitelist.findKeyword(filter); | 314 return this.whitelist.findKeyword(filter); |
| 312 return this.blacklist.findKeyword(filter); | 315 return this.blacklist.findKeyword(filter); |
| 313 } | 316 } |
| 314 | 317 |
| 315 /** | 318 /** |
| 316 * @see Matcher#hasFilter | |
| 317 * @param {Filter} filter | |
| 318 * @returns {boolean} | |
| 319 */ | |
| 320 hasFilter(filter) | |
| 321 { | |
| 322 if (filter instanceof WhitelistFilter) | |
| 323 return this.whitelist.hasFilter(filter); | |
| 324 return this.blacklist.hasFilter(filter); | |
| 325 } | |
| 326 | |
| 327 /** | |
| 328 * @see Matcher#getKeywordForFilter | |
| 329 * @param {Filter} filter | |
| 330 * @returns {string} keyword | |
| 331 */ | |
| 332 getKeywordForFilter(filter) | |
| 333 { | |
| 334 if (filter instanceof WhitelistFilter) | |
| 335 return this.whitelist.getKeywordForFilter(filter); | |
| 336 return this.blacklist.getKeywordForFilter(filter); | |
| 337 } | |
| 338 | |
| 339 /** | |
| 340 * Checks whether a particular filter is slow | |
| 341 * @param {RegExpFilter} filter | |
| 342 * @returns {boolean} | |
| 343 */ | |
| 344 isSlowFilter(filter) | |
| 345 { | |
| 346 let matcher = ( | |
| 347 filter instanceof WhitelistFilter ? this.whitelist : this.blacklist | |
| 348 ); | |
| 349 let keyword = matcher.getKeywordForFilter(filter); | |
| 350 if (keyword != null) | |
| 351 return !keyword; | |
| 352 return !matcher.findKeyword(filter); | |
| 353 } | |
| 354 | |
| 355 /** | |
| 356 * Optimized filter matching testing both whitelist and blacklist matchers | 319 * Optimized filter matching testing both whitelist and blacklist matchers |
| 357 * simultaneously. For parameters see | 320 * simultaneously. For parameters see |
| 358 {@link Matcher#matchesAny Matcher.matchesAny()}. | 321 {@link Matcher#matchesAny Matcher.matchesAny()}. |
| 359 * @see Matcher#matchesAny | 322 * @see Matcher#matchesAny |
| 360 * @inheritdoc | 323 * @inheritdoc |
| 361 */ | 324 */ |
| 362 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, | 325 matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey, |
| 363 specificOnly) | 326 specificOnly) |
| 364 { | 327 { |
| 365 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); | 328 let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g); |
| 366 if (candidates === null) | 329 if (candidates === null) |
| 367 candidates = []; | 330 candidates = []; |
| 368 candidates.push(""); | 331 candidates.push(""); |
| 369 | 332 |
| 370 let whitelistHit = null; | 333 let whitelistHit = null; |
| 371 let blacklistHit = null; | 334 let blacklistHit = null; |
| 372 | 335 |
| 373 for (let i = 0, l = candidates.length; i < l; i++) | 336 // If the type mask includes no types other than whitelist-only types, we |
| 374 { | 337 // can skip the blacklist. |
| 375 let substr = candidates[i]; | 338 if ((typeMask & ~WHITELIST_ONLY_TYPES) != 0) |
| 376 blacklistHit = this.blacklist._checkEntryMatch( | 339 { |
| 377 substr, location, typeMask, docDomain, thirdParty, sitekey, | 340 for (let i = 0, l = candidates.length; !blacklistHit && i < l; i++) |
| 378 specificOnly | |
| 379 ); | |
| 380 if (blacklistHit) | |
| 381 break; | |
| 382 } | |
| 383 | |
| 384 if (blacklistHit) | |
| 385 { | |
| 386 for (let i = 0, l = candidates.length; i < l; i++) | |
| 387 { | 341 { |
| 388 let substr = candidates[i]; | 342 blacklistHit = this.blacklist._checkEntryMatch(candidates[i], location, |
| 389 whitelistHit = this.whitelist._checkEntryMatch( | 343 typeMask, docDomain, |
| 390 substr, location, typeMask, docDomain, thirdParty, sitekey | 344 thirdParty, sitekey, |
| 391 ); | 345 specificOnly); |
| 392 if (whitelistHit) | 346 } |
| 393 break; | 347 } |
| 348 |
| 349 // If the type mask includes any whitelist-only types, we need to check the |
| 350 // whitelist. |
| 351 if (blacklistHit || (typeMask & WHITELIST_ONLY_TYPES) != 0) |
| 352 { |
| 353 for (let i = 0, l = candidates.length; !whitelistHit && i < l; i++) |
| 354 { |
| 355 whitelistHit = this.whitelist._checkEntryMatch(candidates[i], location, |
| 356 typeMask, docDomain, |
| 357 thirdParty, sitekey); |
| 394 } | 358 } |
| 395 } | 359 } |
| 396 | 360 |
| 397 return whitelistHit || blacklistHit; | 361 return whitelistHit || blacklistHit; |
| 398 } | 362 } |
| 399 | 363 |
| 400 /** | 364 /** |
| 401 * @see Matcher#matchesAny | 365 * @see Matcher#matchesAny |
| 402 * @inheritdoc | 366 * @inheritdoc |
| 403 */ | 367 */ |
| (...skipping 20 matching lines...) | (...skipping 20 matching lines...) |
| 424 | 388 |
| 425 exports.CombinedMatcher = CombinedMatcher; | 389 exports.CombinedMatcher = CombinedMatcher; |
| 426 | 390 |
| 427 /** | 391 /** |
| 428 * Shared {@link CombinedMatcher} instance that should usually be used. | 392 * Shared {@link CombinedMatcher} instance that should usually be used. |
| 429 * @type {CombinedMatcher} | 393 * @type {CombinedMatcher} |
| 430 */ | 394 */ |
| 431 let defaultMatcher = new CombinedMatcher(); | 395 let defaultMatcher = new CombinedMatcher(); |
| 432 | 396 |
| 433 exports.defaultMatcher = defaultMatcher; | 397 exports.defaultMatcher = defaultMatcher; |
| LEFT | RIGHT |