 Issue 29773570:
  Issue 6652 - Implement fast selector lookups for unknown domains  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/
    
  
    Issue 29773570:
  Issue 6652 - Implement fast selector lookups for unknown domains  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/
| Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH | 
| 4 * | 4 * | 
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as | 
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. | 
| 8 * | 8 * | 
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, | 
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. | 
| 13 * | 13 * | 
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License | 
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| 16 */ | 16 */ | 
| 17 | 17 | 
| 18 "use strict"; | 18 "use strict"; | 
| 19 | 19 | 
| 20 /** | 20 /** | 
| 21 * @fileOverview Element hiding implementation. | 21 * @fileOverview Element hiding implementation. | 
| 22 */ | 22 */ | 
| 23 | 23 | 
| 24 const {ElemHideException} = require("./filterClasses"); | 24 const {ElemHideException} = require("./filterClasses"); | 
| 25 const {FilterNotifier} = require("./filterNotifier"); | 25 const {FilterNotifier} = require("./filterNotifier"); | 
| 26 | 26 | 
| 27 /** | 27 /** | 
| 28 * Lookup table, active flag, by filter by domain. | 28 * Lookup table, active flag, by filter by domain. | 
| 29 * (Only contains filters that aren't unconditionally matched for all domains.) | 29 * (Only contains filters that aren't unconditionally matched for all domains.) | 
| 30 * @type {Map.<string,Map.<Filter,boolean>>} | 30 * @type {Map.<string,?Map.<Filter,boolean>>} | 
| 
Manish Jethani
2018/05/07 16:03:02
The value here can now be null.
 | |
| 31 */ | 31 */ | 
| 32 let filtersByDomain = new Map(); | 32 let filtersByDomain = new Map(); | 
| 33 | 33 | 
| 34 /** | 34 /** | 
| 35 * Lookup table, filter by selector. (Only used for selectors that are | 35 * Lookup table, filter by selector. (Only used for selectors that are | 
| 36 * unconditionally matched for all domains.) | 36 * unconditionally matched for all domains.) | 
| 37 * @type {Map.<string,Filter>} | 37 * @type {Map.<string,Filter>} | 
| 38 */ | 38 */ | 
| 39 let filterBySelector = new Map(); | 39 let filterBySelector = new Map(); | 
| 40 | 40 | 
| (...skipping 14 matching lines...) Expand all Loading... | |
| 55 * @type {Set.<ElemHideBase>} | 55 * @type {Set.<ElemHideBase>} | 
| 56 */ | 56 */ | 
| 57 let knownFilters = new Set(); | 57 let knownFilters = new Set(); | 
| 58 | 58 | 
| 59 /** | 59 /** | 
| 60 * Lookup table, lists of element hiding exceptions by selector | 60 * Lookup table, lists of element hiding exceptions by selector | 
| 61 * @type {Map.<string,Filter[]>} | 61 * @type {Map.<string,Filter[]>} | 
| 62 */ | 62 */ | 
| 63 let exceptions = new Map(); | 63 let exceptions = new Map(); | 
| 64 | 64 | 
| 65 /** | |
| 66 * Set containing selectors with generic exceptions | |
| 67 * @type {Set.<string>} | |
| 68 */ | |
| 69 let genericExceptionSelectors = new Set(); | |
| 70 | |
| 71 /** | |
| 72 * Checks if a domain is known | |
| 73 * @param {string} domain | |
| 74 * @returns {boolean} | |
| 75 */ | |
| 76 function isDomainKnown(domain) | |
| 77 { | |
| 78 while (domain) | |
| 79 { | |
| 80 // A domain is "known" if we have seen any filters that would apply to it. | |
| 81 // For example, given the filters "##foo" and "example.com#@#foo", | |
| 82 // example.com is a known domain, as is mail.example.com and any other | |
| 83 // subdomains of example.com. | |
| 84 if (filtersByDomain.has(domain)) | |
| 85 return true; | |
| 86 | |
| 87 let nextDot = domain.indexOf("."); | |
| 88 domain = nextDot == -1 ? null : domain.substring(nextDot + 1); | |
| 89 } | |
| 90 | |
| 91 return false; | |
| 92 } | |
| 93 | |
| 94 /** | |
| 95 * Returns a list of selectors that apply on any unknown domain | |
| 
Manish Jethani
2018/05/07 16:03:02
There are generic selectors that are "conditional"
 | |
| 96 * @returns {string[]} | |
| 97 */ | |
| 98 function getConditionalGenericSelectors() | |
| 
Manish Jethani
2018/05/07 16:10:33
To give you some numbers, there are 18,300 unconditional […]
 | |
| 99 { | |
| 100 let selectors = []; | |
| 101 | |
| 102 let filters = filtersByDomain.get(""); | |
| 103 if (!filters) | |
| 104 return selectors; | |
| 105 | |
| 106 for (let {selector} of filters.keys()) | |
| 107 { | |
| 108 if (!genericExceptionSelectors.has(selector)) | |
| 
Manish Jethani
2018/05/07 16:03:02
Here we have to check for generic exceptions like
 | |
| 109 selectors.push(selector); | |
| 110 } | |
| 111 | |
| 112 return selectors; | |
| 113 } | |
| 114 | |
| 65 /** | 115 /** | 
| 66 * Container for element hiding filters | 116 * Container for element hiding filters | 
| 67 * @class | 117 * @class | 
| 68 */ | 118 */ | 
| 69 let ElemHide = exports.ElemHide = { | 119 let ElemHide = exports.ElemHide = { | 
| 70 /** | 120 /** | 
| 71 * Removes all known filters | 121 * Removes all known filters | 
| 72 */ | 122 */ | 
| 73 clear() | 123 clear() | 
| 74 { | 124 { | 
| 75 for (let collection of [filtersByDomain, filterBySelector, | 125 for (let collection of [filtersByDomain, filterBySelector, | 
| 76 knownFilters, exceptions]) | 126 knownFilters, exceptions, | 
| 127 genericExceptionSelectors]) | |
| 77 { | 128 { | 
| 78 collection.clear(); | 129 collection.clear(); | 
| 79 } | 130 } | 
| 80 unconditionalSelectors = null; | 131 unconditionalSelectors = null; | 
| 81 FilterNotifier.emit("elemhideupdate"); | 132 FilterNotifier.emit("elemhideupdate"); | 
| 82 }, | 133 }, | 
| 83 | 134 | 
| 84 _addToFiltersByDomain(filter) | 135 _addToFiltersByDomain(filter) | 
| 85 { | 136 { | 
| 86 let domains = filter.domains || defaultDomains; | 137 let domains = filter.domains || defaultDomains; | 
| 87 for (let [domain, isIncluded] of domains) | 138 if (filter instanceof ElemHideException) | 
| 88 { | 139 { | 
| 89 // There's no need to note that a filter is generically disabled. | 140 for (let domain of domains.keys()) | 
| 90 if (!isIncluded && domain == "") | 141 { | 
| 91 continue; | 142 // Add an entry for each domain, but without any filters. This makes | 
| 143 // the domain "known" and helps us avoid the optimized path (which | |
| 144 // would give incorrect results). | |
| 145 if (domain != "" && !filtersByDomain.has(domain)) | |
| 
Manish Jethani
2018/05/07 16:03:02
We could have had a separate knownExceptionDomains
 | |
| 146 filtersByDomain.set(domain, null); | |
| 147 } | |
| 148 } | |
| 149 else | |
| 150 { | |
| 151 for (let [domain, isIncluded] of domains) | |
| 152 { | |
| 153 // There's no need to note that a filter is generically disabled. | |
| 154 if (!isIncluded && domain == "") | |
| 155 continue; | |
| 92 | 156 | 
| 93 let filters = filtersByDomain.get(domain); | 157 let filters = filtersByDomain.get(domain); | 
| 94 if (!filters) | 158 if (!filters) | 
| 95 filtersByDomain.set(domain, filters = new Map()); | 159 filtersByDomain.set(domain, filters = new Map()); | 
| 96 filters.set(filter, isIncluded); | 160 filters.set(filter, isIncluded); | 
| 161 } | |
| 97 } | 162 } | 
| 98 }, | 163 }, | 
| 99 | 164 | 
| 100 /** | 165 /** | 
| 101 * Add a new element hiding filter | 166 * Add a new element hiding filter | 
| 102 * @param {ElemHideBase} filter | 167 * @param {ElemHideBase} filter | 
| 103 */ | 168 */ | 
| 104 add(filter) | 169 add(filter) | 
| 105 { | 170 { | 
| 106 if (knownFilters.has(filter)) | 171 if (knownFilters.has(filter)) | 
| 107 return; | 172 return; | 
| 108 | 173 | 
| 109 if (filter instanceof ElemHideException) | 174 if (filter instanceof ElemHideException) | 
| 110 { | 175 { | 
| 111 let {selector} = filter; | 176 let {selector, domains} = filter; | 
| 177 | |
| 112 let list = exceptions.get(selector); | 178 let list = exceptions.get(selector); | 
| 113 if (list) | 179 if (list) | 
| 114 list.push(filter); | 180 list.push(filter); | 
| 115 else | 181 else | 
| 116 exceptions.set(selector, [filter]); | 182 exceptions.set(selector, [filter]); | 
| 117 | 183 | 
| 184 if (domains) | |
| 
Manish Jethani
2018/05/07 16:03:02
For exceptions too we should remember the domains,
 | |
| 185 this._addToFiltersByDomain(filter); | |
| 186 | |
| 187 if (filter.isGeneric()) | |
| 188 genericExceptionSelectors.add(filter.selector); | |
| 189 | |
| 118 // If this is the first exception for a previously unconditionally | 190 // If this is the first exception for a previously unconditionally | 
| 119 // applied element hiding selector we need to take care to update the | 191 // applied element hiding selector we need to take care to update the | 
| 120 // lookups. | 192 // lookups. | 
| 121 let unconditionalFilterForSelector = filterBySelector.get(selector); | 193 let unconditionalFilterForSelector = filterBySelector.get(selector); | 
| 122 if (unconditionalFilterForSelector) | 194 if (unconditionalFilterForSelector) | 
| 123 { | 195 { | 
| 124 this._addToFiltersByDomain(unconditionalFilterForSelector); | 196 this._addToFiltersByDomain(unconditionalFilterForSelector); | 
| 125 filterBySelector.delete(selector); | 197 filterBySelector.delete(selector); | 
| 126 unconditionalSelectors = null; | 198 unconditionalSelectors = null; | 
| 127 } | 199 } | 
| (...skipping 23 matching lines...) Expand all Loading... | |
| 151 if (!knownFilters.has(filter)) | 223 if (!knownFilters.has(filter)) | 
| 152 return; | 224 return; | 
| 153 | 225 | 
| 154 // Whitelisting filters | 226 // Whitelisting filters | 
| 155 if (filter instanceof ElemHideException) | 227 if (filter instanceof ElemHideException) | 
| 156 { | 228 { | 
| 157 let list = exceptions.get(filter.selector); | 229 let list = exceptions.get(filter.selector); | 
| 158 let index = list.indexOf(filter); | 230 let index = list.indexOf(filter); | 
| 159 if (index >= 0) | 231 if (index >= 0) | 
| 160 list.splice(index, 1); | 232 list.splice(index, 1); | 
| 233 | |
| 
Manish Jethani
2018/05/07 16:03:02
Note that we don't bother "unknowing" a domain once […]
 | |
| 234 if (filter.isGeneric()) | |
| 235 genericExceptionSelectors.delete(filter.selector); | |
| 161 } | 236 } | 
| 162 // Unconditionally applied element hiding filters | 237 // Unconditionally applied element hiding filters | 
| 163 else if (filterBySelector.get(filter.selector) == filter) | 238 else if (filterBySelector.get(filter.selector) == filter) | 
| 164 { | 239 { | 
| 165 filterBySelector.delete(filter.selector); | 240 filterBySelector.delete(filter.selector); | 
| 166 unconditionalSelectors = null; | 241 unconditionalSelectors = null; | 
| 167 } | 242 } | 
| 168 // Conditionally applied element hiding filters | 243 // Conditionally applied element hiding filters | 
| 169 else | 244 else | 
| 170 { | 245 { | 
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 245 getSelectorsForDomain(domain, criteria) | 320 getSelectorsForDomain(domain, criteria) | 
| 246 { | 321 { | 
| 247 let selectors = []; | 322 let selectors = []; | 
| 248 | 323 | 
| 249 if (typeof criteria == "undefined") | 324 if (typeof criteria == "undefined") | 
| 250 criteria = ElemHide.ALL_MATCHING; | 325 criteria = ElemHide.ALL_MATCHING; | 
| 251 if (criteria < ElemHide.NO_UNCONDITIONAL) | 326 if (criteria < ElemHide.NO_UNCONDITIONAL) | 
| 252 selectors = this.getUnconditionalSelectors(); | 327 selectors = this.getUnconditionalSelectors(); | 
| 253 | 328 | 
| 254 let specificOnly = (criteria >= ElemHide.SPECIFIC_ONLY); | 329 let specificOnly = (criteria >= ElemHide.SPECIFIC_ONLY); | 
| 255 let excluded = new Set(); | |
| 256 let currentDomain = domain ? domain.toUpperCase() : ""; | 330 let currentDomain = domain ? domain.toUpperCase() : ""; | 
| 257 | 331 | 
| 258 // This code is a performance hot-spot, which is why we've made certain | 332 if (isDomainKnown(currentDomain)) | 
| 
Manish Jethani
2018/05/07 16:03:02
The call to isDomainKnown is the only additional c
 | |
| 259 // micro-optimisations. Please be careful before making changes. | |
| 260 while (true) | |
| 261 { | 333 { | 
| 262 if (specificOnly && currentDomain == "") | 334 let excluded = new Set(); | 
| 263 break; | |
| 264 | 335 | 
| 265 let filters = filtersByDomain.get(currentDomain); | 336 // This code is a performance hot-spot, which is why we've made certain | 
| 266 if (filters) | 337 // micro-optimisations. Please be careful before making changes. | 
| 338 while (true) | |
| 267 { | 339 { | 
| 268 for (let [filter, isIncluded] of filters) | 340 if (specificOnly && currentDomain == "") | 
| 341 break; | |
| 342 | |
| 343 let filters = filtersByDomain.get(currentDomain); | |
| 344 if (filters) | |
| 269 { | 345 { | 
| 270 if (!isIncluded) | 346 for (let [filter, isIncluded] of filters) | 
| 271 { | 347 { | 
| 272 excluded.add(filter); | 348 if (!isIncluded) | 
| 273 } | 349 { | 
| 274 else if ((excluded.size == 0 || !excluded.has(filter)) && | 350 excluded.add(filter); | 
| 275 !this.getException(filter, domain)) | 351 } | 
| 276 { | 352 else if ((excluded.size == 0 || !excluded.has(filter)) && | 
| 277 selectors.push(filter.selector); | 353 !this.getException(filter, domain)) | 
| 354 { | |
| 355 selectors.push(filter.selector); | |
| 356 } | |
| 278 } | 357 } | 
| 279 } | 358 } | 
| 359 | |
| 360 if (currentDomain == "") | |
| 361 break; | |
| 362 | |
| 363 let nextDot = currentDomain.indexOf("."); | |
| 364 currentDomain = nextDot == -1 ? "" : currentDomain.substr(nextDot + 1); | |
| 280 } | 365 } | 
| 281 | 366 } | 
| 282 if (currentDomain == "") | 367 else if (!specificOnly) | 
| 283 break; | 368 { | 
| 284 | 369 selectors = selectors.concat(getConditionalGenericSelectors()); | 
| 
Manish Jethani
2018/05/07 16:03:02
We might want to cache the value returned by getConditionalGenericSelectors […]
 | |
| 285 let nextDot = currentDomain.indexOf("."); | |
| 286 currentDomain = nextDot == -1 ? "" : currentDomain.substr(nextDot + 1); | |
| 287 } | 370 } | 
| 288 | 371 | 
| 289 return selectors; | 372 return selectors; | 
| 290 } | 373 } | 
| 291 }; | 374 }; | 
| OLD | NEW |