 Issue 29773570:
  Issue 6652 - Implement fast selector lookups for unknown domains  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/
    
  
    Issue 29773570:
  Issue 6652 - Implement fast selector lookups for unknown domains  (Closed) 
  Base URL: https://hg.adblockplus.org/adblockpluscore/| Index: lib/elemHide.js | 
| =================================================================== | 
| --- a/lib/elemHide.js | 
| +++ b/lib/elemHide.js | 
| @@ -22,17 +22,17 @@ | 
| */ | 
| const {ElemHideException} = require("./filterClasses"); | 
| const {FilterNotifier} = require("./filterNotifier"); | 
| /** | 
| * Lookup table, active flag, by filter by domain. | 
| * (Only contains filters that aren't unconditionally matched for all domains.) | 
| - * @type {Map.<string,Map.<Filter,boolean>>} | 
| + * @type {Map.<string,?Map.<Filter,boolean>>} | 
| */ | 
| let filtersByDomain = new Map(); | 
| /** | 
| * Lookup table, filter by selector. (Only used for selectors that are | 
| * unconditionally matched for all domains.) | 
| * @type {Map.<string,Filter>} | 
| */ | 
| @@ -56,38 +56,213 @@ | 
| /** | 
| * Set containing known element hiding and exception filters | 
| * @type {Set.<ElemHideBase>} | 
| */ | 
| let knownFilters = new Set(); | 
| /** | 
| * Lookup table, lists of element hiding exceptions by selector | 
| - * @type {Map.<string,Filter>} | 
| + * @type {Map.<string,Filter[]>} | 
| 
kzar
2018/05/15 13:26:31
I guess you need to rebase again.
 
Manish Jethani
2018/05/15 16:00:08
Done.
 | 
| */ | 
| let exceptions = new Map(); | 
| /** | 
| + * Lookup table, lists of generic element hiding exceptions by selector | 
| + * @type {Map.<string,Filter[]>} | 
| + */ | 
| +let genericExceptions = new Map(); | 
| + | 
| +/** | 
| + * List of selectors that apply on any unknown domain | 
| + * @type {?string[]} | 
| + */ | 
| +let conditionalGenericSelectors = null; | 
| 
kzar
2018/05/15 13:26:31
I think this cache is probably a good idea. I figu
 
Manish Jethani
2018/05/15 16:00:07
It also applies to known domains (see my other com
 | 
| + | 
| +/** | 
| + * Domains that are known not to be specifically excluded from any generic | 
| + * filters | 
| + * @type {Set.<string>} | 
| + */ | 
| +let genericFriendlyDomains = new Set(); | 
| + | 
| +/** | 
| * Adds a filter to the lookup table of filters by domain. | 
| * @param {Filter} filter | 
| */ | 
| function addToFiltersByDomain(filter) | 
| { | 
| let domains = filter.domains || defaultDomains; | 
| - for (let [domain, isIncluded] of domains) | 
| + if (filter instanceof ElemHideException) | 
| + { | 
| + for (let domain of domains.keys()) | 
| + { | 
| + // Add an entry for each domain, but without any filters. This makes | 
| + // the domain "known" and helps us avoid certain optimizations that | 
| + // would otherwise yield incorrect results. | 
| + if (domain != "" && !filtersByDomain.has(domain)) | 
| + filtersByDomain.set(domain, null); | 
| 
kzar
2018/05/15 13:26:30
Am I missing something or do we never remove the d
 
Manish Jethani
2018/05/15 16:00:08
You're right, we never remove a domain like that f
 
kzar
2018/05/18 14:34:53
Acknowledged.
 | 
| + } | 
| + } | 
| + else | 
| { | 
| - // There's no need to note that a filter is generically disabled. | 
| - if (!isIncluded && domain == "") | 
| - continue; | 
| + for (let [domain, isIncluded] of domains) | 
| + { | 
| + // There's no need to note that a filter is generically disabled. | 
| + if (!isIncluded && domain == "") | 
| + continue; | 
| + | 
| + let filters = filtersByDomain.get(domain); | 
| + if (!filters) | 
| + filtersByDomain.set(domain, filters = new Map()); | 
| + filters.set(filter, isIncluded); | 
| + } | 
| + } | 
| +} | 
| + | 
| +/** | 
| + * Checks whether a filter applies on a domain | 
| + * @param {Filter} filter | 
| + * @param {string} [domain] | 
| + * @param {Set.<Filter>} excludeSet | 
| + * @returns {boolean} | 
| + */ | 
| +function doesFilterApply(filter, domain, excludeSet) | 
| +{ | 
| + return (excludeSet.size == 0 || !excludeSet.has(filter)) && | 
| + !ElemHide.getException(filter, domain); | 
| +} | 
| + | 
| +/** | 
| + * Returns a list of domain-specific filters matching a domain | 
| + * @param {string} [domain] | 
| + * @returns {Array.<{domain: string, filters: ?Map.<Filter,boolean>}>} | 
| + */ | 
| +function getSpecificFiltersForDomain(domain) | 
| 
Manish Jethani
2018/05/15 16:20:59
Note: getSpecificFiltersForDomain could be reused
 
kzar
2018/05/18 14:34:53
Acknowledged.
 | 
| +{ | 
| + let filtersList = []; | 
| + | 
| + if (domain) | 
| + domain = domain.toUpperCase(); | 
| + | 
| + while (domain) | 
| + { | 
| + let filters = filtersByDomain.get(domain); | 
| + if (typeof filters != "undefined") | 
| + filtersList.push({domain, filters}); | 
| + | 
| + let nextDot = domain.indexOf("."); | 
| + domain = nextDot == -1 ? null : domain.substring(nextDot + 1); | 
| + } | 
| + | 
| + return filtersList; | 
| +} | 
| + | 
| +/** | 
| + * Returns a list of selectors that apply on a domain from a given list of | 
| + * filters | 
| + * @param {string} [domain] | 
| + * @param {Array.<{domain: string, filters: ?Map.<Filter,boolean>}>} | 
| + * @param {Set.<Filter>} excludeSet | 
| + * @returns {string[]} | 
| + */ | 
| +function matchSelectors(domain, filtersList, excludeSet) | 
| 
Manish Jethani
2018/05/15 16:20:59
Note: matchSelectors could be reused by ElemHideEm
 | 
| +{ | 
| + let matches = []; | 
| - let filters = filtersByDomain.get(domain); | 
| - if (!filters) | 
| - filtersByDomain.set(domain, filters = new Map()); | 
| - filters.set(filter, isIncluded); | 
| + // This code is a performance hot-spot, which is why we've made certain | 
| + // micro-optimisations. Please be careful before making changes. | 
| + for (let i = 0; i < filtersList.length; i++) | 
| + { | 
| + let filters = filtersList[i].filters; | 
| + if (filters) | 
| + { | 
| + for (let [filter, isIncluded] of filters) | 
| + { | 
| + if (!isIncluded) | 
| + excludeSet.add(filter); | 
| + else if (doesFilterApply(filter, domain, excludeSet)) | 
| + matches.push(filter.selector); | 
| + } | 
| + } | 
| } | 
| + | 
| + return matches; | 
| +} | 
| + | 
| +/** | 
| + * Returns a list of selectors that apply on a domain | 
| + * @param {string} [domain] | 
| + * @param {boolean} specificOnly | 
| + * @returns {string[]} | 
| + */ | 
| +function getConditionalSelectorsForDomain(domain, specificOnly) | 
| +{ | 
| + let specificFilters = getSpecificFiltersForDomain(domain); | 
| + | 
| + // If there are no specific filters (nor any specific exceptions), we can | 
| + // just return the selectors from all the generic filters modulo any generic | 
| + // exceptions. | 
| + if (specificFilters.length == 0) | 
| + return specificOnly ? [] : getConditionalGenericSelectors(); | 
| 
kzar
2018/05/18 14:34:53
Nit: Seems a waste to create a new empty Array whe
 | 
| + | 
| + let excludeSet = new Set(); | 
| + let specificSelectors = matchSelectors(domain, specificFilters, excludeSet); | 
| + | 
| + if (specificOnly) | 
| + return specificSelectors; | 
| + | 
| + // We use the longest subdomain of this domain found in our data structures | 
| + // as the key to check if the domain is "generic friendly." For example, | 
| + // given foo.example.com, there may be an entry for example.com in our data | 
| + // structures (e.g. "example.com###foo"), so we use that subdomain as the | 
| + // key. This way we make only one entry and it works for all subdomains of | 
| + // example.com, except those that have specific entries | 
| + // (e.g. "~bar.example.com##.no-bar"). | 
| + let domainKey = specificFilters[0].domain; | 
| + | 
| + if (genericFriendlyDomains.has(domainKey)) | 
| + return specificSelectors.concat(getConditionalGenericSelectors()); | 
| + | 
| + let genericFilters = [{filters: filtersByDomain.get("")}]; | 
| + let genericSelectors = matchSelectors(domain, genericFilters, excludeSet); | 
| + | 
| + // If the number of conditional generic selectors that apply on this domain | 
| + // is the same as the total number of conditional generic selectors, the | 
| + // domain is "generic friendly" (i.e. all generic filters apply, except those | 
| + // with generic exceptions). In that case, we mark it is as such for faster | 
| + // lookups. | 
| + if (genericSelectors.length == (conditionalGenericSelectors || {}).length) | 
| + genericFriendlyDomains.add(domainKey); | 
| + | 
| + return specificSelectors.concat(genericSelectors); | 
| +} | 
| + | 
| +/** | 
| + * Returns a list of selectors that apply on any unknown domain | 
| + * @returns {string[]} | 
| + */ | 
| +function getConditionalGenericSelectors() | 
| +{ | 
| + if (conditionalGenericSelectors) | 
| + return conditionalGenericSelectors; | 
| + | 
| + conditionalGenericSelectors = []; | 
| + | 
| + let filters = filtersByDomain.get(""); | 
| + if (!filters) | 
| + return conditionalGenericSelectors; | 
| + | 
| + for (let {selector} of filters.keys()) | 
| + { | 
| + if (genericExceptions.size == 0 || !genericExceptions.has(selector)) | 
| + conditionalGenericSelectors.push(selector); | 
| + } | 
| + | 
| + return conditionalGenericSelectors; | 
| } | 
| /** | 
| * Returns a list of selectors that apply on each website unconditionally. | 
| * @returns {string[]} | 
| */ | 
| function getUnconditionalSelectors() | 
| { | 
| @@ -103,42 +278,60 @@ | 
| */ | 
| let ElemHide = exports.ElemHide = { | 
| /** | 
| * Removes all known filters | 
| */ | 
| clear() | 
| { | 
| for (let collection of [filtersByDomain, filterBySelector, | 
| - knownFilters, exceptions]) | 
| + knownFilters, exceptions, | 
| + genericExceptions, genericFriendlyDomains]) | 
| { | 
| collection.clear(); | 
| } | 
| unconditionalSelectors = null; | 
| + conditionalGenericSelectors = null; | 
| FilterNotifier.emit("elemhideupdate"); | 
| }, | 
| /** | 
| * Add a new element hiding filter | 
| * @param {ElemHideBase} filter | 
| */ | 
| add(filter) | 
| { | 
| if (knownFilters.has(filter)) | 
| return; | 
| + conditionalGenericSelectors = null; | 
| + genericFriendlyDomains.clear(); | 
| + | 
| if (filter instanceof ElemHideException) | 
| { | 
| - let {selector} = filter; | 
| + let {selector, domains} = filter; | 
| + | 
| let list = exceptions.get(selector); | 
| if (list) | 
| list.push(filter); | 
| else | 
| exceptions.set(selector, [filter]); | 
| + if (domains) | 
| + addToFiltersByDomain(filter); | 
| + | 
| + if (filter.isGeneric()) | 
| + { | 
| + list = genericExceptions.get(selector); | 
| + if (list) | 
| + list.push(filter); | 
| + else | 
| + genericExceptions.set(selector, [filter]); | 
| + } | 
| + | 
| // If this is the first exception for a previously unconditionally | 
| // applied element hiding selector we need to take care to update the | 
| // lookups. | 
| let unconditionalFilterForSelector = filterBySelector.get(selector); | 
| if (unconditionalFilterForSelector) | 
| { | 
| addToFiltersByDomain(unconditionalFilterForSelector); | 
| filterBySelector.delete(selector); | 
| @@ -165,23 +358,39 @@ | 
| * Removes an element hiding filter | 
| * @param {ElemHideBase} filter | 
| */ | 
| remove(filter) | 
| { | 
| if (!knownFilters.has(filter)) | 
| return; | 
| + conditionalGenericSelectors = null; | 
| + genericFriendlyDomains.clear(); | 
| + | 
| // Whitelisting filters | 
| if (filter instanceof ElemHideException) | 
| { | 
| let list = exceptions.get(filter.selector); | 
| let index = list.indexOf(filter); | 
| if (index >= 0) | 
| list.splice(index, 1); | 
| + | 
| + if (filter.isGeneric()) | 
| + { | 
| + list = genericExceptions.get(filter.selector); | 
| + index = list.indexOf(filter); | 
| + if (index >= 0) | 
| + list.splice(index, 1); | 
| + | 
| + // It's important to delete the entry here so the selector no longer | 
| + // appears to have any generic exceptions. | 
| + if (list.length == 0) | 
| + genericExceptions.delete(filter.selector); | 
| + } | 
| } | 
| // Unconditially applied element hiding filters | 
| else if (filterBySelector.get(filter.selector) == filter) | 
| { | 
| filterBySelector.delete(filter.selector); | 
| unconditionalSelectors = null; | 
| } | 
| // Conditionally applied element hiding filters | 
| @@ -253,51 +462,19 @@ | 
| * @param {number} [criteria] | 
| * One of the following: ElemHide.ALL_MATCHING, ElemHide.NO_UNCONDITIONAL or | 
| * ElemHide.SPECIFIC_ONLY. | 
| * @returns {string[]} | 
| * List of selectors. | 
| */ | 
| getSelectorsForDomain(domain, criteria = ElemHide.ALL_MATCHING) | 
| { | 
| - let selectors = []; | 
| - | 
| - let specificOnly = (criteria >= ElemHide.SPECIFIC_ONLY); | 
| - let excluded = new Set(); | 
| - let currentDomain = domain ? domain.toUpperCase() : ""; | 
| - | 
| - // This code is a performance hot-spot, which is why we've made certain | 
| - // micro-optimisations. Please be careful before making changes. | 
| - while (true) | 
| - { | 
| - if (specificOnly && currentDomain == "") | 
| 
kzar
2018/05/15 13:26:30
Why don't we instead keep track of if filtersByDom
 
Manish Jethani
2018/05/15 16:00:08
filtersByDomain.has(domain) will be true for ~11,0
 
kzar
2018/05/18 14:34:53
Alright, how about this, perhaps a dumb idea but h
 
Manish Jethani
2018/05/18 17:18:26
If I understanding the suggestion correctly, you'r
 | 
| - break; | 
| - | 
| - let filters = filtersByDomain.get(currentDomain); | 
| - if (filters) | 
| - { | 
| - for (let [filter, isIncluded] of filters) | 
| - { | 
| - if (!isIncluded) | 
| - { | 
| - excluded.add(filter); | 
| - } | 
| - else if ((excluded.size == 0 || !excluded.has(filter)) && | 
| - !this.getException(filter, domain)) | 
| - { | 
| - selectors.push(filter.selector); | 
| - } | 
| - } | 
| - } | 
| - | 
| - if (currentDomain == "") | 
| - break; | 
| - | 
| - let nextDot = currentDomain.indexOf("."); | 
| - currentDomain = nextDot == -1 ? "" : currentDomain.substr(nextDot + 1); | 
| - } | 
| + let specificOnly = criteria >= ElemHide.SPECIFIC_ONLY; | 
| + let selectors = getConditionalSelectorsForDomain(domain, specificOnly); | 
| 
kzar
2018/05/15 13:26:31
I feel like some of these changes (like this one,
 
Manish Jethani
2018/05/15 16:00:08
Yeah this patch has been updated a lot. Now the fu
 
Manish Jethani
2018/05/15 16:16:15
Actually there are three return statements (out of
 
Manish Jethani
2018/05/15 16:20:59
I meant getConditionalSelectorsForDomain of course
 | 
| if (criteria < ElemHide.NO_UNCONDITIONAL) | 
| selectors = getUnconditionalSelectors().concat(selectors); | 
| + else if (criteria == ElemHide.NO_UNCONDITIONAL) | 
| + selectors = selectors.slice(); | 
| return selectors; | 
| } | 
| }; |