| OLD | NEW | 
|    1 /* |    1 /* | 
|    2  * This file is part of Adblock Plus <https://adblockplus.org/>, |    2  * This file is part of Adblock Plus <https://adblockplus.org/>, | 
|    3  * Copyright (C) 2006-2017 eyeo GmbH |    3  * Copyright (C) 2006-2017 eyeo GmbH | 
|    4  * |    4  * | 
|    5  * Adblock Plus is free software: you can redistribute it and/or modify |    5  * Adblock Plus is free software: you can redistribute it and/or modify | 
|    6  * it under the terms of the GNU General Public License version 3 as |    6  * it under the terms of the GNU General Public License version 3 as | 
|    7  * published by the Free Software Foundation. |    7  * published by the Free Software Foundation. | 
|    8  * |    8  * | 
|    9  * Adblock Plus is distributed in the hope that it will be useful, |    9  * Adblock Plus is distributed in the hope that it will be useful, | 
|   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of |   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  359   { |  359   { | 
|  360     newSelector.push(selector.substring(i, pos.start)); |  360     newSelector.push(selector.substring(i, pos.start)); | 
|  361     newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |  361     newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 
|  362     i = pos.end; |  362     i = pos.end; | 
|  363   } |  363   } | 
|  364   newSelector.push(selector.substring(i)); |  364   newSelector.push(selector.substring(i)); | 
|  365  |  365  | 
|  366   return newSelector.join(""); |  366   return newSelector.join(""); | 
|  367 } |  367 } | 
|  368  |  368  | 
 |  369 function closeMatch(s, t) | 
 |  370 { | 
 |  371   // This function returns an edit operation, one of "substitute", "delete", | 
 |  372   // and "insert", along with an index in the source string where the edit must | 
 |  373   // occur in order to arrive at the target string. If the strings are not a | 
 |  374   // close match, it returns null. | 
 |  375   // | 
 |  376   // Two strings are considered to be a close match if they are one edit | 
 |  377   // operation apart. | 
 |  378   // | 
 |  379   // Deletions or insertions of a contiguous range of characters from one | 
 |  380   // string into the other, at the same index, are treated as a single edit. | 
 |  381   // For example, "internal" and "international" are considered to be one edit | 
 |  382   // apart and therefore a close match. | 
 |  383  | 
 |  384   // A few things to note: | 
 |  385   // | 
 |  386   //   1) This function does not care about the format of the input strings. | 
 |  387   //   For example, the caller may pass in regular expressions, where "[ab]" | 
 |  388   //   and "[bc]" could be considered to be a close match, since the order | 
 |  389   //   within the brackets doesn't matter. This function will still return null | 
 |  390   //   for this set of inputs since they are two edits apart. | 
 |  391   // | 
 |  392   //   2) To be friendly to calling code that might be passing in regular | 
 |  393   //   expressions, this function will simply return null if it encounters a | 
 |  394   //   special character (e.g. "\", "?", "+", etc.) in the delta. For example, | 
 |  395   //   given "Hello" and "Hello, how are you?", it will return null. | 
 |  396   // | 
 |  397   //   3) If the caller does indeed pass in regular expressions, it must make | 
 |  398   //   the important assumption that the parts where two such regular | 
 |  399   //   expressions may differ can always be treated as normal strings. For | 
 |  400   //   example, "^https?://.*/ads" and "^https?://.*/adv" differ only in the | 
 |  401   //   last character, therefore the regular expressions can safely be merged | 
 |  402   //   into "^https?://.*/ad[sv]". | 
 |  403  | 
 |  404   let diff = s.length - t.length; | 
 |  405  | 
 |  406   // If target is longer than source, swap them for the purpose of our | 
 |  407   // calculation. | 
 |  408   if (diff < 0) | 
 |  409   { | 
 |  410     let tmp = s; | 
 |  411     s = t; | 
 |  412     t = tmp; | 
 |  413   } | 
 |  414  | 
 |  415   let edit = null; | 
 |  416  | 
 |  417   let i = 0, j = 0; | 
 |  418  | 
 |  419   for (; i < s.length; i++) | 
 |  420   { | 
 |  421     if (s[i] != t[i]) | 
 |  422       break; | 
 |  423   } | 
 |  424  | 
 |  425   for (; j < t.length; j++) | 
 |  426   { | 
 |  427     if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1]) | 
 |  428       break; | 
 |  429   } | 
 |  430  | 
 |  431   if (diff == 0) | 
 |  432   { | 
 |  433     if (t.length - j - i != 1) | 
 |  434       return null; | 
 |  435   } | 
 |  436   else if (i != t.length - j) | 
 |  437   { | 
 |  438     return null; | 
 |  439   } | 
 |  440  | 
 |  441   for (let k = i; k < s.length - j; k++) | 
 |  442   { | 
 |  443     // If there are any special characters in the delta, bail. | 
 |  444     if (s[k] == "." || s[k] == "+" || s[k] == "$" || s[k] == "?" || | 
 |  445         s[k] == "{" || s[k] == "}" || s[k] == "(" || s[k] == ")" || | 
 |  446         s[k] == "[" || s[k] == "]" || s[k] == "\\") | 
 |  447       return null; | 
 |  448   } | 
 |  449  | 
 |  450   if (diff == 0) | 
 |  451   { | 
 |  452     edit = {type: "substitute", index: i}; | 
 |  453   } | 
 |  454   else if (diff > 0) | 
 |  455   { | 
 |  456     edit = {type: "delete", index: i}; | 
 |  457  | 
 |  458     if (diff > 1) | 
 |  459       edit.endIndex = s.length - j; | 
 |  460   } | 
 |  461   else | 
 |  462   { | 
 |  463     edit = {type: "insert", index: i}; | 
 |  464  | 
 |  465     if (diff < -1) | 
 |  466       edit.endIndex = s.length - j; | 
 |  467   } | 
 |  468  | 
 |  469   return edit; | 
 |  470 } | 
 |  471  | 
 |  472 function mergeRulesByURLFilter(rulesInfo, exhaustive) | 
 |  473 { | 
 |  474   // Closely matching rules are likely to be within a certain range. We only | 
 |  475   // look for matches within this range. If we increase this value, it can give | 
 |  476   // us more matches and a smaller resulting rule set, but possibly at a | 
 |  477   // significant performance cost. | 
 |  478   const heuristicRange = 100; | 
 |  479  | 
 |  480   for (let i = 0; i < rulesInfo.length; i++) | 
 |  481   { | 
 |  482     let limit = exhaustive ? rulesInfo.length : | 
 |  483                 Math.min(i + heuristicRange, rulesInfo.length); | 
 |  484  | 
 |  485     for (let j = i + 1; j < limit; j++) | 
 |  486     { | 
 |  487       let source = rulesInfo[i].rule.trigger["url-filter"]; | 
 |  488       let target = rulesInfo[j].rule.trigger["url-filter"]; | 
 |  489  | 
 |  490       let edit = closeMatch(source, target); | 
 |  491  | 
 |  492       if (edit) | 
 |  493       { | 
 |  494         let urlFilter, ruleInfo, match = {edit}; | 
 |  495  | 
 |  496         if (edit.type == "insert") | 
 |  497         { | 
 |  498           // Convert the insertion into a deletion and stick it on the target | 
 |  499           // rule instead. We can only group deletions and substitutions; | 
 |  500           // therefore insertions must be treated as deletions on the target | 
 |  501           // rule. | 
 |  502           urlFilter = target; | 
 |  503           ruleInfo = rulesInfo[j]; | 
 |  504           match.index = i; | 
 |  505           edit.type = "delete"; | 
 |  506         } | 
 |  507         else | 
 |  508         { | 
 |  509           urlFilter = source; | 
 |  510           ruleInfo = rulesInfo[i]; | 
 |  511           match.index = j; | 
 |  512         } | 
 |  513  | 
 |  514         // If the edit has an end index, it represents a multiple character | 
 |  515         // edit. | 
 |  516         let multiEdit = !!edit.endIndex; | 
 |  517  | 
 |  518         if (multiEdit) | 
 |  519         { | 
 |  520           // We only care about a single multiple character edit because the | 
 |  521           // number of characters for such a match doesn't matter, we can | 
 |  522           // only merge with one other rule. | 
 |  523           if (!ruleInfo.multiEditMatch) | 
 |  524             ruleInfo.multiEditMatch = match; | 
 |  525         } | 
 |  526         else | 
 |  527         { | 
 |  528           // For single character edits, multiple rules can be merged into | 
 |  529           // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?". | 
 |  530           if (!ruleInfo.matches) | 
 |  531             ruleInfo.matches = new Array(urlFilter.length + 1); | 
 |  532  | 
 |  533           // Matches at a particular index. For example, for a source string | 
 |  534           // "ads", both target strings "ad" (deletion) and "adv" | 
 |  535           // (substitution) match at index 2, hence they are grouped together | 
 |  536           // to possibly be merged later into "ad[sv]?". | 
 |  537           let matchesForIndex = ruleInfo.matches[edit.index]; | 
 |  538  | 
 |  539           if (matchesForIndex) | 
 |  540           { | 
 |  541             matchesForIndex.push(match); | 
 |  542           } | 
 |  543           else | 
 |  544           { | 
 |  545             matchesForIndex = [match]; | 
 |  546             ruleInfo.matches[edit.index] = matchesForIndex; | 
 |  547           } | 
 |  548  | 
 |  549           // Keep track of the best set of matches. We later sort by this to | 
 |  550           // get best results. | 
 |  551           if (!ruleInfo.bestMatches || | 
 |  552               matchesForIndex.length > ruleInfo.bestMatches.length) | 
 |  553             ruleInfo.bestMatches = matchesForIndex; | 
 |  554         } | 
 |  555       } | 
 |  556     } | 
 |  557   } | 
 |  558  | 
 |  559   // Filter out rules that have no matches at all. | 
 |  560   let candidateRulesInfo = rulesInfo.filter(ruleInfo => | 
 |  561   { | 
 |  562     return ruleInfo.bestMatches || ruleInfo.multiEditMatch | 
 |  563   }); | 
 |  564  | 
 |  565   // For best results, we have to sort the candidates by the largest set of | 
 |  566   // matches. | 
 |  567   // | 
 |  568   // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to | 
 |  569   // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and | 
 |  570   // "[ab]dx" (3 rules). | 
 |  571   candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | 
 |  572   { | 
 |  573     let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length : | 
 |  574                   ruleInfo1.multiEditMatch ? 1 : 0; | 
 |  575     let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length : | 
 |  576                   ruleInfo2.multiEditMatch ? 1 : 0; | 
 |  577  | 
 |  578     return weight2 - weight1; | 
 |  579   }); | 
 |  580  | 
 |  581   for (let ruleInfo of candidateRulesInfo) | 
 |  582   { | 
 |  583     let rule = ruleInfo.rule; | 
 |  584  | 
 |  585     // If this rule has already been merged into another rule, we skip it. | 
 |  586     if (ruleInfo.merged) | 
 |  587       continue; | 
 |  588  | 
 |  589     // Find the best set of rules to group, which is simply the largest set. | 
 |  590     let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | 
 |  591     { | 
 |  592       matchesForIndex = (matchesForIndex || []).filter(match => | 
 |  593       { | 
 |  594         // Filter out rules that have either already been merged into other | 
 |  595         // rules or have had other rules merged into them. | 
 |  596         return !rulesInfo[match.index].merged && | 
 |  597                !rulesInfo[match.index].mergedInto; | 
 |  598       }); | 
 |  599  | 
 |  600       return matchesForIndex.length > best.length ? matchesForIndex : best; | 
 |  601     }, | 
 |  602     []); | 
 |  603  | 
 |  604     let multiEdit = false; | 
 |  605  | 
 |  606     // If we couldn't find a single rule to merge with, let's see if we have a | 
 |  607     // multiple character edit. e.g. we could merge "ad" and "adserver" into | 
 |  608     // "ad(server)?". | 
 |  609     if (best.length == 0 && ruleInfo.multiEditMatch && | 
 |  610         !rulesInfo[ruleInfo.multiEditMatch.index].merged && | 
 |  611         !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | 
 |  612     { | 
 |  613       best = [ruleInfo.multiEditMatch]; | 
 |  614       multiEdit = true; | 
 |  615     } | 
 |  616  | 
 |  617     if (best.length > 0) | 
 |  618     { | 
 |  619       let urlFilter = rule.trigger["url-filter"]; | 
 |  620  | 
 |  621       let editIndex = best[0].edit.index; | 
 |  622  | 
 |  623       if (!multiEdit) | 
 |  624       { | 
 |  625         // Merge all the matching rules into this one. | 
 |  626  | 
 |  627         let characters = []; | 
 |  628         let quantifier = ""; | 
 |  629  | 
 |  630         for (let match of best) | 
 |  631         { | 
 |  632           if (match.edit.type == "delete") | 
 |  633           { | 
 |  634             quantifier = "?"; | 
 |  635           } | 
 |  636           else | 
 |  637           { | 
 |  638             let character = rulesInfo[match.index].rule | 
 |  639                             .trigger["url-filter"][editIndex]; | 
 |  640             characters.push(character); | 
 |  641           } | 
 |  642  | 
 |  643           // Mark the target rule as merged so other rules don't try to merge | 
 |  644           // it again. | 
 |  645           rulesInfo[match.index].merged = true; | 
 |  646         } | 
 |  647  | 
 |  648         urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | 
 |  649                     urlFilter.substring(editIndex + 1); | 
 |  650         if (characters.length > 0) | 
 |  651         { | 
 |  652           urlFilter = urlFilter.substring(0, editIndex) + "[" + | 
 |  653                       urlFilter[editIndex] + characters.join("") + "]" + | 
 |  654                       urlFilter.substring(editIndex + 1); | 
 |  655         } | 
 |  656       } | 
 |  657       else | 
 |  658       { | 
 |  659         let editEndIndex = best[0].edit.endIndex; | 
 |  660  | 
 |  661         // Mark the target rule as merged so other rules don't try to merge it | 
 |  662         // again. | 
 |  663         rulesInfo[best[0].index].merged = true; | 
 |  664  | 
 |  665         urlFilter = urlFilter.substring(0, editIndex) + "(" + | 
 |  666                     urlFilter.substring(editIndex, editEndIndex) + ")?" + | 
 |  667                     urlFilter.substring(editEndIndex); | 
 |  668       } | 
 |  669  | 
 |  670       rule.trigger["url-filter"] = urlFilter; | 
 |  671  | 
 |  672       // Mark this rule as one that has had other rules merged into it. | 
 |  673       ruleInfo.mergedInto = true; | 
 |  674     } | 
 |  675   } | 
 |  676 } | 
 |  677  | 
 |  678 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) | 
 |  679 { | 
 |  680   let set = new Set(); | 
 |  681  | 
 |  682   rulesInfo.forEach((ruleInfo, index) => | 
 |  683   { | 
 |  684     if (ruleInfo.rule[propertyType][property]) | 
 |  685     { | 
 |  686       for (let value of ruleInfo.rule[propertyType][property]) | 
 |  687         set.add(value); | 
 |  688     } | 
 |  689  | 
 |  690     if (index > 0) | 
 |  691       ruleInfo.merged = true; | 
 |  692   }); | 
 |  693  | 
 |  694   if (set.size > 0) | 
 |  695     rulesInfo[0].rule[propertyType][property] = Array.from(set).sort(); | 
 |  696  | 
 |  697   rulesInfo[0].mergedInto = true; | 
 |  698 } | 
 |  699  | 
 |  700 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) | 
 |  701 { | 
 |  702   let mergeableRulesInfoByGroup = new Map(); | 
 |  703  | 
 |  704   rulesInfo.forEach(ruleInfo => | 
 |  705   { | 
 |  706     let copy = { | 
 |  707       trigger: Object.assign({}, ruleInfo.rule.trigger), | 
 |  708       action: Object.assign({}, ruleInfo.rule.action) | 
 |  709     }; | 
 |  710  | 
 |  711     delete copy[propertyType][property]; | 
 |  712  | 
 |  713     let groupKey = JSON.stringify(copy); | 
 |  714  | 
 |  715     let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey); | 
 |  716  | 
 |  717     if (mergeableRulesInfo) | 
 |  718       mergeableRulesInfo.push(ruleInfo); | 
 |  719     else | 
 |  720       mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]); | 
 |  721   }); | 
 |  722  | 
 |  723   return mergeableRulesInfoByGroup; | 
 |  724 } | 
 |  725  | 
 |  726 function mergeRules(rules, options) | 
 |  727 { | 
 |  728   const defaultOptions = {exhaustive: false}; | 
 |  729  | 
 |  730   options = Object.assign({}, defaultOptions, options); | 
 |  731  | 
 |  732   let rulesInfo = rules.map(rule => ({rule})); | 
 |  733  | 
 |  734   groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter") | 
 |  735   .forEach(mergeableRulesInfo => | 
 |  736   { | 
 |  737     if (mergeableRulesInfo.length > 1) | 
 |  738       mergeRulesByURLFilter(mergeableRulesInfo, options.exhaustive); | 
 |  739   }); | 
 |  740  | 
 |  741   // Filter out rules that have been merged into other rules. | 
 |  742   rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); | 
 |  743  | 
 |  744   for (let arrayProperty of["resource-type", "if-domain"])  | 
 |  745   { | 
 |  746     groupRulesByMergeableProperty(rulesInfo, "trigger", arrayProperty) | 
 |  747     .forEach(mergeableRulesInfo => | 
 |  748     { | 
 |  749       if (mergeableRulesInfo.length > 1) | 
 |  750         mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty); | 
 |  751     }); | 
 |  752  | 
 |  753     rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); | 
 |  754   } | 
 |  755  | 
 |  756   return rulesInfo.map(ruleInfo => ruleInfo.rule); | 
 |  757 } | 
 |  758  | 
|  369 let ContentBlockerList = |  759 let ContentBlockerList = | 
|  370 /** |  760 /** | 
|  371  * Create a new Adblock Plus filter to content blocker list converter |  761  * Create a new Adblock Plus filter to content blocker list converter | 
|  372  * |  762  * | 
|  373  * @constructor |  763  * @constructor | 
|  374  */ |  764  */ | 
|  375 exports.ContentBlockerList = function () |  765 exports.ContentBlockerList = function () | 
|  376 { |  766 { | 
|  377   this.requestFilters = []; |  767   this.requestFilters = []; | 
|  378   this.requestExceptions = []; |  768   this.requestExceptions = []; | 
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  417  |  807  | 
|  418     parseDomains(filter.domains, domains, []); |  808     parseDomains(filter.domains, domains, []); | 
|  419   } |  809   } | 
|  420 }; |  810 }; | 
|  421  |  811  | 
|  422 /** |  812 /** | 
|  423  * Generate content blocker list for all filters that were added |  813  * Generate content blocker list for all filters that were added | 
|  424  * |  814  * | 
|  425  * @returns   {Filter}   filter    Filter to convert |  815  * @returns   {Filter}   filter    Filter to convert | 
|  426  */ |  816  */ | 
|  427 ContentBlockerList.prototype.generateRules = function(filter) |  817 ContentBlockerList.prototype.generateRules = function(options) | 
|  428 { |  818 { | 
 |  819   const defaultOptions = { | 
 |  820     merge: false, | 
 |  821     exhaustiveMerge: false | 
 |  822   }; | 
 |  823  | 
 |  824   options = Object.assign({}, defaultOptions, options); | 
 |  825  | 
|  429   let rules = []; |  826   let rules = []; | 
|  430  |  827  | 
|  431   let groupedElemhideFilters = new Map(); |  828   let groupedElemhideFilters = new Map(); | 
|  432   for (let filter of this.elemhideFilters) |  829   for (let filter of this.elemhideFilters) | 
|  433   { |  830   { | 
|  434     let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |  831     let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 
|  435     if (!result) |  832     if (!result) | 
|  436       continue; |  833       continue; | 
|  437  |  834  | 
|  438     if (result.matchDomains.length == 0) |  835     if (result.matchDomains.length == 0) | 
| (...skipping 26 matching lines...) Expand all  Loading... | 
|  465     } |  862     } | 
|  466   }); |  863   }); | 
|  467  |  864  | 
|  468   for (let filter of this.elemhideExceptions) |  865   for (let filter of this.elemhideExceptions) | 
|  469     convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |  866     convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 
|  470   for (let filter of this.requestFilters) |  867   for (let filter of this.requestFilters) | 
|  471     convertFilterAddRules(rules, filter, "block", true); |  868     convertFilterAddRules(rules, filter, "block", true); | 
|  472   for (let filter of this.requestExceptions) |  869   for (let filter of this.requestExceptions) | 
|  473     convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |  870     convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 
|  474  |  871  | 
|  475   return rules.filter(rule => !hasNonASCI(rule)); |  872   rules = rules.filter(rule => !hasNonASCI(rule)); | 
 |  873  | 
 |  874   if (options.merge) | 
 |  875   { | 
 |  876     let mergeOptions = { | 
 |  877       exhaustive: options.exhaustiveMerge | 
 |  878     }; | 
 |  879  | 
 |  880     rules = mergeRules(rules, mergeOptions); | 
 |  881   } | 
 |  882  | 
 |  883   return rules; | 
|  476 }; |  884 }; | 
| OLD | NEW |