| Index: lib/abp2blocklist.js | 
| =================================================================== | 
| --- a/lib/abp2blocklist.js | 
| +++ b/lib/abp2blocklist.js | 
| @@ -361,16 +361,406 @@ | 
| newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 
| i = pos.end; | 
| } | 
| newSelector.push(selector.substring(i)); | 
| return newSelector.join(""); | 
| } | 
/**
 * Checks whether two strings are exactly one edit operation apart and, if so,
 * describes the edit that turns the source string into the target string.
 *
 * Deletions or insertions of a contiguous range of characters at a single
 * index count as one edit. For example, "internal" and "international" are
 * one edit apart and therefore a close match.
 *
 * A few things to note:
 *
 * 1) This function does not care about the format of the input strings. The
 *    caller may pass in regular expressions, where "[ab]" and "[bc]" could be
 *    considered a close match since the order within the brackets doesn't
 *    matter. This function still returns null for such inputs because they
 *    are two edits apart.
 *
 * 2) To be friendly to callers that pass in regular expressions, null is
 *    returned if any character in the differing part of either string is a
 *    regular expression special character (e.g. "\", "?", "+", "^", etc.).
 *    For example, given "Hello" and "Hello, how are you?", or "ads" and
 *    "ad.", it returns null.
 *
 * 3) A caller passing in regular expressions must still assume that the
 *    parts where two expressions differ can be treated as normal strings.
 *    For example, "^https?://.*" + "/ads" and "^https?://.*" + "/adv" differ
 *    only in the last character and can be merged into an expression ending
 *    in "/ad[sv]".
 *
 * @param {string} s The source string
 * @param {string} t The target string
 * @returns {?Object} null if the strings are not a close match; otherwise an
 *   object with a "type" ("substitute", "delete" or "insert") and the "index"
 *   at which the edit starts. For edits spanning more than one character an
 *   exclusive "endIndex" is set as well. For "substitute" and "delete" the
 *   indices refer to the source string, for "insert" to the target string.
 */
function closeMatch(s, t)
{
  // Characters that have a meaning of their own in a regular expression. If
  // any of them is part of the delta, the delta can't be treated as plain
  // text by the caller.
  const specialCharacters = ".+$?{}()[]\\*|^";

  let diff = s.length - t.length;

  // All calculations are done on the longer and the shorter string, no matter
  // which one of them is the source. The sign of diff still tells us which
  // kind of edit this is.
  let longer = s;
  let shorter = t;

  if (diff < 0)
  {
    longer = t;
    shorter = s;
  }

  // Length of the common prefix.
  let i = 0;

  while (i < longer.length && longer[i] == shorter[i])
    i++;

  // Length of the common suffix, never reaching back into the common prefix
  // of the shorter string.
  let j = 0;

  while (j < shorter.length && shorter.length - j != i &&
         longer[longer.length - j - 1] == shorter[shorter.length - j - 1])
    j++;

  if (diff == 0)
  {
    // For strings of equal length the delta must be exactly one character.
    if (shorter.length - j - i != 1)
      return null;
  }
  else if (i != shorter.length - j)
  {
    // For strings of unequal length every character of the shorter string
    // must be covered by the common prefix or the common suffix.
    return null;
  }

  let end = longer.length - j;

  for (let k = i; k < end; k++)
  {
    // If there are any special characters in the delta, bail.
    if (specialCharacters.includes(longer[k]))
      return null;

    // For a substitution the other string contributes a character to the
    // delta as well, which has to be just as harmless.
    if (diff == 0 && specialCharacters.includes(shorter[k]))
      return null;
  }

  let type = "substitute";

  if (diff > 0)
    type = "delete";
  else if (diff < 0)
    type = "insert";

  let edit = {type, index: i};

  // Only edits spanning more than one character carry an end index.
  if (diff > 1 || diff < -1)
    edit.endIndex = end;

  return edit;
}
| + | 
/**
 * Merges rules whose "url-filter" triggers are a close match (one edit
 * apart) into a single rule with a combined regular expression.
 *
 * The rules are modified in place: the "url-filter" of a rule that absorbed
 * other rules is rewritten and its info object gets "mergedInto" set, while
 * the info objects of the absorbed rules get "merged" set. Bookkeeping
 * properties ("matches", "bestMatches", "multiEditMatch") are also added to
 * the info objects.
 *
 * @param {Object[]} rulesInfo Info objects, each carrying a "rule" property;
 *   the caller is expected to pass rules that differ only in "url-filter"
 * @param {boolean} exhaustive Whether to compare every rule with every other
 *   rule rather than only with the rules nearby
 */
function mergeRulesByURLFilter(rulesInfo, exhaustive)
{
  // Closely matching rules are likely to be within a certain range. We only
  // look for matches within this range. If we increase this value, it can give
  // us more matches and a smaller resulting rule set, but possibly at a
  // significant performance cost.
  const heuristicRange = 10;

  // Characters that can be neither placed verbatim into a character class nor
  // made optional with "?" without changing the meaning of the expression.
  const unsafeCharacters = ".+$?{}()[]\\*|^";

  // Tells whether the given range of a string is free of such characters.
  let isPlainText = (string, start, end) =>
  {
    for (let k = start; k < end; k++)
    {
      if (unsafeCharacters.includes(string[k]))
        return false;
    }

    return true;
  };

  for (let i = 0; i < rulesInfo.length; i++)
  {
    let limit = exhaustive ? rulesInfo.length :
                Math.min(i + heuristicRange, rulesInfo.length);

    let source = rulesInfo[i].rule.trigger["url-filter"];

    for (let j = i + 1; j < limit; j++)
    {
      let target = rulesInfo[j].rule.trigger["url-filter"];

      let edit = closeMatch(source, target);

      if (!edit)
        continue;

      // Every character that ends up inside "[...]" or in front of "?" must
      // be plain text. For a substitution that is one character of each
      // string, otherwise it is the extra range of the longer string.
      let editEnd = edit.endIndex || edit.index + 1;

      if (edit.type == "substitute")
      {
        if (!isPlainText(source, edit.index, editEnd) ||
            !isPlainText(target, edit.index, editEnd))
          continue;
      }
      else if (!isPlainText(edit.type == "insert" ? target : source,
                            edit.index, editEnd))
      {
        continue;
      }

      let urlFilter, ruleInfo, match = {edit};

      if (edit.type == "insert")
      {
        // Convert the insertion into a deletion and stick it on the target
        // rule instead. We can only group deletions and substitutions;
        // therefore insertions must be treated as deletions on the target
        // rule.
        urlFilter = target;
        ruleInfo = rulesInfo[j];
        match.index = i;
        edit.type = "delete";
      }
      else
      {
        urlFilter = source;
        ruleInfo = rulesInfo[i];
        match.index = j;
      }

      // If the edit has an end index, it represents a multiple character
      // edit.
      let multiEdit = !!edit.endIndex;

      if (multiEdit)
      {
        // We only care about a single multiple character edit because the
        // number of characters for such a match doesn't matter, we can
        // only merge with one other rule.
        if (!ruleInfo.multiEditMatch)
          ruleInfo.multiEditMatch = match;
      }
      else
      {
        // For single character edits, multiple rules can be merged into
        // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?".
        if (!ruleInfo.matches)
          ruleInfo.matches = new Array(urlFilter.length);

        // Matches at a particular index. For example, for a source string
        // "ads", both target strings "ad" (deletion) and "adv"
        // (substitution) match at index 2, hence they are grouped together
        // to possibly be merged later into "ad[sv]?".
        let matchesForIndex = ruleInfo.matches[edit.index];

        if (matchesForIndex)
        {
          matchesForIndex.push(match);
        }
        else
        {
          matchesForIndex = [match];
          ruleInfo.matches[edit.index] = matchesForIndex;
        }

        // Keep track of the best set of matches. We later sort by this to
        // get best results.
        if (!ruleInfo.bestMatches ||
            matchesForIndex.length > ruleInfo.bestMatches.length)
          ruleInfo.bestMatches = matchesForIndex;
      }
    }
  }

  // Filter out rules that have no matches at all.
  let candidateRulesInfo = rulesInfo.filter(ruleInfo =>
  {
    return ruleInfo.bestMatches || ruleInfo.multiEditMatch;
  });

  // For best results, we have to sort the candidates by the largest set of
  // matches.
  //
  // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to
  // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and
  // "[ab]dx" (3 rules).
  candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
  {
    let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length :
                  ruleInfo1.multiEditMatch ? 1 : 0;
    let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length :
                  ruleInfo2.multiEditMatch ? 1 : 0;

    return weight2 - weight1;
  });

  for (let ruleInfo of candidateRulesInfo)
  {
    let rule = ruleInfo.rule;

    // If this rule has already been merged into another rule, we skip it.
    if (ruleInfo.merged)
      continue;

    // Find the best set of rules to group, which is simply the largest set.
    let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
    {
      matchesForIndex = (matchesForIndex || []).filter(match =>
      {
        // Filter out rules that have either already been merged into other
        // rules or have had other rules merged into them.
        return !rulesInfo[match.index].merged &&
               !rulesInfo[match.index].mergedInto;
      });

      return matchesForIndex.length > best.length ? matchesForIndex : best;
    },
    []);

    let multiEdit = false;

    // If we couldn't find a single rule to merge with, let's see if we have a
    // multiple character edit. e.g. we could merge "ad" and "adserver" into
    // "ad(server)?".
    if (best.length == 0 && ruleInfo.multiEditMatch &&
        !rulesInfo[ruleInfo.multiEditMatch.index].merged &&
        !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
    {
      best = [ruleInfo.multiEditMatch];
      multiEdit = true;
    }

    if (best.length == 0)
      continue;

    let urlFilter = rule.trigger["url-filter"];

    let editIndex = best[0].edit.index;

    if (!multiEdit)
    {
      // Merge all the matching rules into this one.

      // The character class starts out with this rule's own character.
      let characters = [urlFilter[editIndex]];
      let quantifier = "";

      for (let match of best)
      {
        if (match.edit.type == "delete")
        {
          quantifier = "?";
        }
        else
        {
          let character = rulesInfo[match.index].rule
                                                .trigger["url-filter"][editIndex];

          // Identical target filters would otherwise repeat a character.
          if (characters.indexOf(character) == -1)
            characters.push(character);
        }

        // Mark the target rule as merged so other rules don't try to merge
        // it again.
        rulesInfo[match.index].merged = true;
      }

      let atom = characters[0];

      if (characters.length > 1)
      {
        // A hyphen between two other characters would denote a range, so it
        // always goes last.
        let hyphenIndex = characters.indexOf("-");

        if (hyphenIndex != -1)
        {
          characters.splice(hyphenIndex, 1);
          characters.push("-");
        }

        atom = "[" + characters.join("") + "]";
      }

      urlFilter = urlFilter.substring(0, editIndex) + atom + quantifier +
                  urlFilter.substring(editIndex + 1);
    }
    else
    {
      let editEndIndex = best[0].edit.endIndex;

      // Mark the target rule as merged so other rules don't try to merge it
      // again.
      rulesInfo[best[0].index].merged = true;

      urlFilter = urlFilter.substring(0, editIndex) + "(" +
                  urlFilter.substring(editIndex, editEndIndex) + ")?" +
                  urlFilter.substring(editEndIndex);
    }

    rule.trigger["url-filter"] = urlFilter;

    // Mark this rule as one that has had other rules merged into it.
    ruleInfo.mergedInto = true;
  }
}
| + | 
/**
 * Merges a group of rules into the first rule of the group by combining the
 * values of an array property (e.g. "resource-type" or "if-domain").
 *
 * The first rule is modified in place and its info object gets "mergedInto"
 * set; the info objects of all other rules get "merged" set.
 *
 * A rule that doesn't have the property at all isn't restricted by it. If the
 * group contains such a rule, the merged rule must not be restricted either,
 * so the property is removed from it rather than set to the union of the
 * remaining values.
 *
 * @param {Object[]} rulesInfo Info objects, each carrying a "rule" property;
 *   the caller is expected to pass rules that differ only in the given
 *   property
 * @param {string} propertyType The part of the rule holding the property,
 *   e.g. "trigger"
 * @param {string} property The name of the array property
 */
function mergeRulesByArrayProperty(rulesInfo, propertyType, property)
{
  // Nothing to merge into.
  if (rulesInfo.length == 0)
    return;

  let values = new Set();
  let unrestricted = false;

  rulesInfo.forEach((ruleInfo, index) =>
  {
    let list = ruleInfo.rule[propertyType][property];

    if (list)
    {
      for (let value of list)
        values.add(value);
    }
    else
    {
      unrestricted = true;
    }

    if (index > 0)
      ruleInfo.merged = true;
  });

  let mergedProperties = rulesInfo[0].rule[propertyType];

  if (unrestricted)
    delete mergedProperties[property];
  else if (values.size > 0)
    mergedProperties[property] = Array.from(values);

  rulesInfo[0].mergedInto = true;
}
| + | 
/**
 * Partitions rules into groups whose members are identical in their trigger
 * and action once the given property is left out of the comparison.
 *
 * The rules themselves are not modified; the comparison is done on shallow
 * copies of each rule's trigger and action.
 *
 * @param {Object[]} rulesInfo Info objects, each carrying a "rule" property
 * @param {string} propertyType The part of the rule holding the property,
 *   i.e. "trigger" or "action"
 * @param {string} property The name of the property to leave out
 * @returns {Map.<string,Object[]>} The info objects of each group, keyed by
 *   the JSON serialization of the rule without the property
 */
function groupRulesByMergeableProperty(rulesInfo, propertyType, property)
{
  const groups = new Map();

  for (const info of rulesInfo)
  {
    // Shallow copies are enough, only a top-level property gets removed.
    const stripped = {
      trigger: Object.assign({}, info.rule.trigger),
      action: Object.assign({}, info.rule.action)
    };

    delete stripped[propertyType][property];

    // Whatever is left of the rule identifies the group.
    const key = JSON.stringify(stripped);

    if (!groups.has(key))
      groups.set(key, []);

    groups.get(key).push(info);
  }

  return groups;
}
| + | 
/**
 * Reduces the number of rules by merging rules that differ only in their
 * "url-filter", "resource-type" or "if-domain" trigger property, in that
 * order.
 *
 * The rule objects passed in may be modified in the process.
 *
 * @param {Object[]} rules The rules to merge
 * @param {Object} [options]
 * @param {boolean} [options.exhaustive=false] Passed on to the merging by
 *   "url-filter" as its "exhaustive" argument
 * @returns {Object[]} The rules that are left after merging
 */
function mergeRules(rules, options)
{
  const settings = Object.assign({}, {exhaustive: false}, options);

  const isNotMerged = info => !info.merged;

  let remaining = rules.map(rule => ({rule}));

  // First pass: rules that are the same except for a similar URL filter.
  const groupsByURLFilter = groupRulesByMergeableProperty(remaining, "trigger",
                                                          "url-filter");

  for (const group of groupsByURLFilter.values())
  {
    if (group.length > 1)
      mergeRulesByURLFilter(group, settings.exhaustive);
  }

  // Drop the rules that have been merged into other rules.
  remaining = remaining.filter(isNotMerged);

  // Further passes: rules that are the same except for one array property.
  for (const arrayProperty of ["resource-type", "if-domain"])
  {
    const groups = groupRulesByMergeableProperty(remaining, "trigger",
                                                 arrayProperty);

    for (const group of groups.values())
    {
      if (group.length > 1)
        mergeRulesByArrayProperty(group, "trigger", arrayProperty);
    }

    remaining = remaining.filter(isNotMerged);
  }

  return remaining.map(info => info.rule);
}
| + | 
| let ContentBlockerList = | 
| /** | 
| * Create a new Adblock Plus filter to content blocker list converter | 
| * | 
| * @constructor | 
| */ | 
| exports.ContentBlockerList = function () | 
| { | 
| @@ -419,18 +809,25 @@ | 
| } | 
| }; | 
| /** | 
| * Generate content blocker list for all filters that were added | 
| * | 
| * @returns {Filter} filter Filter to convert | 
| */ | 
| -ContentBlockerList.prototype.generateRules = function(filter) | 
| +ContentBlockerList.prototype.generateRules = function(options) | 
| { | 
| + const defaultOptions = { | 
| + merge: false, | 
| + exhaustiveMerge: false | 
| + }; | 
| + | 
| + options = Object.assign({}, defaultOptions, options); | 
| + | 
| let rules = []; | 
| let groupedElemhideFilters = new Map(); | 
| for (let filter of this.elemhideFilters) | 
| { | 
| let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 
| if (!result) | 
| continue; | 
| @@ -467,10 +864,21 @@ | 
| for (let filter of this.elemhideExceptions) | 
| convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 
| for (let filter of this.requestFilters) | 
| convertFilterAddRules(rules, filter, "block", true); | 
| for (let filter of this.requestExceptions) | 
| convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 
| - return rules.filter(rule => !hasNonASCI(rule)); | 
| + rules = rules.filter(rule => !hasNonASCI(rule)); | 
| + | 
| + if (options.merge) | 
| + { | 
| + let mergeOptions = { | 
| + exhaustive: options.exhaustiveMerge | 
| + }; | 
| + | 
| + rules = mergeRules(rules, mergeOptions); | 
| + } | 
| + | 
| + return rules; | 
| }; |