| Index: lib/abp2blocklist.js | 
| =================================================================== | 
| --- a/lib/abp2blocklist.js | 
| +++ b/lib/abp2blocklist.js | 
| @@ -361,16 +361,310 @@ | 
| newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 
| i = pos.end; | 
| } | 
| newSelector.push(selector.substring(i)); | 
| return newSelector.join(""); | 
| } | 
| +function closeMatch(s, t, {multi = false} = {}) | 
| 
kzar
2017/05/03 11:17:24
I've not seen this syntax before `{multi = false}
 
Manish Jethani
2017/05/03 14:41:54
This:
  function func(param1, param2, {option1 =
 
kzar
2017/05/03 15:19:04
Acknowledged.
 | 
| +{ | 
| + // This function returns an edit operation (one of "substitute", "delete", | 
| + // and "insert") along with an index in the source string where the edit | 
| + // should occur in order to arrive at the target string. | 
| + | 
| + let diff = s.length - t.length; | 
| + | 
| + // If the string lengths differ by more than one character, we cannot arrive | 
| 
kzar
2017/05/03 11:17:24
Nit: Typo "lenghts".
 
Manish Jethani
2017/05/04 02:49:32
Done.
 | 
| + // at target from source in a single edit operation. | 
| + if (!multi && (diff < -1 || diff > 1)) | 
| + return null; | 
| + | 
| + // If target is longer than source, swap them for the purpose of our | 
| + // calculation. | 
| + if (diff < 0) | 
| + { | 
| + let tmp = s; | 
| + s = t; | 
| + t = tmp; | 
| + } | 
| + | 
| + let edit = null; | 
| + let multiEdit = false; | 
| + | 
| + let j = 0; | 
| + | 
| + for (let i = 0; i < s.length; i++) | 
| + { | 
| + if (s[i] == t[j]) | 
| + { | 
| + j++; | 
| + | 
| + if (edit && multiEdit && !edit.closeIndex) | 
| + edit.closeIndex = i; | 
| + } | 
| + else if (edit && (!multi || diff == 0 || edit.closeIndex)) | 
| + { | 
| + // Since we want one and only one edit operation, we must bail here. | 
| + return null; | 
| + } | 
| + else if ((s[i] == "." || s[i] == "+" || s[i] == "$" || s[i] == "?" || | 
| + s[i] == "{" || s[i] == "}" || s[i] == "(" || s[i] == ")" || | 
| + s[i] == "[" || s[i] == "]" || s[i] == "\\") || | 
| + (t[j] == "." || t[j] == "+" || t[j] == "$" || t[j] == "?" || | 
| + t[j] == "{" || t[j] == "}" || t[j] == "(" || t[j] == ")" || | 
| + t[j] == "[" || t[j] == "]" || t[j] == "\\")) | 
| + { | 
| + // We don't deal with special characters for now. | 
| 
kzar
2017/05/03 11:17:24
So we skip special characters in the url-filter re
 
Manish Jethani
2017/05/03 14:41:54
The above is not a good example because this is no
 
kzar
2017/05/03 15:19:04
Maybe add a comment explaining that assumption?
 
Manish Jethani
2017/05/04 02:49:32
Added a comment to explain this.
 | 
| + return null; | 
| + } | 
| + else | 
| + { | 
| + if (diff == 0) | 
| 
kzar
2017/05/03 11:17:24
Nit: Couldn't this be an `else if` too?
 
Manish Jethani
2017/05/04 02:49:32
Done.
 | 
| + { | 
| + // If both strings are equal in length, this is a substitution. | 
| + edit = {type: "substitute", index: i}; | 
| + j++; | 
| + } | 
| + else | 
| + { | 
| + if (edit) | 
| + multiEdit = true; | 
| + else if (diff > 0) | 
| 
kzar
2017/05/03 11:17:24
Nit: Please use braces since the clause spans mult
 
Manish Jethani
2017/05/04 02:49:32
Done.
 | 
| + // If the source string is longer, this is a deletion. | 
| + edit = {type: "delete", index: i}; | 
| + else | 
| + edit = {type: "insert", index: i}; | 
| + } | 
| + } | 
| + } | 
| + | 
| + if (edit && multiEdit && !edit.closeIndex) | 
| + { | 
| + if (j < t.length) | 
| + return null; | 
| + | 
| + edit.closeIndex = s.length; | 
| + } | 
| + | 
| + return edit; | 
| +} | 
| + | 
| +function ruleWithoutURLFilter(rule) | 
| +{ | 
| + let copy = { | 
| 
kzar
2017/05/03 15:19:04
How about `return Object.create(rule, {"url-filter
 
Manish Jethani
2017/05/04 02:49:31
That would not work for multiple reasons, but most
 | 
| + trigger: Object.assign({}, rule.trigger), | 
| + action: Object.assign({}, rule.action) | 
| + }; | 
| + | 
| + delete copy.trigger["url-filter"]; | 
| + | 
| + return copy; | 
| +} | 
| + | 
| +function mergeCloselyMatchingRules(rules, {multi = false} = {}) | 
| +{ | 
| + // Closely matching rules are likely to be within a certain range. We only | 
| + // look for matches within this range. If we increase this value, it can give | 
| + // us more matches and a smaller resulting rule set, but possibly at a | 
| + // significant performance cost. | 
| + const heuristicRange = 100; | 
| 
kzar
2017/05/03 15:19:04
Since the code either runs in a place where speed
 
Manish Jethani
2017/05/04 02:49:32
In the latest update the generateRules function ta
 | 
| + | 
| + let rulesInfo = new Array(rules.length); | 
| + | 
| + rules.forEach((rule, index) => | 
| + { | 
| + rulesInfo[index] = {rule}; | 
| 
kzar
2017/05/03 11:17:24
I'm not sure syntax like this will work for Safari
 
Manish Jethani
2017/05/03 14:41:54
I'll check, but if it doesn't work then I'll have
 
kzar
2017/05/08 08:13:02
You mentioned testing the code on Safari now, but
 
Manish Jethani
2017/05/08 14:03:58
I've been testing with Safari 10.
Anyway, this is
 
kzar
2017/05/09 10:05:46
I think you should test with Safari 9 at least onc
 
Manish Jethani
2017/05/09 15:52:46
"{rule: rule}" ought to work in every single JS en
 | 
| + | 
| + if (rule.action.type == "ignore-previous-rules") | 
| + { | 
| + rulesInfo[index].skip = true; | 
| + } | 
| + else | 
| + { | 
| + // Save a stringified version of the rule, but without the URL filter. We | 
| + // use this for comparison later. | 
| + rulesInfo[index].stringifiedWithoutURLFilter = | 
| + JSON.stringify(ruleWithoutURLFilter(rule)); | 
| + } | 
| + }); | 
| + | 
| + for (let i = 0; i < rules.length; i++) | 
| + { | 
| + if (rulesInfo[i].skip) | 
| + continue; | 
| + | 
| + for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++) | 
| + { | 
| + if (rulesInfo[j].skip) | 
| + continue; | 
| + | 
| + // Check if the rules are identical except for the URL filter. | 
| + if (rulesInfo[i].stringifiedWithoutURLFilter == | 
| 
kzar
2017/05/03 15:19:04
I wonder if we could create a lookup table stringi
 
Manish Jethani
2017/05/04 02:49:32
I'm not sure what the benefit of that would be.
W
 | 
| + rulesInfo[j].stringifiedWithoutURLFilter) | 
| + { | 
| + let source = rules[i].trigger["url-filter"]; | 
| + let target = rules[j].trigger["url-filter"]; | 
| + | 
| + let edit = closeMatch(source, target, {multi}); | 
| + | 
| + if (edit) | 
| + { | 
| + let urlFilter, ruleInfo, match = {edit}; | 
| + | 
| + if (edit.type == "insert") | 
| + { | 
| + // Convert the insertion into a deletion and stick it on the target | 
| + // rule instead. We can only group deletions and substitutions; | 
| + // therefore insertions must be treated as deletions on the target | 
| + // rule, to be dealt with later. | 
| + urlFilter = target; | 
| + ruleInfo = rulesInfo[j]; | 
| + match.index = i; | 
| + edit.type = "delete"; | 
| + } | 
| + else | 
| + { | 
| + urlFilter = source; | 
| + ruleInfo = rulesInfo[i]; | 
| + match.index = j; | 
| + } | 
| + | 
| + if (edit.closeIndex) | 
| + { | 
| + if (!ruleInfo.multiEditMatch) | 
| + ruleInfo.multiEditMatch = match; | 
| + } | 
| + else | 
| + { | 
| + if (!ruleInfo.matches) | 
| + ruleInfo.matches = new Array(urlFilter.length + 1); | 
| + | 
| + let matchesForIndex = ruleInfo.matches[edit.index]; | 
| + | 
| + if (matchesForIndex) | 
| + { | 
| + matchesForIndex.push(match); | 
| + } | 
| + else | 
| + { | 
| + matchesForIndex = [match]; | 
| + ruleInfo.matches[edit.index] = matchesForIndex; | 
| + } | 
| + | 
| + if (!ruleInfo.bestMatches || | 
| + matchesForIndex.length > ruleInfo.bestMatches.length) | 
| + ruleInfo.bestMatches = matchesForIndex; | 
| + } | 
| + } | 
| + } | 
| + } | 
| + } | 
| + | 
| + let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches || | 
| + ruleInfo.multiEditMatch); | 
| 
kzar
2017/05/03 11:17:24
Nit: Long line.
 
Manish Jethani
2017/05/04 02:49:31
Done.
 | 
| + | 
| + // For best results, we have to sort the candidates by the number of matches. | 
| + // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate | 
| + // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3 | 
| + // rules). | 
| + candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | 
| + { | 
| + let weight1 = 1; | 
| + let weight2 = 1; | 
| + | 
| + if (ruleInfo1.bestMatches) | 
| + weight1 = ruleInfo1.bestMatches.length; | 
| + | 
| + if (ruleInfo2.bestMatches) | 
| + weight2 = ruleInfo2.bestMatches.length; | 
| + | 
| + return weight2 - weight1; | 
| + }); | 
| + | 
| + for (let ruleInfo of candidateRulesInfo) | 
| + { | 
| + let rule = ruleInfo.rule; | 
| + | 
| + if (rule._merged) | 
| + continue; | 
| + | 
| + // Find the best set of rules to group, which is simply the largest set. | 
| + let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | 
| + { | 
| + matchesForIndex = (matchesForIndex || []).filter(match => | 
| + { | 
| + // Filter out rules that have either already been merged into other | 
| + // rules or have had other rules merged into them. | 
| + return !rules[match.index]._merged && | 
| + !rulesInfo[match.index].mergedInto; | 
| + }); | 
| + | 
| + return matchesForIndex.length > best.length ? matchesForIndex : best; | 
| + }, | 
| + []); | 
| + | 
| + if (best.length == 0 && ruleInfo.multiEditMatch && | 
| + !rules[ruleInfo.multiEditMatch.index]._merged && | 
| + !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | 
| + best = [ruleInfo.multiEditMatch]; | 
| + | 
| + if (best.length > 0) | 
| + { | 
| + let urlFilter = rule.trigger["url-filter"]; | 
| + | 
| + let editIndex = best[0].edit.index; | 
| + | 
| + if (best[0] != ruleInfo.multiEditMatch) | 
| + { | 
| + // Merge all the matching rules into this one. | 
| + | 
| + let characters = []; | 
| + let quantifier = ""; | 
| + | 
| + for (let match of best) | 
| + { | 
| + if (match.edit.type == "delete") | 
| + quantifier = "?"; | 
| + else | 
| + characters.push(rules[match.index].trigger["url-filter"][editIndex]); | 
| + | 
| + rules[match.index]._merged = true; | 
| + } | 
| + | 
| + urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | 
| + urlFilter.substring(editIndex + 1); | 
| + if (characters.length > 0) | 
| + { | 
| + urlFilter = urlFilter.substring(0, editIndex) + "[" + | 
| + urlFilter[editIndex] + characters.join("") + "]" + | 
| + urlFilter.substring(editIndex + 1); | 
| + } | 
| + } | 
| + else | 
| + { | 
| + let editCloseIndex = best[0].edit.closeIndex; | 
| + | 
| + rules[best[0].index]._merged = true; | 
| + | 
| + urlFilter = urlFilter.substring(0, editIndex) + "(" + | 
| + urlFilter.substring(editIndex, editCloseIndex) + ")?" + | 
| + urlFilter.substring(editCloseIndex); | 
| + } | 
| + | 
| + rule.trigger["url-filter"] = urlFilter; | 
| + | 
| + ruleInfo.mergedInto = true; | 
| + } | 
| + } | 
| + | 
| + return rules.filter(rule => !rule._merged); | 
| +} | 
| + | 
| let ContentBlockerList = | 
| /** | 
| * Create a new Adblock Plus filter to content blocker list converter | 
| * | 
| * @constructor | 
| */ | 
| exports.ContentBlockerList = function () | 
| { | 
| @@ -419,17 +713,18 @@ | 
| } | 
| }; | 
| /** | 
| * Generate content blocker list for all filters that were added | 
| * | 
| * @returns {Filter} filter Filter to convert | 
| */ | 
| -ContentBlockerList.prototype.generateRules = function(filter) | 
| +ContentBlockerList.prototype.generateRules = function( | 
| + {merge = false, multiMerge = false} = {}) | 
| { | 
| let rules = []; | 
| let groupedElemhideFilters = new Map(); | 
| for (let filter of this.elemhideFilters) | 
| { | 
| let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 
| if (!result) | 
| @@ -467,10 +762,15 @@ | 
| for (let filter of this.elemhideExceptions) | 
| convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 
| for (let filter of this.requestFilters) | 
| convertFilterAddRules(rules, filter, "block", true); | 
| for (let filter of this.requestExceptions) | 
| convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 
| - return rules.filter(rule => !hasNonASCI(rule)); | 
| + rules = rules.filter(rule => !hasNonASCI(rule)); | 
| + | 
| + if (merge) | 
| + rules = mergeCloselyMatchingRules(rules, {multi: multiMerge}); | 
| + | 
| + return rules; | 
| }; |