Index: lib/abp2blocklist.js |
=================================================================== |
--- a/lib/abp2blocklist.js |
+++ b/lib/abp2blocklist.js |
@@ -361,16 +361,310 @@ |
newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
i = pos.end; |
} |
newSelector.push(selector.substring(i)); |
return newSelector.join(""); |
} |
+function closeMatch(s, t, {multi = false} = {}) |
kzar
2017/05/03 11:17:24
I've not seen this syntax before `{multi = false}
Manish Jethani
2017/05/03 14:41:54
This:
function func(param1, param2, {option1 =
kzar
2017/05/03 15:19:04
Acknowledged.
|
+{ |
+ // This function returns an edit operation (one of "substitute", "delete", |
+ // and "insert") along with an index in the source string where the edit |
+ // should occur in order to arrive at the target string. |
+ |
+ let diff = s.length - t.length; |
+ |
+ // If the string lenghts differ by more than one character, we cannot arrive |
kzar
2017/05/03 11:17:24
Nit: Typo "lenghts".
Manish Jethani
2017/05/04 02:49:32
Done.
|
+ // at target from source in a single edit operation. |
+ if (!multi && (diff < -1 || diff > 1)) |
+ return null; |
+ |
+ // If target is longer than source, swap them for the purpose of our |
+ // calculation. |
+ if (diff < 0) |
+ { |
+ let tmp = s; |
+ s = t; |
+ t = tmp; |
+ } |
+ |
+ let edit = null; |
+ let multiEdit = false; |
+ |
+ let j = 0; |
+ |
+ for (let i = 0; i < s.length; i++) |
+ { |
+ if (s[i] == t[j]) |
+ { |
+ j++; |
+ |
+ if (edit && multiEdit && !edit.closeIndex) |
+ edit.closeIndex = i; |
+ } |
+ else if (edit && (!multi || diff == 0 || edit.closeIndex)) |
+ { |
+ // Since we want one and only one edit operation, we must bail here. |
+ return null; |
+ } |
+ else if ((s[i] == "." || s[i] == "+" || s[i] == "$" || s[i] == "?" || |
+ s[i] == "{" || s[i] == "}" || s[i] == "(" || s[i] == ")" || |
+ s[i] == "[" || s[i] == "]" || s[i] == "\\") || |
+ (t[j] == "." || t[j] == "+" || t[j] == "$" || t[j] == "?" || |
+ t[j] == "{" || t[j] == "}" || t[j] == "(" || t[j] == ")" || |
+ t[j] == "[" || t[j] == "]" || t[j] == "\\")) |
+ { |
+ // We don't deal with special characters for now. |
kzar
2017/05/03 11:17:24
So we skip special characters in the url-filter re
Manish Jethani
2017/05/03 14:41:54
The above is not a good example because this is no
kzar
2017/05/03 15:19:04
Maybe add a comment explaining that assumption?
Manish Jethani
2017/05/04 02:49:32
Added a comment to explain this.
|
+ return null; |
+ } |
+ else |
+ { |
+ if (diff == 0) |
kzar
2017/05/03 11:17:24
Nit: Couldn't this be an `else if` too?
Manish Jethani
2017/05/04 02:49:32
Done.
|
+ { |
+ // If both strings are equal in length, this is a substitution. |
+ edit = {type: "substitute", index: i}; |
+ j++; |
+ } |
+ else |
+ { |
+ if (edit) |
+ multiEdit = true; |
+ else if (diff > 0) |
kzar
2017/05/03 11:17:24
Nit: Please use braces since the clause spans mult
Manish Jethani
2017/05/04 02:49:32
Done.
|
+ // If the source string is longer, this is a deletion. |
+ edit = {type: "delete", index: i}; |
+ else |
+ edit = {type: "insert", index: i}; |
+ } |
+ } |
+ } |
+ |
+ if (edit && multiEdit && !edit.closeIndex) |
+ { |
+ if (j < t.length) |
+ return null; |
+ |
+ edit.closeIndex = s.length; |
+ } |
+ |
+ return edit; |
+} |
+ |
+function ruleWithoutURLFilter(rule) |
+{ |
+ let copy = { |
kzar
2017/05/03 15:19:04
How about `return Object.create(rule, {"url-filter
Manish Jethani
2017/05/04 02:49:31
That would not work for multiple reasons, but most
|
+ trigger: Object.assign({}, rule.trigger), |
+ action: Object.assign({}, rule.action) |
+ }; |
+ |
+ delete copy.trigger["url-filter"]; |
+ |
+ return copy; |
+} |
+ |
+function mergeCloselyMatchingRules(rules, {multi = false} = {}) |
+{ |
+ // Closely matching rules are likely to be within a certain range. We only |
+ // look for matches within this range. If we increase this value, it can give |
+ // us more matches and a smaller resulting rule set, but possibly at a |
+ // significant performance cost. |
+ const heuristicRange = 100; |
kzar
2017/05/03 15:19:04
Since the code either runs in a place where speed
Manish Jethani
2017/05/04 02:49:32
In the latest update the generateRules function ta
|
+ |
+ let rulesInfo = new Array(rules.length); |
+ |
+ rules.forEach((rule, index) => |
+ { |
+ rulesInfo[index] = {rule}; |
kzar
2017/05/03 11:17:24
I'm not sure syntax like this will work for Safari
Manish Jethani
2017/05/03 14:41:54
I'll check, but if it doesn't work then I'll have
kzar
2017/05/08 08:13:02
You mentioned testing the code on Safari now, but
Manish Jethani
2017/05/08 14:03:58
I've been testing with Safari 10.
Anyway, this is
kzar
2017/05/09 10:05:46
I think you should test with Safari 9 at least onc
Manish Jethani
2017/05/09 15:52:46
"{rule: rule}" ought to work in every single JS en
|
+ |
+ if (rule.action.type == "ignore-previous-rules") |
+ { |
+ rulesInfo[index].skip = true; |
+ } |
+ else |
+ { |
+ // Save a stringified version of the rule, but without the URL filter. We |
+ // use this for comparison later. |
+ rulesInfo[index].stringifiedWithoutURLFilter = |
+ JSON.stringify(ruleWithoutURLFilter(rule)); |
+ } |
+ }); |
+ |
+ for (let i = 0; i < rules.length; i++) |
+ { |
+ if (rulesInfo[i].skip) |
+ continue; |
+ |
+ for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++) |
+ { |
+ if (rulesInfo[j].skip) |
+ continue; |
+ |
+ // Check if the rules are identical except for the URL filter. |
+ if (rulesInfo[i].stringifiedWithoutURLFilter == |
kzar
2017/05/03 15:19:04
I wonder if we could create a lookup table stringi
Manish Jethani
2017/05/04 02:49:32
I'm not sure what the benefit of that would be.
W
|
+ rulesInfo[j].stringifiedWithoutURLFilter) |
+ { |
+ let source = rules[i].trigger["url-filter"]; |
+ let target = rules[j].trigger["url-filter"]; |
+ |
+ let edit = closeMatch(source, target, {multi}); |
+ |
+ if (edit) |
+ { |
+ let urlFilter, ruleInfo, match = {edit}; |
+ |
+ if (edit.type == "insert") |
+ { |
+ // Convert the insertion into a deletion and stick it on the target |
+ // rule instead. We can only group deletions and substitutions; |
+ // therefore insertions must be treated as deletions on the target |
+ // rule, to be dealt with later. |
+ urlFilter = target; |
+ ruleInfo = rulesInfo[j]; |
+ match.index = i; |
+ edit.type = "delete"; |
+ } |
+ else |
+ { |
+ urlFilter = source; |
+ ruleInfo = rulesInfo[i]; |
+ match.index = j; |
+ } |
+ |
+ if (edit.closeIndex) |
+ { |
+ if (!ruleInfo.multiEditMatch) |
+ ruleInfo.multiEditMatch = match; |
+ } |
+ else |
+ { |
+ if (!ruleInfo.matches) |
+ ruleInfo.matches = new Array(urlFilter.length + 1); |
+ |
+ let matchesForIndex = ruleInfo.matches[edit.index]; |
+ |
+ if (matchesForIndex) |
+ { |
+ matchesForIndex.push(match); |
+ } |
+ else |
+ { |
+ matchesForIndex = [match]; |
+ ruleInfo.matches[edit.index] = matchesForIndex; |
+ } |
+ |
+ if (!ruleInfo.bestMatches || |
+ matchesForIndex.length > ruleInfo.bestMatches.length) |
+ ruleInfo.bestMatches = matchesForIndex; |
+ } |
+ } |
+ } |
+ } |
+ } |
+ |
+ let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches || |
+ ruleInfo.multiEditMatch); |
kzar
2017/05/03 11:17:24
Nit: Long line.
Manish Jethani
2017/05/04 02:49:31
Done.
|
+ |
+ // For best results, we have to sort the candidates by the number of matches. |
+ // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate |
+ // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3 |
+ // rules). |
+ candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => |
+ { |
+ let weight1 = 1; |
+ let weight2 = 1; |
+ |
+ if (ruleInfo1.bestMatches) |
+ weight1 = ruleInfo1.bestMatches.length; |
+ |
+ if (ruleInfo2.bestMatches) |
+ weight2 = ruleInfo2.bestMatches.length; |
+ |
+ return weight2 - weight1; |
+ }); |
+ |
+ for (let ruleInfo of candidateRulesInfo) |
+ { |
+ let rule = ruleInfo.rule; |
+ |
+ if (rule._merged) |
+ continue; |
+ |
+ // Find the best set of rules to group, which is simply the largest set. |
+ let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => |
+ { |
+ matchesForIndex = (matchesForIndex || []).filter(match => |
+ { |
+ // Filter out rules that have either already been merged into other |
+ // rules or have had other rules merged into them. |
+ return !rules[match.index]._merged && |
+ !rulesInfo[match.index].mergedInto; |
+ }); |
+ |
+ return matchesForIndex.length > best.length ? matchesForIndex : best; |
+ }, |
+ []); |
+ |
+ if (best.length == 0 && ruleInfo.multiEditMatch && |
+ !rules[ruleInfo.multiEditMatch.index]._merged && |
+ !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) |
+ best = [ruleInfo.multiEditMatch]; |
+ |
+ if (best.length > 0) |
+ { |
+ let urlFilter = rule.trigger["url-filter"]; |
+ |
+ let editIndex = best[0].edit.index; |
+ |
+ if (best[0] != ruleInfo.multiEditMatch) |
+ { |
+ // Merge all the matching rules into this one. |
+ |
+ let characters = []; |
+ let quantifier = ""; |
+ |
+ for (let match of best) |
+ { |
+ if (match.edit.type == "delete") |
+ quantifier = "?"; |
+ else |
+ characters.push(rules[match.index].trigger["url-filter"][editIndex]); |
+ |
+ rules[match.index]._merged = true; |
+ } |
+ |
+ urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + |
+ urlFilter.substring(editIndex + 1); |
+ if (characters.length > 0) |
+ { |
+ urlFilter = urlFilter.substring(0, editIndex) + "[" + |
+ urlFilter[editIndex] + characters.join("") + "]" + |
+ urlFilter.substring(editIndex + 1); |
+ } |
+ } |
+ else |
+ { |
+ let editCloseIndex = best[0].edit.closeIndex; |
+ |
+ rules[best[0].index]._merged = true; |
+ |
+ urlFilter = urlFilter.substring(0, editIndex) + "(" + |
+ urlFilter.substring(editIndex, editCloseIndex) + ")?" + |
+ urlFilter.substring(editCloseIndex); |
+ } |
+ |
+ rule.trigger["url-filter"] = urlFilter; |
+ |
+ ruleInfo.mergedInto = true; |
+ } |
+ } |
+ |
+ return rules.filter(rule => !rule._merged); |
+} |
+ |
let ContentBlockerList = |
/** |
* Create a new Adblock Plus filter to content blocker list converter |
* |
* @constructor |
*/ |
exports.ContentBlockerList = function () |
{ |
@@ -419,17 +713,18 @@ |
} |
}; |
/** |
* Generate content blocker list for all filters that were added |
* |
* @returns {Filter} filter Filter to convert |
*/ |
-ContentBlockerList.prototype.generateRules = function(filter) |
+ContentBlockerList.prototype.generateRules = function( |
+ {merge = false, multiMerge = false} = {}) |
{ |
let rules = []; |
let groupedElemhideFilters = new Map(); |
for (let filter of this.elemhideFilters) |
{ |
let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
if (!result) |
@@ -467,10 +762,15 @@ |
for (let filter of this.elemhideExceptions) |
convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
for (let filter of this.requestFilters) |
convertFilterAddRules(rules, filter, "block", true); |
for (let filter of this.requestExceptions) |
convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
- return rules.filter(rule => !hasNonASCI(rule)); |
+ rules = rules.filter(rule => !hasNonASCI(rule)); |
+ |
+ if (merge) |
+ rules = mergeCloselyMatchingRules(rules, {multi: multiMerge}); |
+ |
+ return rules; |
}; |