Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Add advanced merge support Created May 3, 2017, 4:44 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/abp2blocklist.js
===================================================================
--- a/lib/abp2blocklist.js
+++ b/lib/abp2blocklist.js
@@ -361,16 +361,310 @@
newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
i = pos.end;
}
newSelector.push(selector.substring(i));
return newSelector.join("");
}
+function closeMatch(s, t, {multi = false} = {})
kzar 2017/05/03 11:17:24 I've not seen this syntax before `{multi = false}
Manish Jethani 2017/05/03 14:41:54 This: function func(param1, param2, {option1 =
kzar 2017/05/03 15:19:04 Acknowledged.
+{
+ // This function returns an edit operation (one of "substitute", "delete",
+ // and "insert") along with an index in the source string where the edit
+ // should occur in order to arrive at the target string.
+
+ let diff = s.length - t.length;
+
+ // If the string lenghts differ by more than one character, we cannot arrive
kzar 2017/05/03 11:17:24 Nit: Typo "lenghts".
Manish Jethani 2017/05/04 02:49:32 Done.
+ // at target from source in a single edit operation.
+ if (!multi && (diff < -1 || diff > 1))
+ return null;
+
+ // If target is longer than source, swap them for the purpose of our
+ // calculation.
+ if (diff < 0)
+ {
+ let tmp = s;
+ s = t;
+ t = tmp;
+ }
+
+ let edit = null;
+ let multiEdit = false;
+
+ let j = 0;
+
+ for (let i = 0; i < s.length; i++)
+ {
+ if (s[i] == t[j])
+ {
+ j++;
+
+ if (edit && multiEdit && !edit.closeIndex)
+ edit.closeIndex = i;
+ }
+ else if (edit && (!multi || diff == 0 || edit.closeIndex))
+ {
+ // Since we want one and only one edit operation, we must bail here.
+ return null;
+ }
+ else if ((s[i] == "." || s[i] == "+" || s[i] == "$" || s[i] == "?" ||
+ s[i] == "{" || s[i] == "}" || s[i] == "(" || s[i] == ")" ||
+ s[i] == "[" || s[i] == "]" || s[i] == "\\") ||
+ (t[j] == "." || t[j] == "+" || t[j] == "$" || t[j] == "?" ||
+ t[j] == "{" || t[j] == "}" || t[j] == "(" || t[j] == ")" ||
+ t[j] == "[" || t[j] == "]" || t[j] == "\\"))
+ {
+ // We don't deal with special characters for now.
kzar 2017/05/03 11:17:24 So we skip special characters in the url-filter re
Manish Jethani 2017/05/03 14:41:54 The above is not a good example because this is no
kzar 2017/05/03 15:19:04 Maybe add a comment explaining that assumption?
Manish Jethani 2017/05/04 02:49:32 Added a comment to explain this.
+ return null;
+ }
+ else
+ {
+ if (diff == 0)
kzar 2017/05/03 11:17:24 Nit: Couldn't this be an `else if` too?
Manish Jethani 2017/05/04 02:49:32 Done.
+ {
+ // If both strings are equal in length, this is a substitution.
+ edit = {type: "substitute", index: i};
+ j++;
+ }
+ else
+ {
+ if (edit)
+ multiEdit = true;
+ else if (diff > 0)
kzar 2017/05/03 11:17:24 Nit: Please use braces since the clause spans mult
Manish Jethani 2017/05/04 02:49:32 Done.
+ // If the source string is longer, this is a deletion.
+ edit = {type: "delete", index: i};
+ else
+ edit = {type: "insert", index: i};
+ }
+ }
+ }
+
+ if (edit && multiEdit && !edit.closeIndex)
+ {
+ if (j < t.length)
+ return null;
+
+ edit.closeIndex = s.length;
+ }
+
+ return edit;
+}
+
+function ruleWithoutURLFilter(rule)
+{
+ let copy = {
kzar 2017/05/03 15:19:04 How about `return Object.create(rule, {"url-filter
Manish Jethani 2017/05/04 02:49:31 That would not work for multiple reasons, but most
+ trigger: Object.assign({}, rule.trigger),
+ action: Object.assign({}, rule.action)
+ };
+
+ delete copy.trigger["url-filter"];
+
+ return copy;
+}
+
+function mergeCloselyMatchingRules(rules, {multi = false} = {})
+{
+ // Closely matching rules are likely to be within a certain range. We only
+ // look for matches within this range. If we increase this value, it can give
+ // us more matches and a smaller resulting rule set, but possibly at a
+ // significant performance cost.
+ const heuristicRange = 100;
kzar 2017/05/03 15:19:04 Since the code either runs in a place where speed
Manish Jethani 2017/05/04 02:49:32 In the latest update the generateRules function ta
+
+ let rulesInfo = new Array(rules.length);
+
+ rules.forEach((rule, index) =>
+ {
+ rulesInfo[index] = {rule};
kzar 2017/05/03 11:17:24 I'm not sure syntax like this will work for Safari
Manish Jethani 2017/05/03 14:41:54 I'll check, but if it doesn't work then I'll have
kzar 2017/05/08 08:13:02 You mentioned testing the code on Safari now, but
Manish Jethani 2017/05/08 14:03:58 I've been testing with Safari 10. Anyway, this is
kzar 2017/05/09 10:05:46 I think you should test with Safari 9 at least onc
Manish Jethani 2017/05/09 15:52:46 "{rule: rule}" ought to work in every single JS en
+
+ if (rule.action.type == "ignore-previous-rules")
+ {
+ rulesInfo[index].skip = true;
+ }
+ else
+ {
+ // Save a stringified version of the rule, but without the URL filter. We
+ // use this for comparison later.
+ rulesInfo[index].stringifiedWithoutURLFilter =
+ JSON.stringify(ruleWithoutURLFilter(rule));
+ }
+ });
+
+ for (let i = 0; i < rules.length; i++)
+ {
+ if (rulesInfo[i].skip)
+ continue;
+
+ for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++)
+ {
+ if (rulesInfo[j].skip)
+ continue;
+
+ // Check if the rules are identical except for the URL filter.
+ if (rulesInfo[i].stringifiedWithoutURLFilter ==
kzar 2017/05/03 15:19:04 I wonder if we could create a lookup table stringi
Manish Jethani 2017/05/04 02:49:32 I'm not sure what the benefit of that would be. W
+ rulesInfo[j].stringifiedWithoutURLFilter)
+ {
+ let source = rules[i].trigger["url-filter"];
+ let target = rules[j].trigger["url-filter"];
+
+ let edit = closeMatch(source, target, {multi});
+
+ if (edit)
+ {
+ let urlFilter, ruleInfo, match = {edit};
+
+ if (edit.type == "insert")
+ {
+ // Convert the insertion into a deletion and stick it on the target
+ // rule instead. We can only group deletions and substitutions;
+ // therefore insertions must be treated as deletions on the target
+ // rule, to be dealt with later.
+ urlFilter = target;
+ ruleInfo = rulesInfo[j];
+ match.index = i;
+ edit.type = "delete";
+ }
+ else
+ {
+ urlFilter = source;
+ ruleInfo = rulesInfo[i];
+ match.index = j;
+ }
+
+ if (edit.closeIndex)
+ {
+ if (!ruleInfo.multiEditMatch)
+ ruleInfo.multiEditMatch = match;
+ }
+ else
+ {
+ if (!ruleInfo.matches)
+ ruleInfo.matches = new Array(urlFilter.length + 1);
+
+ let matchesForIndex = ruleInfo.matches[edit.index];
+
+ if (matchesForIndex)
+ {
+ matchesForIndex.push(match);
+ }
+ else
+ {
+ matchesForIndex = [match];
+ ruleInfo.matches[edit.index] = matchesForIndex;
+ }
+
+ if (!ruleInfo.bestMatches ||
+ matchesForIndex.length > ruleInfo.bestMatches.length)
+ ruleInfo.bestMatches = matchesForIndex;
+ }
+ }
+ }
+ }
+ }
+
+ let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches ||
+ ruleInfo.multiEditMatch);
kzar 2017/05/03 11:17:24 Nit: Long line.
Manish Jethani 2017/05/04 02:49:31 Done.
+
+ // For best results, we have to sort the candidates by the number of matches.
+ // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate
+ // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3
+ // rules).
+ candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
+ {
+ let weight1 = 1;
+ let weight2 = 1;
+
+ if (ruleInfo1.bestMatches)
+ weight1 = ruleInfo1.bestMatches.length;
+
+ if (ruleInfo2.bestMatches)
+ weight2 = ruleInfo2.bestMatches.length;
+
+ return weight2 - weight1;
+ });
+
+ for (let ruleInfo of candidateRulesInfo)
+ {
+ let rule = ruleInfo.rule;
+
+ if (rule._merged)
+ continue;
+
+ // Find the best set of rules to group, which is simply the largest set.
+ let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
+ {
+ matchesForIndex = (matchesForIndex || []).filter(match =>
+ {
+ // Filter out rules that have either already been merged into other
+ // rules or have had other rules merged into them.
+ return !rules[match.index]._merged &&
+ !rulesInfo[match.index].mergedInto;
+ });
+
+ return matchesForIndex.length > best.length ? matchesForIndex : best;
+ },
+ []);
+
+ if (best.length == 0 && ruleInfo.multiEditMatch &&
+ !rules[ruleInfo.multiEditMatch.index]._merged &&
+ !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto)
+ best = [ruleInfo.multiEditMatch];
+
+ if (best.length > 0)
+ {
+ let urlFilter = rule.trigger["url-filter"];
+
+ let editIndex = best[0].edit.index;
+
+ if (best[0] != ruleInfo.multiEditMatch)
+ {
+ // Merge all the matching rules into this one.
+
+ let characters = [];
+ let quantifier = "";
+
+ for (let match of best)
+ {
+ if (match.edit.type == "delete")
+ quantifier = "?";
+ else
+ characters.push(rules[match.index].trigger["url-filter"][editIndex]);
+
+ rules[match.index]._merged = true;
+ }
+
+ urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +
+ urlFilter.substring(editIndex + 1);
+ if (characters.length > 0)
+ {
+ urlFilter = urlFilter.substring(0, editIndex) + "[" +
+ urlFilter[editIndex] + characters.join("") + "]" +
+ urlFilter.substring(editIndex + 1);
+ }
+ }
+ else
+ {
+ let editCloseIndex = best[0].edit.closeIndex;
+
+ rules[best[0].index]._merged = true;
+
+ urlFilter = urlFilter.substring(0, editIndex) + "(" +
+ urlFilter.substring(editIndex, editCloseIndex) + ")?" +
+ urlFilter.substring(editCloseIndex);
+ }
+
+ rule.trigger["url-filter"] = urlFilter;
+
+ ruleInfo.mergedInto = true;
+ }
+ }
+
+ return rules.filter(rule => !rule._merged);
+}
+
let ContentBlockerList =
/**
* Create a new Adblock Plus filter to content blocker list converter
*
* @constructor
*/
exports.ContentBlockerList = function ()
{
@@ -419,17 +713,18 @@
}
};
/**
* Generate content blocker list for all filters that were added
*
* @returns {Filter} filter Filter to convert
*/
-ContentBlockerList.prototype.generateRules = function(filter)
+ContentBlockerList.prototype.generateRules = function(
+ {merge = false, multiMerge = false} = {})
{
let rules = [];
let groupedElemhideFilters = new Map();
for (let filter of this.elemhideFilters)
{
let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);
if (!result)
@@ -467,10 +762,15 @@
for (let filter of this.elemhideExceptions)
convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
for (let filter of this.requestFilters)
convertFilterAddRules(rules, filter, "block", true);
for (let filter of this.requestExceptions)
convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
- return rules.filter(rule => !hasNonASCI(rule));
+ rules = rules.filter(rule => !hasNonASCI(rule));
+
+ if (merge)
+ rules = mergeCloselyMatchingRules(rules, {multi: multiMerge});
+
+ return rules;
};
« no previous file with comments | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld