| Index: lib/abp2blocklist.js |
| =================================================================== |
| --- a/lib/abp2blocklist.js |
| +++ b/lib/abp2blocklist.js |
| @@ -361,16 +361,239 @@ |
| newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
| i = pos.end; |
| } |
| newSelector.push(selector.substring(i)); |
| return newSelector.join(""); |
| } |
/**
 * Regular expression metacharacters that closeMatch() refuses to treat as the
 * edited character. Merging rules around any of these would change the
 * meaning of the resulting pattern (e.g. deleting a leading "^" would later
 * be turned into the quantified anchor "^?").
 */
const closeMatchSpecialCharacters = new Set(".+$?{}()[]\\*^|");

/**
 * Determines whether two strings are exactly one edit operation apart.
 *
 * @param {string} s The source string.
 * @param {string} t The target string.
 * @returns {?{type: string, index: number}} The edit that turns the source
 *   into the target, where type is one of "substitute", "delete" and
 *   "insert". For "substitute" and "delete" the index refers to the source
 *   string; for "insert" it refers to the position of the extra character in
 *   the target string. Returns null if the strings are identical, if more
 *   than one edit is required, or if the edit involves a regular expression
 *   metacharacter.
 */
function closeMatch(s, t)
{
  let diff = s.length - t.length;

  // If the string lengths differ by more than one character, we cannot arrive
  // at target from source in a single edit operation.
  if (diff < -1 || diff > 1)
    return null;

  // Always walk the longer string; when the target is the longer one the
  // roles are swapped for the purpose of the calculation and the edit is
  // reported as an insertion.
  let longer = diff == -1 ? t : s;
  let shorter = diff == -1 ? s : t;

  let edit = null;

  for (let i = 0, j = 0; i < longer.length; i++)
  {
    if (longer[i] == shorter[j])
    {
      j++;
      continue;
    }

    // Since we want one and only one edit operation, we must bail on the
    // second mismatch.
    if (edit)
      return null;

    // We don't deal with special characters for now.
    // Review note (Manish Jethani, 2017/05/02): it should be possible to
    // merge "\[" and "\(" into ... (the note is truncated in the source).
    if (closeMatchSpecialCharacters.has(longer[i]) ||
        closeMatchSpecialCharacters.has(shorter[j]))
      return null;

    if (diff == 0)
    {
      // If both strings are equal in length, this is a substitution, which
      // consumes a character of both strings.
      edit = {type: "substitute", index: i};
      j++;
    }
    else if (diff == 1)
    {
      // If the source string is longer, this is a deletion.
      edit = {type: "delete", index: i};
    }
    else
    {
      // The target string is longer, so a character has to be inserted.
      edit = {type: "insert", index: i};
    }
  }

  return edit;
}
| + |
/**
 * Creates a copy of a content blocker rule that lacks the "url-filter"
 * trigger property. The trigger and action objects are copied shallowly, so
 * the original rule is left untouched.
 *
 * @param {object} rule A rule with trigger and action properties.
 * @returns {object} A new rule object without trigger["url-filter"].
 */
function ruleWithoutURLFilter(rule)
{
  let trigger = Object.assign({}, rule.trigger);
  let action = Object.assign({}, rule.action);

  // Only the copy loses its URL filter.
  delete trigger["url-filter"];

  return {trigger, action};
}
| + |
/**
 * Builds a regular expression character class matching any one of the given
 * characters literally.
 *
 * A hyphen is emitted first and a caret last: a hyphen between two other
 * characters would form a range (e.g. "1", "-", "_" would become "[1-_]"),
 * and a leading caret would negate the class.
 *
 * @param {Iterable.<string>} characters The distinct characters to match.
 * @returns {string} The character class, including the square brackets.
 */
function makeCharacterClass(characters)
{
  let hasHyphen = false;
  let hasCaret = false;
  let others = "";

  for (let character of characters)
  {
    if (character == "-")
      hasHyphen = true;
    else if (character == "^")
      hasCaret = true;
    else
      others += character;
  }

  return "[" + (hasHyphen ? "-" : "") + others + (hasCaret ? "^" : "") + "]";
}

/**
 * Merges rules that are identical except for their URL filters when those
 * filters are exactly one edit operation apart (see closeMatch). Substituted
 * characters are combined into a character class and a deleted character is
 * made optional with the "?" quantifier.
 *
 * The URL filter of a rule that absorbs other rules is modified in place on
 * the rule object passed in. A rule that is absorbed by a rule appearing
 * earlier in the list is left out of the result; a rule absorbed by a later
 * rule has already been emitted by then and so remains in the result.
 *
 * @param {object[]} rules The rules to merge.
 * @returns {object[]} A new array containing the remaining rules.
 */
function mergeCloselyMatchingRules(rules)
{
  let rulesInfo = rules.map(rule =>
  {
    // For these rules we're getting diminishing returns. i.e. there aren't too
    // many of these that qualify anyway, they just slow us down.
    if (rule.action.type == "css-display-none" ||
        rule.action.type == "ignore-previous-rules" ||
        rule.trigger["url-filter-is-case-sensitive"] ||
        rule.trigger["load-type"] ||
        rule.trigger["if-domain"] ||
        rule.trigger["unless-domain"])
      return {skip: true};

    // Save a stringified version of the rule, but without the URL filter. We
    // use this for comparison later.
    return {
      stringifiedWithoutURLFilter: JSON.stringify(ruleWithoutURLFilter(rule))
    };
  });

  for (let i = 0; i < rules.length; i++)
  {
    if (rulesInfo[i].skip)
      continue;

    for (let j = i + 1; j < rules.length; j++)
    {
      if (rulesInfo[j].skip)
        continue;

      // Only rules that are identical except for the URL filter qualify.
      if (rulesInfo[i].stringifiedWithoutURLFilter !=
          rulesInfo[j].stringifiedWithoutURLFilter)
        continue;

      let source = rules[i].trigger["url-filter"];
      let target = rules[j].trigger["url-filter"];

      // Find out if the Levenshtein distance between the rules is 1.
      let edit = closeMatch(source, target);
      if (!edit)
        continue;

      let urlFilter, ruleInfo, match = {edit};

      if (edit.type == "insert")
      {
        // Convert the insertion into a deletion and stick it on the target
        // rule instead. We can only group deletions and substitutions;
        // therefore insertions must be treated as deletions on the target
        // rule, to be dealt with later.
        urlFilter = target;
        ruleInfo = rulesInfo[j];
        match.index = i;
        edit.type = "delete";
      }
      else
      {
        urlFilter = source;
        ruleInfo = rulesInfo[i];
        match.index = j;
      }

      // The matches are bucketed by the index at which the edit occurs, since
      // only edits at the same index can be merged into one rule.
      if (!ruleInfo.matches)
        ruleInfo.matches = new Array(urlFilter.length + 1);

      let matchesForIndex = ruleInfo.matches[edit.index];

      if (matchesForIndex)
        matchesForIndex.push(match);
      else
        ruleInfo.matches[edit.index] = [match];
    }
  }

  let mergedRules = [];

  rules.forEach((rule, index) =>
  {
    let ruleInfo = rulesInfo[index];

    // This rule has already been absorbed by another one.
    if (ruleInfo.merged)
      return;

    // Find the best set of rules to group, which is simply the largest set.
    let best = null;
    for (let matchesForIndex of ruleInfo.matches || [])
    {
      if (matchesForIndex && (!best || matchesForIndex.length > best.length))
        best = matchesForIndex;
    }

    if (best)
    {
      // Merge all the matching rules into this one.

      let editIndex = best[0].edit.index;
      let urlFilter = rule.trigger["url-filter"];

      // A set, so that duplicate rules don't add the same character twice.
      let alternatives = new Set([urlFilter[editIndex]]);
      let quantifier = "";

      for (let match of best)
      {
        if (match.edit.type == "delete")
          quantifier = "?";
        else
          alternatives.add(rules[match.index].trigger["url-filter"][editIndex]);

        rulesInfo[match.index].merged = true;
      }

      // With only deletions to merge, the character stays as it is and merely
      // becomes optional.
      let atom = alternatives.size > 1 ? makeCharacterClass(alternatives) :
                                         urlFilter[editIndex];

      rule.trigger["url-filter"] = urlFilter.substring(0, editIndex) + atom +
                                   quantifier +
                                   urlFilter.substring(editIndex + 1);
    }

    mergedRules.push(rule);
  });

  return mergedRules;
}
| + |
| let ContentBlockerList = |
| /** |
| * Create a new Adblock Plus filter to content blocker list converter |
| * |
| * @constructor |
| */ |
| exports.ContentBlockerList = function () |
| { |
| @@ -467,10 +690,14 @@ |
| for (let filter of this.elemhideExceptions) |
| convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
| for (let filter of this.requestFilters) |
| convertFilterAddRules(rules, filter, "block", true); |
| for (let filter of this.requestExceptions) |
| convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
| - return rules.filter(rule => !hasNonASCI(rule)); |
| + rules = rules.filter(rule => !hasNonASCI(rule)); |
| + |
| + rules = mergeCloselyMatchingRules(rules); |
| + |
| + return rules; |
| }; |