| Index: lib/abp2blocklist.js | 
| =================================================================== | 
| --- a/lib/abp2blocklist.js | 
| +++ b/lib/abp2blocklist.js | 
| @@ -33,16 +33,63 @@ | 
| | typeMap.POPUP | 
| | typeMap.OBJECT | 
| | typeMap.OBJECT_SUBREQUEST | 
| | typeMap.XMLHTTPREQUEST | 
| | typeMap.PING | 
| | typeMap.SUBDOCUMENT | 
| | typeMap.OTHER); | 
|  | 
| +function pearsonHash(message) | 
| +{ | 
| +  // This is an implementation of the Pearson hashing algorithm, where we | 
| +  // generate a 32-bit digest for a given message. | 
| + | 
| +  // Note that this code will only look at the lowest 8 bits of each character. | 
| +  // For best results, encode the input as UTF-8. | 
| + | 
| +  // A table containing the numbers 0 through 255 in random order. | 
| +  const table = [ | 
| +    0xe5, 0x0a, 0x9f, 0x79, 0x99, 0xad, 0x10, 0x85, 0x5d, 0x55, 0x75, 0x2e, | 
| +    0x04, 0x1a, 0xb5, 0x7d, 0x96, 0xe6, 0xa3, 0xc6, 0x82, 0x87, 0xb2, 0xef, | 
| +    0x00, 0x64, 0x70, 0x4b, 0xe4, 0x2f, 0x37, 0x52, 0x90, 0x1d, 0x08, 0x68, | 
| +    0x3a, 0x26, 0x74, 0xaa, 0xc1, 0x80, 0x17, 0xc2, 0x0f, 0xec, 0xc8, 0x1f, | 
| +    0xe2, 0x3c, 0xe1, 0xa1, 0x8f, 0xfd, 0x9d, 0x0b, 0xd5, 0xcc, 0xd4, 0xd9, | 
| +    0xf0, 0x3d, 0x5e, 0x57, 0xae, 0x12, 0x46, 0xb0, 0x63, 0x94, 0x61, 0x9a, | 
| +    0xbb, 0x76, 0x0c, 0x3f, 0xc4, 0x59, 0xdc, 0x5c, 0xb4, 0xc7, 0x73, 0x39, | 
| +    0x65, 0x2c, 0x2a, 0xc3, 0xed, 0x20, 0x54, 0xfe, 0xfb, 0xd0, 0xa6, 0x33, | 
| +    0x4a, 0x9e, 0xe7, 0x49, 0xea, 0x58, 0xaf, 0x35, 0x30, 0x95, 0x2b, 0x56, | 
| +    0x14, 0xff, 0xb1, 0xd6, 0x27, 0x6a, 0x88, 0x89, 0x43, 0x4c, 0xca, 0xb9, | 
| +    0x21, 0x8a, 0x78, 0xf1, 0x18, 0x1e, 0xd1, 0xe0, 0x60, 0xf8, 0x3e, 0xdd, | 
| +    0x25, 0x16, 0xde, 0xc0, 0x98, 0x28, 0x7a, 0x3b, 0x1b, 0x45, 0xa5, 0xb3, | 
| +    0xe3, 0x84, 0xd3, 0xb8, 0xbd, 0x47, 0xe9, 0xfa, 0xc9, 0xb6, 0xbe, 0x6c, | 
| +    0x9c, 0xac, 0xda, 0xfc, 0x41, 0x0d, 0x07, 0x91, 0x6b, 0x6f, 0x03, 0xcb, | 
| +    0xbc, 0x8d, 0x06, 0x01, 0xd2, 0x8c, 0x19, 0x5a, 0x02, 0xba, 0x4f, 0x6e, | 
| +    0x2d, 0xf4, 0xcf, 0x7e, 0x6d, 0x42, 0x93, 0x31, 0xbf, 0xdf, 0x5f, 0x67, | 
| +    0x53, 0xf6, 0x38, 0x9b, 0xa7, 0xe8, 0xee, 0x5b, 0x0e, 0x22, 0xdb, 0x51, | 
| +    0x8e, 0x69, 0x97, 0x32, 0x36, 0xce, 0x77, 0x4d, 0xa4, 0xf2, 0x23, 0xc5, | 
| +    0x11, 0x05, 0xab, 0xf9, 0x13, 0xd8, 0x7b, 0xa8, 0x40, 0x66, 0xd7, 0x24, | 
| +    0x86, 0xa0, 0xeb, 0xf3, 0x81, 0x4e, 0x50, 0xf7, 0xb7, 0x7f, 0x83, 0xa2, | 
| +    0xa9, 0x09, 0xcd, 0x62, 0x7c, 0x92, 0x8b, 0x71, 0x44, 0xf5, 0x72, 0x1c, | 
| +    0x29, 0x48, 0x34, 0x15 | 
| +  ]; | 
| + | 
| +  let digest = 0; | 
| + | 
| +  for (let i = 0; i < 4; i++) | 
| +  { | 
| +    let d = table[(message.charCodeAt(0) + i) % 256]; | 
| +    for (let j = 1; j < message.length; j++) | 
| +      d = table[d ^ message.charCodeAt(j)]; | 
| +    digest |= (d & 0xff) << i * 8; | 
| +  } | 
| + | 
| +  return digest; | 
| +} | 
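| + | 
| +// Illustrative usage sketch (not part of this patch's functional changes): | 
| +// the digest is deterministic for a given input, and callers with possibly | 
| +// non-ASCII input are expected to reduce it to 8-bit characters first, as | 
| +// mergeCloselyMatchingRules does below with encodeURIComponent, e.g. | 
| +// | 
| +//   pearsonHash(encodeURIComponent(JSON.stringify(obj))) // 32-bit number | 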
| + | 
| function parseDomains(domains, included, excluded) | 
| { | 
| for (let domain in domains) | 
| { | 
| if (domain != "") | 
| { | 
| let enabled = domains[domain]; | 
| domain = punycode.toASCII(domain.toLowerCase()); | 
| @@ -361,16 +408,425 @@ | 
| newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 
| i = pos.end; | 
| } | 
| newSelector.push(selector.substring(i)); | 
|  | 
| return newSelector.join(""); | 
| } | 
|  | 
| +function closeMatch(s, t, singleCharacterOnly) | 
| +{ | 
| +  // This function returns an edit operation, one of "substitute", "delete", | 
| +  // or "insert", along with an index in the source string at which the edit | 
| +  // must occur in order to arrive at the target string. If the strings are | 
| +  // not a close match, it returns null. | 
| + | 
| +  // If singleCharacterOnly is false, deleting or inserting a contiguous | 
| +  // range of characters at a single index is also treated as one edit. For | 
| +  // example, "internal" and "international" are considered to be one edit | 
| +  // apart: inserting the substring "tiona" from the latter into the former. | 
| + | 
| +  // A few things to note: | 
| +  // | 
| +  //   1) This function does not care about how the input strings are treated | 
| +  //      by the caller. It only treats them as raw strings. For example, the | 
| +  //      caller may treat them as regular expressions, where "[ab]" and "[bc]" | 
| +  //      could be considered to have an edit distance of 1, since the order | 
| +  //      within the brackets does not matter. This function will still return | 
| +  //      null for this set of inputs since they are two edits apart. | 
| +  // | 
| +  //   2) To be friendly to calling code that might be passing in regular | 
| +  //      expressions anyway, this function will simply return null if it | 
| +  //      encounters a special character (e.g. "\", "?", "+", "*", etc.) in the | 
| +  //      delta. For example, given "Hello" and "Hello, how are you?", it will | 
| +  //      return null instead of "{type: 'insert', index: 5, endIndex: 19}". | 
| +  // | 
| +  //   3) The calling code within this file does indeed pass in regular | 
| +  //      expressions (the strict subset of JavaScript regular expressions | 
| +  //      supported by WebKit for content blockers), making the important | 
| +  //      assumption that the parts where two such regular expressions may | 
| +  //      differ can always be treated as normal strings. | 
| +  // | 
| +  //      For example, "^https?://.*/ads" and "^https?://.*/adv" differ only in | 
| +  //      the last character, therefore the regular expressions can safely be | 
| +  //      merged into "^https?://.*/ad[sv]". If, for example, the characters in | 
| +  //      the delta were to appear within square brackets originally in the | 
| +  //      input strings (e.g. "^https?://.*/ad[sx]" and "^https?://.*/ad[vx]"), | 
| +  //      the calling code would have to do extra work to merge the two regular | 
| +  //      expressions correctly. The calling code within this file assumes that | 
| +  //      this is never the case. | 
| + | 
| +  let diff = s.length - t.length; | 
| + | 
| +  // In single-character mode, if the string lengths differ by more than one | 
| +  // character, we cannot arrive at the target from the source in one edit. | 
| +  if (singleCharacterOnly && (diff < -1 || diff > 1)) | 
| +    return null; | 
| + | 
| +  // If target is longer than source, swap them for the purpose of our | 
| +  // calculation. | 
| +  if (diff < 0) | 
| +  { | 
| +    let tmp = s; | 
| +    s = t; | 
| +    t = tmp; | 
| +  } | 
| + | 
| +  let edit = null; | 
| + | 
| +  // If the string lengths differ by only one character at most, use the simple | 
| +  // algorithm to find a single character edit. | 
| +  if (diff == 0 || diff == 1 || diff == -1) | 
| +  { | 
| +    for (let i = 0, j = 0; i < s.length; i++) | 
| +    { | 
| +      if (s[i] == t[j]) | 
| +      { | 
| +        j++; | 
| +      } | 
| +      else if (edit) | 
| +      { | 
| +        // Since we want one and only one edit operation, we must bail here. | 
| +        return null; | 
| +      } | 
| +      else if ((s[i] == "." || s[i] == "+" || s[i] == "$" || s[i] == "?" || | 
| +                s[i] == "*" || s[i] == "{" || s[i] == "}" || s[i] == "(" || | 
| +                s[i] == ")" || s[i] == "[" || s[i] == "]" || s[i] == "\\") || | 
| +               (t[j] == "." || t[j] == "+" || t[j] == "$" || t[j] == "?" || | 
| +                t[j] == "*" || t[j] == "{" || t[j] == "}" || t[j] == "(" || | 
| +                t[j] == ")" || t[j] == "[" || t[j] == "]" || t[j] == "\\")) | 
| +      { | 
| +        // We don't deal with special characters for now. | 
| +        return null; | 
| +      } | 
| +      else if (diff == 0) | 
| +      { | 
| +        // If both strings are equal in length, this is a substitution. | 
| +        edit = {type: "substitute", index: i}; | 
| +        j++; | 
| +      } | 
| +      else if (diff > 0) | 
| +      { | 
| +        // If the source string is longer, this is a deletion. | 
| +        edit = {type: "delete", index: i}; | 
| +      } | 
| +      else | 
| +      { | 
| +        edit = {type: "insert", index: i}; | 
| +      } | 
| +    } | 
| +  } | 
| +  else if (!singleCharacterOnly) | 
| +  { | 
| +    // Try another algorithm to find a multiple character deletion or | 
| +    // insertion. | 
| + | 
| +    let i = 0, j = 0; | 
| + | 
| +    for (; i < s.length; i++) | 
| +    { | 
| +      if (s[i] != t[i]) | 
| +        break; | 
| +    } | 
| + | 
| +    for (; j < t.length; j++) | 
| +    { | 
| +      if (t.length - j == i || | 
| +          s[s.length - j - 1] != t[t.length - j - 1]) | 
| +        break; | 
| +    } | 
| + | 
| +    if (i != t.length - j) | 
| +      return null; | 
| + | 
| +    for (let k = i; k < s.length - j; k++) | 
| +    { | 
| +      // If there are any special characters in the delta, bail. | 
| +      if (s[k] == "." || s[k] == "+" || s[k] == "$" || s[k] == "?" || | 
| +          s[k] == "{" || s[k] == "}" || s[k] == "(" || s[k] == ")" || | 
| +          s[k] == "[" || s[k] == "]" || s[k] == "\\") | 
| +        return null; | 
| +    } | 
| + | 
| +    if (diff > 0) | 
| +    { | 
| +      edit = {type: "delete", index: i, endIndex: s.length - j}; | 
| +    } | 
| +    else | 
| +    { | 
| +      edit = {type: "insert", index: i, endIndex: s.length - j}; | 
| +    } | 
| +  } | 
| + | 
| +  return edit; | 
| +} | 
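| + | 
| +// A few illustrative results (a sketch of the behavior described above; the | 
| +// returned index refers to the longer of the two strings): | 
| +// | 
| +//   closeMatch("ads", "adv", true)   // {type: "substitute", index: 2} | 
| +//   closeMatch("ads", "ad", true)    // {type: "delete", index: 2} | 
| +//   closeMatch("internal", "international", false) | 
| +//     // {type: "insert", index: 7, endIndex: 12} | 
| +//   closeMatch("[ab]", "[bc]", true) // null (two edits apart) | 
| +//   closeMatch("Hello", "Hello, how are you?", false) // null ("?" in delta) | 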
| + | 
| +function mergeCloselyMatchingRules(rules, options) | 
| +{ | 
| +  const defaultOptions = {advanced: false, exhaustive: false}; | 
| + | 
| +  options = Object.assign({}, defaultOptions, options); | 
| + | 
| +  // Closely matching rules tend to appear near each other in the list, so by | 
| +  // default we only look for matches within this range of neighboring rules. | 
| +  // Increasing this value can give us more matches and a smaller resulting | 
| +  // rule set, but possibly at a significant performance cost. | 
| +  const heuristicRange = 100; | 
| + | 
| +  let rulesInfo = new Array(rules.length); | 
| + | 
| +  rules.forEach((rule, index) => | 
| +  { | 
| +    rulesInfo[index] = {rule}; | 
| + | 
| +    if (rule.action.type == "ignore-previous-rules") | 
| +    { | 
| +      rulesInfo[index].skip = true; | 
| +    } | 
| +    else | 
| +    { | 
| +      // Save a fingerprint of the rule with the URL filter removed. We use | 
| +      // this for comparison later. | 
| +      let copy = { | 
| +        trigger: Object.assign({}, rule.trigger), | 
| +        action: Object.assign({}, rule.action) | 
| +      }; | 
| + | 
| +      delete copy.trigger["url-filter"]; | 
| + | 
| +      let stringified = JSON.stringify(copy); | 
| + | 
| +      if (options.exhaustive) | 
| +      { | 
| +        // The Pearson hash function expects all characters to be within the | 
| +        // 8-bit range. | 
| +        stringified = encodeURIComponent(stringified); | 
| + | 
| +        rulesInfo[index].ruleHash = pearsonHash(stringified); | 
| +      } | 
| +      else | 
| +      { | 
| +        rulesInfo[index].ruleHash = stringified; | 
| +      } | 
| +    } | 
| +  }); | 
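| + | 
| +  // For illustration (hypothetical rule, not taken from a real filter list): | 
| +  // a rule such as | 
| +  // | 
| +  //   {trigger: {"url-filter": "^https?://ads\\.example\\.com/", | 
| +  //              "resource-type": ["image"]}, | 
| +  //    action: {type: "block"}} | 
| +  // | 
| +  // has its fingerprint computed from the JSON of the rule minus the | 
| +  // "url-filter" key (or, in exhaustive mode, from a Pearson hash of that | 
| +  // JSON), so two rules become merge candidates whenever only their URL | 
| +  // filters differ. | 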
| + | 
| +  for (let i = 0; i < rules.length; i++) | 
| +  { | 
| +    if (rulesInfo[i].skip) | 
| +      continue; | 
| + | 
| +    let limit = options.exhaustive ? rules.length : | 
| +                Math.min(i + heuristicRange, rules.length); | 
| + | 
| +    for (let j = i + 1; j < limit; j++) | 
| +    { | 
| +      if (rulesInfo[j].skip) | 
| +        continue; | 
| + | 
| +      // Check whether the rules are identical except for the URL filter, by | 
| +      // comparing the fingerprints computed above. | 
| +      if (rulesInfo[i].ruleHash == rulesInfo[j].ruleHash) | 
| +      { | 
| +        let source = rules[i].trigger["url-filter"]; | 
| +        let target = rules[j].trigger["url-filter"]; | 
| + | 
| +        let edit = closeMatch(source, target, !options.advanced); | 
| + | 
| +        if (edit) | 
| +        { | 
| +          let urlFilter, ruleInfo, match = {edit}; | 
| + | 
| +          if (edit.type == "insert") | 
| +          { | 
| +            // Convert the insertion into a deletion and stick it on the target | 
| +            // rule instead. We can only group deletions and substitutions; | 
| +            // therefore insertions must be treated as deletions on the target | 
| +            // rule. | 
| +            urlFilter = target; | 
| +            ruleInfo = rulesInfo[j]; | 
| +            match.index = i; | 
| +            edit.type = "delete"; | 
| +          } | 
| +          else | 
| +          { | 
| +            urlFilter = source; | 
| +            ruleInfo = rulesInfo[i]; | 
| +            match.index = j; | 
| +          } | 
| + | 
| +          // If the edit has an end index, it represents a multiple character | 
| +          // edit. | 
| +          let multiEdit = !!edit.endIndex; | 
| + | 
| +          if (multiEdit) | 
| +          { | 
| +            // We only keep track of one multiple-character edit: however | 
| +            // many characters such an edit spans, it can only ever merge | 
| +            // this rule with one other rule, so further matches don't help. | 
| +            if (!ruleInfo.multiEditMatch) | 
| +              ruleInfo.multiEditMatch = match; | 
| +          } | 
| +          else | 
| +          { | 
| +            // For single-character edits, multiple rules can be merged into | 
| +            // one; e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?". | 
| +            if (!ruleInfo.matches) | 
| +              ruleInfo.matches = new Array(urlFilter.length + 1); | 
| + | 
| +            // Matches at a particular index. For example, for a source string | 
| +            // "ads", both target strings "ad" (deletion) and "adv" | 
| +            // (substitution) match at index 2, hence they are grouped together | 
| +            // to possibly be merged later into "ad[sv]?". | 
| +            let matchesForIndex = ruleInfo.matches[edit.index]; | 
| + | 
| +            if (matchesForIndex) | 
| +            { | 
| +              matchesForIndex.push(match); | 
| +            } | 
| +            else | 
| +            { | 
| +              matchesForIndex = [match]; | 
| +              ruleInfo.matches[edit.index] = matchesForIndex; | 
| +            } | 
| + | 
| +            // Keep track of the largest set of matches; we sort the | 
| +            // candidate rules by this later to get the best results. | 
| +            if (!ruleInfo.bestMatches || | 
| +                matchesForIndex.length > ruleInfo.bestMatches.length) | 
| +              ruleInfo.bestMatches = matchesForIndex; | 
| +          } | 
| +        } | 
| +      } | 
| +    } | 
| +  } | 
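| + | 
| +  // To illustrate the bookkeeping above (a sketch assuming three otherwise | 
| +  // identical block rules whose URL filters are "ads", "ad", and "adv"): | 
| +  // after this scan, rulesInfo[0].matches[2] holds two matches, a deletion | 
| +  // (for "ad") and a substitution (for "adv"), and is also | 
| +  // rulesInfo[0].bestMatches; the merging pass below turns these three rules | 
| +  // into the single URL filter "ad[sv]?". | 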
| + | 
| +  // Filter out rules that have no matches at all. | 
| +  let candidateRulesInfo = rulesInfo.filter(ruleInfo => | 
| +  { | 
| +    return ruleInfo.bestMatches || ruleInfo.multiEditMatch; | 
| +  }); | 
| + | 
| +  // For best results, we have to sort the candidates by the largest set of | 
| +  // matches. | 
| +  // | 
| +  // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to | 
| +  // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and | 
| +  // "[ab]dx" (3 rules). | 
| +  candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | 
| +  { | 
| +    let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length : | 
| +                  ruleInfo1.multiEditMatch ? 1 : 0; | 
| +    let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length : | 
| +                  ruleInfo2.multiEditMatch ? 1 : 0; | 
| + | 
| +    return weight2 - weight1; | 
| +  }); | 
| + | 
| +  for (let ruleInfo of candidateRulesInfo) | 
| +  { | 
| +    let rule = ruleInfo.rule; | 
| + | 
| +    // If this rule has already been merged into another rule, we skip it. | 
| +    if (ruleInfo.merged) | 
| +      continue; | 
| + | 
| +    // Find the best set of rules to group, which is simply the largest set. | 
| +    let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | 
| +    { | 
| +      matchesForIndex = (matchesForIndex || []).filter(match => | 
| +      { | 
| +        // Filter out rules that have either already been merged into other | 
| +        // rules or have had other rules merged into them. | 
| +        return !rulesInfo[match.index].merged && | 
| +               !rulesInfo[match.index].mergedInto; | 
| +      }); | 
| + | 
| +      return matchesForIndex.length > best.length ? matchesForIndex : best; | 
| +    }, | 
| +    []); | 
| + | 
| +    let multiEdit = false; | 
| + | 
| +    // If we couldn't find any rules to merge with via single-character | 
| +    // edits, see whether we have a multiple-character edit instead; e.g. we | 
| +    // could merge "ad" and "adserver" into "ad(server)?". | 
| +    if (best.length == 0 && ruleInfo.multiEditMatch && | 
| +        !rulesInfo[ruleInfo.multiEditMatch.index].merged && | 
| +        !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | 
| +    { | 
| +      best = [ruleInfo.multiEditMatch]; | 
| +      multiEdit = true; | 
| +    } | 
| + | 
| +    if (best.length > 0) | 
| +    { | 
| +      let urlFilter = rule.trigger["url-filter"]; | 
| + | 
| +      let editIndex = best[0].edit.index; | 
| + | 
| +      if (!multiEdit) | 
| +      { | 
| +        // Merge all the matching rules into this one. | 
| + | 
| +        let characters = []; | 
| +        let quantifier = ""; | 
| + | 
| +        for (let match of best) | 
| +        { | 
| +          if (match.edit.type == "delete") | 
| +          { | 
| +            quantifier = "?"; | 
| +          } | 
| +          else | 
| +          { | 
| +            let character = rules[match.index].trigger["url-filter"][editIndex]; | 
| +            characters.push(character); | 
| +          } | 
| + | 
| +          // Mark the target rule as merged so other rules don't try to merge | 
| +          // it again. | 
| +          rulesInfo[match.index].merged = true; | 
| +        } | 
| + | 
| +        urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | 
| +                    urlFilter.substring(editIndex + 1); | 
| +        if (characters.length > 0) | 
| +        { | 
| +          urlFilter = urlFilter.substring(0, editIndex) + "[" + | 
| +                      urlFilter[editIndex] + characters.join("") + "]" + | 
| +                      urlFilter.substring(editIndex + 1); | 
| +        } | 
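| + | 
| +        // For example (continuing the illustrative "ads"/"ad"/"adv" case | 
| +        // noted earlier): inserting the quantifier first yields "ads?", and | 
| +        // wrapping the edit position in a character class yields "ad[sv]?". | 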
| +      } | 
| +      else | 
| +      { | 
| +        let editEndIndex = best[0].edit.endIndex; | 
| + | 
| +        // Mark the target rule as merged so other rules don't try to merge it | 
| +        // again. | 
| +        rulesInfo[best[0].index].merged = true; | 
| + | 
| +        urlFilter = urlFilter.substring(0, editIndex) + "(" + | 
| +                    urlFilter.substring(editIndex, editEndIndex) + ")?" + | 
| +                    urlFilter.substring(editEndIndex); | 
| +      } | 
| + | 
| +      rule.trigger["url-filter"] = urlFilter; | 
| + | 
| +      // Mark this rule as one that has had other rules merged into it. | 
| +      ruleInfo.mergedInto = true; | 
| +    } | 
| +  } | 
| + | 
| +  // Filter out rules that have been merged into other rules. | 
| +  return rulesInfo.filter(ruleInfo => !ruleInfo.merged) | 
| +         .map(ruleInfo => ruleInfo.rule); | 
| +} | 
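| + | 
| +// Illustrative sketch of the overall effect (hypothetical rules, shown only | 
| +// to document the expected behavior with the default options): | 
| +// | 
| +//   mergeCloselyMatchingRules([ | 
| +//     {trigger: {"url-filter": "^https?://.*/ads"}, action: {type: "block"}}, | 
| +//     {trigger: {"url-filter": "^https?://.*/ad"}, action: {type: "block"}}, | 
| +//     {trigger: {"url-filter": "^https?://.*/adv"}, action: {type: "block"}} | 
| +//   ]) | 
| +// | 
| +// returns a single rule whose URL filter is "^https?://.*/ad[sv]?". | 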
| + | 
| let ContentBlockerList = | 
| /** | 
| * Create a new Adblock Plus filter to content blocker list converter | 
| * | 
| * @constructor | 
| */ | 
| exports.ContentBlockerList = function () | 
| { | 
| @@ -419,18 +875,27 @@ | 
| } | 
| }; | 
|  | 
| /** | 
| * Generate content blocker list for all filters that were added | 
| * | 
| -* @returns   {Filter}   filter    Filter to convert | 
| +* @param   {object}   [options]   Options for merging closely matching rules | 
| +* @returns {object[]}  An array of generated content blocker rules | 
| */ | 
| -ContentBlockerList.prototype.generateRules = function(filter) | 
| +ContentBlockerList.prototype.generateRules = function(options) | 
| { | 
| +  const defaultOptions = { | 
| +    merge: false, | 
| +    fastMerge: true, | 
| +    advancedMerge: null, | 
| +    exhaustiveMerge: null | 
| +  }; | 
| + | 
| +  options = Object.assign({}, defaultOptions, options); | 
| + | 
| let rules = []; | 
|  | 
| let groupedElemhideFilters = new Map(); | 
| for (let filter of this.elemhideFilters) | 
| { | 
| let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 
| if (!result) | 
| continue; | 
| @@ -467,10 +932,26 @@ | 
|  | 
| for (let filter of this.elemhideExceptions) | 
| convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 
| for (let filter of this.requestFilters) | 
| convertFilterAddRules(rules, filter, "block", true); | 
| for (let filter of this.requestExceptions) | 
| convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 
|  | 
| -  return rules.filter(rule => !hasNonASCI(rule)); | 
| +  rules = rules.filter(rule => !hasNonASCI(rule)); | 
| + | 
| +  if (options.merge) | 
| +  { | 
| +    // If the more specific options ("advancedMerge" and "exhaustiveMerge") | 
| +    // are given, they override the more general "fastMerge" option. | 
| +    let mergeOptions = { | 
| +      advanced: options.advancedMerge || | 
| +                (!options.fastMerge && options.advancedMerge != false), | 
| +      exhaustive: options.exhaustiveMerge || | 
| +                  (!options.fastMerge && options.exhaustiveMerge != false) | 
| +    }; | 
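| + | 
| +    // For illustration (a sketch of how these flags combine, given the | 
| +    // defaults above): {merge: true} performs a fast merge (single-character | 
| +    // edits, nearby rules only); {merge: true, fastMerge: false} enables | 
| +    // both advanced and exhaustive merging; and {merge: true, | 
| +    // fastMerge: false, exhaustiveMerge: false} enables advanced merging | 
| +    // only. | 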
| + | 
| +    rules = mergeCloselyMatchingRules(rules, mergeOptions); | 
| +  } | 
| + | 
| +  return rules; | 
| }; | 
|  |