| OLD | NEW | 
|---|
| 1 /* | 1 /* | 
| 2  * This file is part of Adblock Plus <https://adblockplus.org/>, | 2  * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 3  * Copyright (C) 2006-2017 eyeo GmbH | 3  * Copyright (C) 2006-2017 eyeo GmbH | 
| 4  * | 4  * | 
| 5  * Adblock Plus is free software: you can redistribute it and/or modify | 5  * Adblock Plus is free software: you can redistribute it and/or modify | 
| 6  * it under the terms of the GNU General Public License version 3 as | 6  * it under the terms of the GNU General Public License version 3 as | 
| 7  * published by the Free Software Foundation. | 7  * published by the Free Software Foundation. | 
| 8  * | 8  * | 
| 9  * Adblock Plus is distributed in the hope that it will be useful, | 9  * Adblock Plus is distributed in the hope that it will be useful, | 
| 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 359   { | 359   { | 
| 360     newSelector.push(selector.substring(i, pos.start)); | 360     newSelector.push(selector.substring(i, pos.start)); | 
| 361     newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 361     newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 
| 362     i = pos.end; | 362     i = pos.end; | 
| 363   } | 363   } | 
| 364   newSelector.push(selector.substring(i)); | 364   newSelector.push(selector.substring(i)); | 
| 365 | 365 | 
| 366   return newSelector.join(""); | 366   return newSelector.join(""); | 
| 367 } | 367 } | 
| 368 | 368 | 
|  | 369 function closeMatch(s, t) | 
|  | 370 { | 
|  | 371   // This function returns an edit operation, one of "substitute", "delete", | 
|  | 372   // and "insert", along with an index in the source string where the edit must | 
|  | 373   // occur in order to arrive at the target string. If the strings are not a | 
|  | 374   // close match, it returns null. | 
|  | 375   // | 
|  | 376   // Two strings are considered to be a close match if they are one edit | 
|  | 377   // operation apart. | 
|  | 378   // | 
|  | 379   // Deletions or insertions of a contiguous range of characters from one | 
|  | 380   // string into the other, at the same index, are treated as a single edit. | 
|  | 381   // For example, "internal" and "international" are considered to be one edit | 
|  | 382   // apart and therefore a close match. | 
|  | 383 | 
|  | 384   // A few things to note: | 
|  | 385   // | 
|  | 386   //   1) This function does not care about the format of the input strings. | 
|  | 387   //   For example, the caller may pass in regular expressions, where "[ab]" | 
|  | 388   //   and "[bc]" could be considered to be a close match, since the order | 
|  | 389   //   within the brackets doesn't matter. This function will still return null | 
|  | 390   //   for this set of inputs since they are two edits apart. | 
|  | 391   // | 
|  | 392   //   2) To be friendly to calling code that might be passing in regular | 
|  | 393   //   expressions, this function will simply return null if it encounters a | 
|  | 394   //   special character (e.g. "\", "?", "+", etc.) in the delta. For example, | 
|  | 395   //   given "Hello" and "Hello, how are you?", it will return null. | 
|  | 396   // | 
|  | 397   //   3) If the caller does indeed pass in regular expressions, it must make | 
|  | 398   //   the important assumption that the parts where two such regular | 
|  | 399   //   expressions may differ can always be treated as normal strings. For | 
|  | 400   //   example, "^https?://.*/ads" and "^https?://.*/adv" differ only in the | 
|  | 401   //   last character, therefore the regular expressions can safely be merged | 
|  | 402   //   into "^https?://.*/ad[sv]". | 
|  | 403 | 
|  | 404   let diff = s.length - t.length; | 
|  | 405 | 
|  | 406   // If target is longer than source, swap them for the purpose of our | 
|  | 407   // calculation. | 
|  | 408   if (diff < 0) | 
|  | 409   { | 
|  | 410     let tmp = s; | 
|  | 411     s = t; | 
|  | 412     t = tmp; | 
|  | 413   } | 
|  | 414 | 
|  | 415   let edit = null; | 
|  | 416 | 
|  | 417   let i = 0, j = 0; | 
|  | 418 | 
|  | 419   for (; i < s.length; i++) | 
|  | 420   { | 
|  | 421     if (s[i] != t[i]) | 
|  | 422       break; | 
|  | 423   } | 
|  | 424 | 
|  | 425   for (; j < t.length; j++) | 
|  | 426   { | 
|  | 427     if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1]) | 
|  | 428       break; | 
|  | 429   } | 
|  | 430 | 
|  | 431   if (diff == 0) | 
|  | 432   { | 
|  | 433     if (t.length - j - i != 1) | 
|  | 434       return null; | 
|  | 435   } | 
|  | 436   else if (i != t.length - j) | 
|  | 437   { | 
|  | 438     return null; | 
|  | 439   } | 
|  | 440 | 
|  | 441   for (let k = i; k < s.length - j; k++) | 
|  | 442   { | 
|  | 443     // If there are any special characters in the delta, bail. | 
|  | 444     if (s[k] == "." || s[k] == "+" || s[k] == "$" || s[k] == "?" || | 
|  | 445         s[k] == "{" || s[k] == "}" || s[k] == "(" || s[k] == ")" || | 
|  | 446         s[k] == "[" || s[k] == "]" || s[k] == "\\") | 
|  | 447       return null; | 
|  | 448   } | 
|  | 449 | 
|  | 450   if (diff == 0) | 
|  | 451   { | 
|  | 452     edit = {type: "substitute", index: i}; | 
|  | 453   } | 
|  | 454   else if (diff > 0) | 
|  | 455   { | 
|  | 456     edit = {type: "delete", index: i}; | 
|  | 457 | 
|  | 458     if (diff > 1) | 
|  | 459       edit.endIndex = s.length - j; | 
|  | 460   } | 
|  | 461   else | 
|  | 462   { | 
|  | 463     edit = {type: "insert", index: i}; | 
|  | 464 | 
|  | 465     if (diff < -1) | 
|  | 466       edit.endIndex = s.length - j; | 
|  | 467   } | 
|  | 468 | 
|  | 469   return edit; | 
|  | 470 } | 
|  | 471 | 
|  | 472 function mergeRulesByURLFilter(rulesInfo, exhaustive) | 
|  | 473 { | 
|  | 474   // Closely matching rules are likely to be within a certain range. We only | 
|  | 475   // look for matches within this range. If we increase this value, it can give | 
|  | 476   // us more matches and a smaller resulting rule set, but possibly at a | 
|  | 477   // significant performance cost. | 
|  | 478   const heuristicRange = 10; | 
|  | 479 | 
|  | 480   for (let i = 0; i < rulesInfo.length; i++) | 
|  | 481   { | 
|  | 482     let limit = exhaustive ? rulesInfo.length : | 
|  | 483                 Math.min(i + heuristicRange, rulesInfo.length); | 
|  | 484 | 
|  | 485     for (let j = i + 1; j < limit; j++) | 
|  | 486     { | 
|  | 487       let source = rulesInfo[i].rule.trigger["url-filter"]; | 
|  | 488       let target = rulesInfo[j].rule.trigger["url-filter"]; | 
|  | 489 | 
|  | 490       let edit = closeMatch(source, target); | 
|  | 491 | 
|  | 492       if (edit) | 
|  | 493       { | 
|  | 494         let urlFilter, ruleInfo, match = {edit}; | 
|  | 495 | 
|  | 496         if (edit.type == "insert") | 
|  | 497         { | 
|  | 498           // Convert the insertion into a deletion and stick it on the target | 
|  | 499           // rule instead. We can only group deletions and substitutions; | 
|  | 500           // therefore insertions must be treated as deletions on the target | 
|  | 501           // rule. | 
|  | 502           urlFilter = target; | 
|  | 503           ruleInfo = rulesInfo[j]; | 
|  | 504           match.index = i; | 
|  | 505           edit.type = "delete"; | 
|  | 506         } | 
|  | 507         else | 
|  | 508         { | 
|  | 509           urlFilter = source; | 
|  | 510           ruleInfo = rulesInfo[i]; | 
|  | 511           match.index = j; | 
|  | 512         } | 
|  | 513 | 
|  | 514         // If the edit has an end index, it represents a multiple character | 
|  | 515         // edit. | 
|  | 516         let multiEdit = !!edit.endIndex; | 
|  | 517 | 
|  | 518         if (multiEdit) | 
|  | 519         { | 
|  | 520           // We only care about a single multiple character edit because the | 
|  | 521           // number of characters for such a match doesn't matter, we can | 
|  | 522           // only merge with one other rule. | 
|  | 523           if (!ruleInfo.multiEditMatch) | 
|  | 524             ruleInfo.multiEditMatch = match; | 
|  | 525         } | 
|  | 526         else | 
|  | 527         { | 
|  | 528           // For single character edits, multiple rules can be merged into | 
|  | 529           // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?". | 
|  | 530           if (!ruleInfo.matches) | 
|  | 531             ruleInfo.matches = new Array(urlFilter.length); | 
|  | 532 | 
|  | 533           // Matches at a particular index. For example, for a source string | 
|  | 534           // "ads", both target strings "ad" (deletion) and "adv" | 
|  | 535           // (substitution) match at index 2, hence they are grouped together | 
|  | 536           // to possibly be merged later into "ad[sv]?". | 
|  | 537           let matchesForIndex = ruleInfo.matches[edit.index]; | 
|  | 538 | 
|  | 539           if (matchesForIndex) | 
|  | 540           { | 
|  | 541             matchesForIndex.push(match); | 
|  | 542           } | 
|  | 543           else | 
|  | 544           { | 
|  | 545             matchesForIndex = [match]; | 
|  | 546             ruleInfo.matches[edit.index] = matchesForIndex; | 
|  | 547           } | 
|  | 548 | 
|  | 549           // Keep track of the best set of matches. We later sort by this to | 
|  | 550           // get best results. | 
|  | 551           if (!ruleInfo.bestMatches || | 
|  | 552               matchesForIndex.length > ruleInfo.bestMatches.length) | 
|  | 553             ruleInfo.bestMatches = matchesForIndex; | 
|  | 554         } | 
|  | 555       } | 
|  | 556     } | 
|  | 557   } | 
|  | 558 | 
|  | 559   // Filter out rules that have no matches at all. | 
|  | 560   let candidateRulesInfo = rulesInfo.filter(ruleInfo => | 
|  | 561   { | 
|  | 562     return ruleInfo.bestMatches || ruleInfo.multiEditMatch | 
|  | 563   }); | 
|  | 564 | 
|  | 565   // For best results, we have to sort the candidates by the largest set of | 
|  | 566   // matches. | 
|  | 567   // | 
|  | 568   // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to | 
|  | 569   // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and | 
|  | 570   // "[ab]dx" (3 rules). | 
|  | 571   candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | 
|  | 572   { | 
|  | 573     let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length : | 
|  | 574                   ruleInfo1.multiEditMatch ? 1 : 0; | 
|  | 575     let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length : | 
|  | 576                   ruleInfo2.multiEditMatch ? 1 : 0; | 
|  | 577 | 
|  | 578     return weight2 - weight1; | 
|  | 579   }); | 
|  | 580 | 
|  | 581   for (let ruleInfo of candidateRulesInfo) | 
|  | 582   { | 
|  | 583     let rule = ruleInfo.rule; | 
|  | 584 | 
|  | 585     // If this rule has already been merged into another rule, we skip it. | 
|  | 586     if (ruleInfo.merged) | 
|  | 587       continue; | 
|  | 588 | 
|  | 589     // Find the best set of rules to group, which is simply the largest set. | 
|  | 590     let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | 
|  | 591     { | 
|  | 592       matchesForIndex = (matchesForIndex || []).filter(match => | 
|  | 593       { | 
|  | 594         // Filter out rules that have either already been merged into other | 
|  | 595         // rules or have had other rules merged into them. | 
|  | 596         return !rulesInfo[match.index].merged && | 
|  | 597                !rulesInfo[match.index].mergedInto; | 
|  | 598       }); | 
|  | 599 | 
|  | 600       return matchesForIndex.length > best.length ? matchesForIndex : best; | 
|  | 601     }, | 
|  | 602     []); | 
|  | 603 | 
|  | 604     let multiEdit = false; | 
|  | 605 | 
|  | 606     // If we couldn't find a single rule to merge with, let's see if we have a | 
|  | 607     // multiple character edit. e.g. we could merge "ad" and "adserver" into | 
|  | 608     // "ad(server)?". | 
|  | 609     if (best.length == 0 && ruleInfo.multiEditMatch && | 
|  | 610         !rulesInfo[ruleInfo.multiEditMatch.index].merged && | 
|  | 611         !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | 
|  | 612     { | 
|  | 613       best = [ruleInfo.multiEditMatch]; | 
|  | 614       multiEdit = true; | 
|  | 615     } | 
|  | 616 | 
|  | 617     if (best.length > 0) | 
|  | 618     { | 
|  | 619       let urlFilter = rule.trigger["url-filter"]; | 
|  | 620 | 
|  | 621       let editIndex = best[0].edit.index; | 
|  | 622 | 
|  | 623       if (!multiEdit) | 
|  | 624       { | 
|  | 625         // Merge all the matching rules into this one. | 
|  | 626 | 
|  | 627         let characters = []; | 
|  | 628         let quantifier = ""; | 
|  | 629 | 
|  | 630         for (let match of best) | 
|  | 631         { | 
|  | 632           if (match.edit.type == "delete") | 
|  | 633           { | 
|  | 634             quantifier = "?"; | 
|  | 635           } | 
|  | 636           else | 
|  | 637           { | 
|  | 638             let character = rulesInfo[match.index].rule | 
|  | 639                             .trigger["url-filter"][editIndex]; | 
|  | 640             characters.push(character); | 
|  | 641           } | 
|  | 642 | 
|  | 643           // Mark the target rule as merged so other rules don't try to merge | 
|  | 644           // it again. | 
|  | 645           rulesInfo[match.index].merged = true; | 
|  | 646         } | 
|  | 647 | 
|  | 648         urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | 
|  | 649                     urlFilter.substring(editIndex + 1); | 
|  | 650         if (characters.length > 0) | 
|  | 651         { | 
|  | 652           urlFilter = urlFilter.substring(0, editIndex) + "[" + | 
|  | 653                       urlFilter[editIndex] + characters.join("") + "]" + | 
|  | 654                       urlFilter.substring(editIndex + 1); | 
|  | 655         } | 
|  | 656       } | 
|  | 657       else | 
|  | 658       { | 
|  | 659         let editEndIndex = best[0].edit.endIndex; | 
|  | 660 | 
|  | 661         // Mark the target rule as merged so other rules don't try to merge it | 
|  | 662         // again. | 
|  | 663         rulesInfo[best[0].index].merged = true; | 
|  | 664 | 
|  | 665         urlFilter = urlFilter.substring(0, editIndex) + "(" + | 
|  | 666                     urlFilter.substring(editIndex, editEndIndex) + ")?" + | 
|  | 667                     urlFilter.substring(editEndIndex); | 
|  | 668       } | 
|  | 669 | 
|  | 670       rule.trigger["url-filter"] = urlFilter; | 
|  | 671 | 
|  | 672       // Mark this rule as one that has had other rules merged into it. | 
|  | 673       ruleInfo.mergedInto = true; | 
|  | 674     } | 
|  | 675   } | 
|  | 676 } | 
|  | 677 | 
|  | 678 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) | 
|  | 679 { | 
|  | 680   let set = new Set(); | 
|  | 681 | 
|  | 682   rulesInfo.forEach((ruleInfo, index) => | 
|  | 683   { | 
|  | 684     if (ruleInfo.rule[propertyType][property]) | 
|  | 685     { | 
|  | 686       for (let value of ruleInfo.rule[propertyType][property]) | 
|  | 687         set.add(value); | 
|  | 688     } | 
|  | 689 | 
|  | 690     if (index > 0) | 
|  | 691       ruleInfo.merged = true; | 
|  | 692   }); | 
|  | 693 | 
|  | 694   if (set.size > 0) | 
|  | 695     rulesInfo[0].rule[propertyType][property] = Array.from(set); | 
|  | 696 | 
|  | 697   rulesInfo[0].mergedInto = true; | 
|  | 698 } | 
|  | 699 | 
|  | 700 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) | 
|  | 701 { | 
|  | 702   let mergeableRulesInfoByGroup = new Map(); | 
|  | 703 | 
|  | 704   rulesInfo.forEach(ruleInfo => | 
|  | 705   { | 
|  | 706     let copy = { | 
|  | 707       trigger: Object.assign({}, ruleInfo.rule.trigger), | 
|  | 708       action: Object.assign({}, ruleInfo.rule.action) | 
|  | 709     }; | 
|  | 710 | 
|  | 711     delete copy[propertyType][property]; | 
|  | 712 | 
|  | 713     let groupKey = JSON.stringify(copy); | 
|  | 714 | 
|  | 715     let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey); | 
|  | 716 | 
|  | 717     if (mergeableRulesInfo) | 
|  | 718       mergeableRulesInfo.push(ruleInfo); | 
|  | 719     else | 
|  | 720       mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]); | 
|  | 721   }); | 
|  | 722 | 
|  | 723   return mergeableRulesInfoByGroup; | 
|  | 724 } | 
|  | 725 | 
|  | 726 function mergeRules(rules, options) | 
|  | 727 { | 
|  | 728   const defaultOptions = {exhaustive: false}; | 
|  | 729 | 
|  | 730   options = Object.assign({}, defaultOptions, options); | 
|  | 731 | 
|  | 732   let rulesInfo = rules.map(rule => ({rule})); | 
|  | 733 | 
|  | 734   groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter") | 
|  | 735   .forEach(mergeableRulesInfo => | 
|  | 736   { | 
|  | 737     if (mergeableRulesInfo.length > 1) | 
|  | 738       mergeRulesByURLFilter(mergeableRulesInfo, options.exhaustive); | 
|  | 739   }); | 
|  | 740 | 
|  | 741   // Filter out rules that have been merged into other rules. | 
|  | 742   rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); | 
|  | 743 | 
|  | 744   for (let arrayProperty of ["resource-type", "if-domain"]) | 
|  | 745   { | 
|  | 746     groupRulesByMergeableProperty(rulesInfo, "trigger", arrayProperty) | 
|  | 747     .forEach(mergeableRulesInfo => | 
|  | 748     { | 
|  | 749       if (mergeableRulesInfo.length > 1) | 
|  | 750         mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty); | 
|  | 751     }); | 
|  | 752 | 
|  | 753     rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); | 
|  | 754   } | 
|  | 755 | 
|  | 756   return rulesInfo.map(ruleInfo => ruleInfo.rule); | 
|  | 757 } | 
|  | 758 | 
| 369 let ContentBlockerList = | 759 let ContentBlockerList = | 
| 370 /** | 760 /** | 
| 371  * Create a new Adblock Plus filter to content blocker list converter | 761  * Create a new Adblock Plus filter to content blocker list converter | 
| 372  * | 762  * | 
| 373  * @constructor | 763  * @constructor | 
| 374  */ | 764  */ | 
| 375 exports.ContentBlockerList = function () | 765 exports.ContentBlockerList = function () | 
| 376 { | 766 { | 
| 377   this.requestFilters = []; | 767   this.requestFilters = []; | 
| 378   this.requestExceptions = []; | 768   this.requestExceptions = []; | 
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 417 | 807 | 
| 418     parseDomains(filter.domains, domains, []); | 808     parseDomains(filter.domains, domains, []); | 
| 419   } | 809   } | 
| 420 }; | 810 }; | 
| 421 | 811 | 
| 422 /** | 812 /** | 
| 423  * Generate content blocker list for all filters that were added | 813  * Generate content blocker list for all filters that were added | 
| 424  * | 814  * | 
| 425  * @returns   {Filter}   filter    Filter to convert | 815  * @returns   {Filter}   filter    Filter to convert | 
| 426  */ | 816  */ | 
| 427 ContentBlockerList.prototype.generateRules = function(filter) | 817 ContentBlockerList.prototype.generateRules = function(options) | 
| 428 { | 818 { | 
|  | 819   const defaultOptions = { | 
|  | 820     merge: false, | 
|  | 821     exhaustiveMerge: false | 
|  | 822   }; | 
|  | 823 | 
|  | 824   options = Object.assign({}, defaultOptions, options); | 
|  | 825 | 
| 429   let rules = []; | 826   let rules = []; | 
| 430 | 827 | 
| 431   let groupedElemhideFilters = new Map(); | 828   let groupedElemhideFilters = new Map(); | 
| 432   for (let filter of this.elemhideFilters) | 829   for (let filter of this.elemhideFilters) | 
| 433   { | 830   { | 
| 434     let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 831     let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 
| 435     if (!result) | 832     if (!result) | 
| 436       continue; | 833       continue; | 
| 437 | 834 | 
| 438     if (result.matchDomains.length == 0) | 835     if (result.matchDomains.length == 0) | 
| (...skipping 26 matching lines...) Expand all  Loading... | 
| 465     } | 862     } | 
| 466   }); | 863   }); | 
| 467 | 864 | 
| 468   for (let filter of this.elemhideExceptions) | 865   for (let filter of this.elemhideExceptions) | 
| 469     convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 866     convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 
| 470   for (let filter of this.requestFilters) | 867   for (let filter of this.requestFilters) | 
| 471     convertFilterAddRules(rules, filter, "block", true); | 868     convertFilterAddRules(rules, filter, "block", true); | 
| 472   for (let filter of this.requestExceptions) | 869   for (let filter of this.requestExceptions) | 
| 473     convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 870     convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 
| 474 | 871 | 
| 475   return rules.filter(rule => !hasNonASCI(rule)); | 872   rules = rules.filter(rule => !hasNonASCI(rule)); | 
|  | 873 | 
|  | 874   if (options.merge) | 
|  | 875   { | 
|  | 876     let mergeOptions = { | 
|  | 877       exhaustive: options.exhaustiveMerge | 
|  | 878     }; | 
|  | 879 | 
|  | 880     rules = mergeRules(rules, mergeOptions); | 
|  | 881   } | 
|  | 882 | 
|  | 883   return rules; | 
| 476 }; | 884 }; | 
| OLD | NEW | 
|---|