Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
359 { | 359 { |
360 newSelector.push(selector.substring(i, pos.start)); | 360 newSelector.push(selector.substring(i, pos.start)); |
361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
362 i = pos.end; | 362 i = pos.end; |
363 } | 363 } |
364 newSelector.push(selector.substring(i)); | 364 newSelector.push(selector.substring(i)); |
365 | 365 |
366 return newSelector.join(""); | 366 return newSelector.join(""); |
367 } | 367 } |
368 | 368 |
369 function closeMatch(s, t) | |
370 { | |
371 // This function returns an edit operation, one of "substitute", "delete", | |
kzar
2017/05/08 08:13:03
Mind using the JSDoc syntax for the comment explai
Manish Jethani
2017/05/08 23:12:48
Done.
| |
372 // and "insert", along with an index in the source string where the edit must | |
373 // occur in order to arrive at the target string. If the strings are not a | |
374 // close match, it returns null. | |
375 // | |
376 // Two strings are considered to be a close match if they are one edit | |
377 // operation apart. | |
378 // | |
379 // Deletions or insertions of a contiguous range of characters from one | |
380 // string into the other, at the same index, are treated as a single edit. | |
381 // For example, "internal" and "international" are considered to be one edit | |
382 // apart and therefore a close match. | |
383 | |
384 // A few things to note: | |
385 // | |
386 // 1) This function does not care about the format of the input strings. | |
387 // For example, the caller may pass in regular expressions, where "[ab]" | |
388 // and "[bc]" could be considered to be a close match, since the order | |
389 // within the brackets doesn't matter. This function will still return null | |
390 // for this set of inputs since they are two edits apart. | |
391 // | |
392 // 2) To be friendly to calling code that might be passing in regular | |
393 // expressions, this function will simply return null if it encounters a | |
394 // special character (e.g. "\", "?", "+", etc.) in the delta. For example, | |
395 // given "Hello" and "Hello, how are you?", it will return null. | |
396 // | |
397 // 3) If the caller does indeed pass in regular expressions, it must make | |
398 // the important assumption that the parts where two such regular | |
399 // expressions may differ can always be treated as normal strings. For | |
400 // example, "^https?://.*/ads" and "^https?://.*/adv" differ only in the | |
401 // last character, therefore the regular expressions can safely be merged | |
402 // into "^https?://.*/ad[sv]". | |
403 | |
404 let diff = s.length - t.length; | |
405 | |
406 // If target is longer than source, swap them for the purpose of our | |
407 // calculation. | |
408 if (diff < 0) | |
409 { | |
410 let tmp = s; | |
411 s = t; | |
412 t = tmp; | |
413 } | |
414 | |
415 let edit = null; | |
416 | |
417 let i = 0, j = 0; | |
418 | |
419 for (; i < s.length; i++) | |
420 { | |
421 if (s[i] != t[i]) | |
422 break; | |
423 } | |
424 | |
425 for (; j < t.length; j++) | |
426 { | |
427 if (t.length - j == i || s[s.length - j - 1] != t[t.length - j - 1]) | |
kzar
2017/05/08 08:13:02
I find this part hard to grok, could you add a com
Manish Jethani
2017/05/08 23:12:48
Done.
| |
428 break; | |
429 } | |
430 | |
431 if (diff == 0) | |
432 { | |
433 if (t.length - j - i != 1) | |
434 return null; | |
435 } | |
436 else if (i != t.length - j) | |
437 { | |
438 return null; | |
439 } | |
440 | |
441 for (let k = i; k < s.length - j; k++) | |
442 { | |
443 // If there are any special characters in the delta, bail. | |
kzar
2017/05/08 08:13:02
Nit: IMO this comment doesn't add much.
Manish Jethani
2017/05/08 23:12:48
I'm slightly rephrasing here, but since I've added
kzar
2017/05/09 10:05:46
Fair enough. Thanks for those extra comments, they
| |
444 if (s[k] == "." || s[k] == "+" || s[k] == "$" || s[k] == "?" || | |
445 s[k] == "{" || s[k] == "}" || s[k] == "(" || s[k] == ")" || | |
446 s[k] == "[" || s[k] == "]" || s[k] == "\\") | |
447 return null; | |
448 } | |
449 | |
450 if (diff == 0) | |
451 { | |
452 edit = {type: "substitute", index: i}; | |
453 } | |
454 else if (diff > 0) | |
455 { | |
456 edit = {type: "delete", index: i}; | |
457 | |
458 if (diff > 1) | |
459 edit.endIndex = s.length - j; | |
460 } | |
461 else | |
462 { | |
463 edit = {type: "insert", index: i}; | |
464 | |
465 if (diff < -1) | |
466 edit.endIndex = s.length - j; | |
467 } | |
468 | |
469 return edit; | |
470 } | |
471 | |
472 function eliminateRedundantRulesByURLFilter(rulesInfo) | |
473 { | |
474 for (let i = 0; i < rulesInfo.length; i++) | |
475 { | |
476 // If this rule is already marked as redundant, don't bother comparing it | |
477 // with other rules. | |
478 if (rulesInfo[i].redundant) | |
479 continue; | |
480 | |
481 for (let j = i + 1; j < rulesInfo.length; j++) | |
482 { | |
483 if (rulesInfo[j].redundant) | |
484 continue; | |
485 | |
486 let source = rulesInfo[i].rule.trigger["url-filter"]; | |
487 let target = rulesInfo[j].rule.trigger["url-filter"]; | |
488 | |
489 if (source.length >= target.length) | |
490 { | |
491 // If one URL filter is a substring of the other starting at the | |
492 // beginning, the other one is clearly redundant. | |
493 if (source.substring(0, target.length) == target) | |
494 { | |
495 rulesInfo[i].redundant = true; | |
496 break; | |
497 } | |
498 } | |
499 else if (target.substring(0, source.length) == source) | |
500 { | |
501 rulesInfo[j].redundant = true; | |
502 } | |
503 } | |
504 } | |
505 | |
506 return rulesInfo.filter(ruleInfo => !ruleInfo.redundant); | |
507 } | |
508 | |
509 function mergeRulesByURLFilter(rulesInfo, exhaustive) | |
510 { | |
511 // Closely matching rules are likely to be within a certain range. We only | |
512 // look for matches within this range. If we increase this value, it can give | |
513 // us more matches and a smaller resulting rule set, but possibly at a | |
514 // significant performance cost. | |
515 const heuristicRange = 10; | |
kzar
2017/05/08 08:13:03
Could you mention about the behaviour (or lack the
Manish Jethani
2017/05/08 23:12:48
Done.
| |
516 | |
517 if (exhaustive) | |
518 // Throw out obviously redundant rules. | |
kzar
2017/05/08 08:13:02
Nit: Please surround with braces since it spans mu
Manish Jethani
2017/05/08 23:12:48
Done.
| |
519 rulesInfo = eliminateRedundantRulesByURLFilter(rulesInfo); | |
520 | |
521 if (rulesInfo.length <= 1) | |
522 return; | |
523 | |
524 for (let i = 0; i < rulesInfo.length; i++) | |
525 { | |
526 let limit = exhaustive ? rulesInfo.length : | |
527 Math.min(i + heuristicRange, rulesInfo.length); | |
528 | |
529 for (let j = i + 1; j < limit; j++) | |
530 { | |
531 let source = rulesInfo[i].rule.trigger["url-filter"]; | |
532 let target = rulesInfo[j].rule.trigger["url-filter"]; | |
533 | |
534 let edit = closeMatch(source, target); | |
535 | |
536 if (edit) | |
537 { | |
538 let urlFilter, ruleInfo, match = {edit}; | |
539 | |
540 if (edit.type == "insert") | |
541 { | |
542 // Convert the insertion into a deletion and stick it on the target | |
543 // rule instead. We can only group deletions and substitutions; | |
544 // therefore insertions must be treated as deletions on the target | |
545 // rule. | |
546 urlFilter = target; | |
547 ruleInfo = rulesInfo[j]; | |
548 match.index = i; | |
549 edit.type = "delete"; | |
550 } | |
551 else | |
552 { | |
553 urlFilter = source; | |
554 ruleInfo = rulesInfo[i]; | |
555 match.index = j; | |
556 } | |
557 | |
558 // If the edit has an end index, it represents a multiple character | |
559 // edit. | |
560 let multiEdit = !!edit.endIndex; | |
561 | |
562 if (multiEdit) | |
563 { | |
564 // We only care about a single multiple character edit because the | |
565 // number of characters for such a match doesn't matter, we can | |
566 // only merge with one other rule. | |
567 if (!ruleInfo.multiEditMatch) | |
568 ruleInfo.multiEditMatch = match; | |
569 } | |
570 else | |
571 { | |
572 // For single character edits, multiple rules can be merged into | |
573 // one. e.g. "ad", "ads", and "adv" can be merged into "ad[sv]?". | |
574 if (!ruleInfo.matches) | |
575 ruleInfo.matches = new Array(urlFilter.length); | |
576 | |
577 // Matches at a particular index. For example, for a source string | |
578 // "ads", both target strings "ad" (deletion) and "adv" | |
579 // (substitution) match at index 2, hence they are grouped together | |
580 // to possibly be merged later into "ad[sv]?". | |
581 let matchesForIndex = ruleInfo.matches[edit.index]; | |
582 | |
583 if (matchesForIndex) | |
584 { | |
585 matchesForIndex.push(match); | |
586 } | |
587 else | |
588 { | |
589 matchesForIndex = [match]; | |
590 ruleInfo.matches[edit.index] = matchesForIndex; | |
591 } | |
592 | |
593 // Keep track of the best set of matches. We later sort by this to | |
594 // get best results. | |
595 if (!ruleInfo.bestMatches || | |
596 matchesForIndex.length > ruleInfo.bestMatches.length) | |
597 ruleInfo.bestMatches = matchesForIndex; | |
598 } | |
599 } | |
600 } | |
601 } | |
602 | |
603 // Filter out rules that have no matches at all. | |
604 let candidateRulesInfo = rulesInfo.filter(ruleInfo => | |
605 { | |
606 return ruleInfo.bestMatches || ruleInfo.multiEditMatch | |
607 }); | |
608 | |
609 // For best results, we have to sort the candidates by the largest set of | |
610 // matches. | |
611 // | |
612 // For example, we want "ads", "bds", "adv", "bdv", "adx", and "bdx" to | |
613 // generate "ad[svx]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and | |
614 // "[ab]dx" (3 rules). | |
615 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | |
616 { | |
617 let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length : | |
618 ruleInfo1.multiEditMatch ? 1 : 0; | |
619 let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length : | |
620 ruleInfo2.multiEditMatch ? 1 : 0; | |
621 | |
622 return weight2 - weight1; | |
623 }); | |
624 | |
625 for (let ruleInfo of candidateRulesInfo) | |
626 { | |
627 let rule = ruleInfo.rule; | |
628 | |
629 // If this rule has already been merged into another rule, we skip it. | |
630 if (ruleInfo.merged) | |
631 continue; | |
632 | |
633 // Find the best set of rules to group, which is simply the largest set. | |
634 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | |
635 { | |
636 matchesForIndex = (matchesForIndex || []).filter(match => | |
637 { | |
638 // Filter out rules that have either already been merged into other | |
639 // rules or have had other rules merged into them. | |
640 return !rulesInfo[match.index].merged && | |
641 !rulesInfo[match.index].mergedInto; | |
642 }); | |
643 | |
644 return matchesForIndex.length > best.length ? matchesForIndex : best; | |
645 }, | |
646 []); | |
647 | |
648 let multiEdit = false; | |
649 | |
650 // If we couldn't find a single rule to merge with, let's see if we have a | |
651 // multiple character edit. e.g. we could merge "ad" and "adserver" into | |
652 // "ad(server)?". | |
653 if (best.length == 0 && ruleInfo.multiEditMatch && | |
654 !rulesInfo[ruleInfo.multiEditMatch.index].merged && | |
655 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | |
656 { | |
657 best = [ruleInfo.multiEditMatch]; | |
658 multiEdit = true; | |
659 } | |
660 | |
661 if (best.length > 0) | |
662 { | |
663 let urlFilter = rule.trigger["url-filter"]; | |
664 | |
665 let editIndex = best[0].edit.index; | |
666 | |
667 if (!multiEdit) | |
668 { | |
669 // Merge all the matching rules into this one. | |
670 | |
671 let characters = []; | |
672 let quantifier = ""; | |
673 | |
674 for (let match of best) | |
675 { | |
676 if (match.edit.type == "delete") | |
677 { | |
678 quantifier = "?"; | |
679 } | |
680 else | |
681 { | |
682 let character = rulesInfo[match.index].rule | |
683 .trigger["url-filter"][editIndex]; | |
684 characters.push(character); | |
685 } | |
686 | |
687 // Mark the target rule as merged so other rules don't try to merge | |
688 // it again. | |
689 rulesInfo[match.index].merged = true; | |
690 } | |
691 | |
692 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | |
693 urlFilter.substring(editIndex + 1); | |
694 if (characters.length > 0) | |
695 { | |
696 urlFilter = urlFilter.substring(0, editIndex) + "[" + | |
697 urlFilter[editIndex] + characters.join("") + "]" + | |
698 urlFilter.substring(editIndex + 1); | |
699 } | |
700 } | |
701 else | |
702 { | |
703 let editEndIndex = best[0].edit.endIndex; | |
704 | |
705 // Mark the target rule as merged so other rules don't try to merge it | |
706 // again. | |
707 rulesInfo[best[0].index].merged = true; | |
708 | |
709 urlFilter = urlFilter.substring(0, editIndex) + "(" + | |
710 urlFilter.substring(editIndex, editEndIndex) + ")?" + | |
711 urlFilter.substring(editEndIndex); | |
712 } | |
713 | |
714 rule.trigger["url-filter"] = urlFilter; | |
715 | |
716 // Mark this rule as one that has had other rules merged into it. | |
717 ruleInfo.mergedInto = true; | |
718 } | |
719 } | |
720 } | |
721 | |
722 function mergeRulesByArrayProperty(rulesInfo, propertyType, property) | |
723 { | |
724 if (rulesInfo.length <= 1) | |
725 return; | |
726 | |
727 let set = new Set(); | |
728 | |
729 rulesInfo.forEach((ruleInfo, index) => | |
730 { | |
731 if (ruleInfo.rule[propertyType][property]) | |
732 { | |
733 for (let value of ruleInfo.rule[propertyType][property]) | |
734 set.add(value); | |
735 } | |
736 | |
737 if (index > 0) | |
738 ruleInfo.merged = true; | |
739 }); | |
740 | |
741 if (set.size > 0) | |
742 rulesInfo[0].rule[propertyType][property] = Array.from(set); | |
743 | |
744 rulesInfo[0].mergedInto = true; | |
745 } | |
746 | |
747 function groupRulesByMergeableProperty(rulesInfo, propertyType, property) | |
748 { | |
749 let mergeableRulesInfoByGroup = new Map(); | |
750 | |
751 rulesInfo.forEach(ruleInfo => | |
752 { | |
753 let copy = { | |
754 trigger: Object.assign({}, ruleInfo.rule.trigger), | |
755 action: Object.assign({}, ruleInfo.rule.action) | |
756 }; | |
757 | |
758 delete copy[propertyType][property]; | |
759 | |
760 let groupKey = JSON.stringify(copy); | |
761 | |
762 let mergeableRulesInfo = mergeableRulesInfoByGroup.get(groupKey); | |
763 | |
764 if (mergeableRulesInfo) | |
765 mergeableRulesInfo.push(ruleInfo); | |
766 else | |
767 mergeableRulesInfoByGroup.set(groupKey, [ruleInfo]); | |
768 }); | |
769 | |
770 return mergeableRulesInfoByGroup; | |
771 } | |
772 | |
773 function mergeRules(rules, options) | |
774 { | |
775 const defaultOptions = {exhaustive: false}; | |
776 | |
777 options = Object.assign({}, defaultOptions, options); | |
778 | |
779 let rulesInfo = rules.map(rule => ({rule})); | |
kzar
2017/05/08 08:13:02
What's the purpose of this line?
Manish Jethani
2017/05/08 14:03:59
The purpose of this line is to create one "ruleInf
kzar
2017/05/09 10:05:46
Argh, sorry that was an especially dumb question.
Manish Jethani
2017/05/09 15:52:46
Haha, no worries :)
| |
780 | |
781 groupRulesByMergeableProperty(rulesInfo, "trigger", "url-filter") | |
782 .forEach(mergeableRulesInfo => | |
kzar
2017/05/08 08:13:03
Any reason you used forEach instead of for ... of
Manish Jethani
2017/05/08 14:04:00
It was for (let value of map.values()) { ... } at
kzar
2017/05/09 10:05:46
Well groupRulesByMergeableProperty creates the arr
Manish Jethani
2017/05/09 15:52:46
The intermediate array here would an array of the
| |
783 { | |
784 if (mergeableRulesInfo.length > 1) | |
785 mergeRulesByURLFilter(mergeableRulesInfo, options.exhaustive); | |
786 }); | |
787 | |
788 // Filter out rules that are redundant or have been merged into other rules. | |
789 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.redundant && | |
790 !ruleInfo.merged); | |
791 | |
792 for (let arrayProperty of ["resource-type", "if-domain"]) | |
793 { | |
794 groupRulesByMergeableProperty(rulesInfo, "trigger", arrayProperty) | |
795 .forEach(mergeableRulesInfo => | |
kzar
2017/05/08 08:13:02
This logic seems pretty much the same as above, I
Manish Jethani
2017/05/08 14:03:59
I suppose we could merge them into one, but then w
kzar
2017/05/09 10:05:46
Acknowledged.
| |
796 { | |
797 if (mergeableRulesInfo.length > 1) | |
798 mergeRulesByArrayProperty(mergeableRulesInfo, "trigger", arrayProperty); | |
799 }); | |
800 | |
801 rulesInfo = rulesInfo.filter(ruleInfo => !ruleInfo.merged); | |
kzar
2017/05/08 08:13:02
Maybe a dumb question, but instead of setting the
Manish Jethani
2017/05/08 14:03:58
I could try that, but then we'd have to adjust the
kzar
2017/05/09 10:05:46
Acknowledged.
| |
802 } | |
803 | |
804 return rulesInfo.map(ruleInfo => ruleInfo.rule); | |
805 } | |
806 | |
369 let ContentBlockerList = | 807 let ContentBlockerList = |
370 /** | 808 /** |
371 * Create a new Adblock Plus filter to content blocker list converter | 809 * Create a new Adblock Plus filter to content blocker list converter |
372 * | 810 * |
373 * @constructor | 811 * @constructor |
374 */ | 812 */ |
375 exports.ContentBlockerList = function () | 813 exports.ContentBlockerList = function () |
376 { | 814 { |
377 this.requestFilters = []; | 815 this.requestFilters = []; |
378 this.requestExceptions = []; | 816 this.requestExceptions = []; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
417 | 855 |
418 parseDomains(filter.domains, domains, []); | 856 parseDomains(filter.domains, domains, []); |
419 } | 857 } |
420 }; | 858 }; |
421 | 859 |
422 /** | 860 /** |
423 * Generate content blocker list for all filters that were added | 861 * Generate content blocker list for all filters that were added |
424 * | 862 * |
425 * @returns {Filter} filter Filter to convert | 863 * @returns {Filter} filter Filter to convert |
426 */ | 864 */ |
427 ContentBlockerList.prototype.generateRules = function(filter) | 865 ContentBlockerList.prototype.generateRules = function(options) |
428 { | 866 { |
867 const defaultOptions = { | |
868 merge: false, | |
869 exhaustiveMerge: false | |
870 }; | |
871 | |
872 options = Object.assign({}, defaultOptions, options); | |
873 | |
429 let rules = []; | 874 let rules = []; |
430 | 875 |
431 let groupedElemhideFilters = new Map(); | 876 let groupedElemhideFilters = new Map(); |
432 for (let filter of this.elemhideFilters) | 877 for (let filter of this.elemhideFilters) |
433 { | 878 { |
434 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 879 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
435 if (!result) | 880 if (!result) |
436 continue; | 881 continue; |
437 | 882 |
438 if (result.matchDomains.length == 0) | 883 if (result.matchDomains.length == 0) |
(...skipping 26 matching lines...) Expand all Loading... | |
465 } | 910 } |
466 }); | 911 }); |
467 | 912 |
468 for (let filter of this.elemhideExceptions) | 913 for (let filter of this.elemhideExceptions) |
469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 914 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
470 for (let filter of this.requestFilters) | 915 for (let filter of this.requestFilters) |
471 convertFilterAddRules(rules, filter, "block", true); | 916 convertFilterAddRules(rules, filter, "block", true); |
472 for (let filter of this.requestExceptions) | 917 for (let filter of this.requestExceptions) |
473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 918 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
474 | 919 |
475 return rules.filter(rule => !hasNonASCI(rule)); | 920 rules = rules.filter(rule => !hasNonASCI(rule)); |
921 | |
922 if (options.merge) | |
923 { | |
924 let mergeOptions = { | |
kzar
2017/05/08 08:13:02
Couldn't you just pass options.exhaustiveMerge str
Manish Jethani
2017/05/08 23:12:48
Done.
| |
925 exhaustive: options.exhaustiveMerge | |
926 }; | |
927 | |
928 rules = mergeRules(rules, mergeOptions); | |
929 } | |
930 | |
931 return rules; | |
476 }; | 932 }; |
OLD | NEW |