Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Add advanced merge support Created May 3, 2017, 4:44 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after
359 { 359 {
360 newSelector.push(selector.substring(i, pos.start)); 360 newSelector.push(selector.substring(i, pos.start));
361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
362 i = pos.end; 362 i = pos.end;
363 } 363 }
364 newSelector.push(selector.substring(i)); 364 newSelector.push(selector.substring(i));
365 365
366 return newSelector.join(""); 366 return newSelector.join("");
367 } 367 }
368 368
/**
 * Works out whether one string can be turned into another with a single edit
 * operation, and if so which one.
 *
 * The returned index always refers to the longer of the two strings (the
 * strings are swapped internally if the target is the longer one), i.e. to
 * the source for "delete" and "substitute" and to the target for "insert".
 *
 * @param {string} s The source string.
 * @param {string} t The target string.
 * @param {object} [options]
 * @param {boolean} [options.multi=false] Whether an insertion or deletion may
 *   span more than one character. The span must still be contiguous.
 * @returns {?object} null if the strings are identical or are not a single
 *   edit apart; otherwise an object with a type ("substitute", "delete" or
 *   "insert"), the index at which the edit starts and, only for edits that
 *   span more than one character, a closeIndex just past the end of the span.
 */
function closeMatch(s, t, {multi = false} = {})
{
  let diff = s.length - t.length;

  // If the string lengths differ by more than one character, we cannot arrive
  // at target from source in a single-character edit operation.
  if (!multi && (diff < -1 || diff > 1))
    return null;

  // If target is longer than source, swap them for the purpose of our
  // calculation. From here on s is never shorter than t.
  if (diff < 0)
  {
    let tmp = s;
    s = t;
    t = tmp;
  }

  // The edit has to start at the first position where the strings differ.
  let index = 0;
  while (index < t.length && s[index] == t[index])
    index++;

  // We ran off the end of both strings, so they are identical.
  if (index == s.length)
    return null;

  // A substitution replaces exactly one character; an insertion or deletion
  // accounts for the entire difference in length.
  let span = diff == 0 ? 1 : Math.abs(diff);
  let restOfTarget = diff == 0 ? index + 1 : index;

  // Since we want one and only one edit operation, everything after the
  // edited span must be identical in both strings. Comparing the remainders
  // directly (rather than scanning greedily) guarantees that the span is
  // contiguous and that the whole target is accounted for.
  if (s.substring(index + span) != t.substring(restOfTarget))
    return null;

  // We don't deal with special characters for now. The characters involved in
  // the edit get spliced into a character class, or made optional, by
  // mergeCloselyMatchingRules; a regular expression metacharacter at the edit
  // position would change the meaning of the resulting expression.
  const specialCharacters = ".+$?{}()[]\\*^|";
  let isSpecial = c => c != null && specialCharacters.indexOf(c) != -1;

  // t[index] is undefined if the edit is at the very end of the target.
  if (isSpecial(t[index]))
    return null;

  for (let k = index; k < index + span; k++)
  {
    if (isSpecial(s[k]))
      return null;
  }

  let type = "substitute";
  if (diff > 0)
  {
    // If the source string is longer, this is a deletion.
    type = "delete";
  }
  else if (diff < 0)
  {
    type = "insert";
  }

  let edit = {type, index};

  // Only edits spanning multiple characters carry a closeIndex; callers use
  // its presence to tell them apart from single-character edits.
  if (span > 1)
    edit.closeIndex = index + span;

  return edit;
}
451
/**
 * Creates a copy of a rule that lacks the "url-filter" trigger property.
 *
 * Only the trigger and action objects are copied, and only one level deep;
 * the original rule is left untouched.
 *
 * @param {object} rule A rule with trigger and action objects.
 * @returns {object} The copy, without trigger["url-filter"].
 */
function ruleWithoutURLFilter(rule)
{
  let copy = {
    trigger: Object.assign({}, rule.trigger),
    action: Object.assign({}, rule.action)
  };

  // The deletion happens on the copied trigger, not on the rule's own one.
  delete copy.trigger["url-filter"];

  return copy;
}
463
/**
 * Builds a regular expression character class out of literal characters.
 *
 * Duplicates are dropped, and "-" and "^" are positioned so that they are
 * taken literally instead of forming a range or negating the class. The order
 * of the characters is otherwise preserved.
 *
 * @param {string[]} characters The single characters to put into the class.
 * @returns {string} The character class, including the square brackets.
 */
function makeCharacterClass(characters)
{
  let unique = characters.filter((c, pos) => characters.indexOf(c) == pos);

  // A hyphen between two other characters would denote a range, e.g. "[a-z]".
  let hyphenPos = unique.indexOf("-");
  if (hyphenPos > 0 && hyphenPos < unique.length - 1)
  {
    unique.splice(hyphenPos, 1);
    unique.push("-");
  }

  // A leading caret would negate the class.
  if (unique.length > 1 && unique[0] == "^")
  {
    unique.shift();
    unique.splice(1, 0, "^");
  }

  return "[" + unique.join("") + "]";
}

/**
 * Merges rules that are identical except for their URL filters, where those
 * URL filters are a single edit operation apart (see closeMatch).
 *
 * Rules whose filters differ by one character at the same position are
 * combined using a character class and/or the "?" quantifier, e.g. "ads",
 * "adv" and "ad" become "ad[sv]?". If multi is set, a rule whose filter
 * differs from another by one contiguous run of characters is combined with
 * it using an optional group, e.g. "ad" and "adserver" become "ad(server)?".
 *
 * The "url-filter" of a rule that other rules are merged into is modified in
 * place. Rules with the action type "ignore-previous-rules" are never merged.
 *
 * @param {object[]} rules The rules to merge.
 * @param {object} [options]
 * @param {boolean} [options.multi=false] Whether to also merge rules whose
 *   URL filters differ by more than one character.
 * @returns {object[]} The rules that have not been merged into other rules,
 *   in their original order.
 */
function mergeCloselyMatchingRules(rules, {multi = false} = {})
{
  // Closely matching rules are likely to be within a certain range. We only
  // look for matches within this range. If we increase this value, it can give
  // us more matches and a smaller resulting rule set, but possibly at a
  // significant performance cost.
  const heuristicRange = 100;

  // All bookkeeping lives in these records rather than on the rule objects
  // themselves, so that the caller's rules don't get extra properties.
  let rulesInfo = rules.map(rule =>
  {
    let ruleInfo = {rule};

    if (rule.action.type == "ignore-previous-rules")
    {
      ruleInfo.skip = true;
    }
    else
    {
      // Save a stringified version of the rule, but without the URL filter. We
      // use this for comparison later.
      ruleInfo.stringifiedWithoutURLFilter =
        JSON.stringify(ruleWithoutURLFilter(rule));
    }

    return ruleInfo;
  });

  // Step 1: For each rule, collect the nearby rules that closely match it.
  for (let i = 0; i < rules.length; i++)
  {
    if (rulesInfo[i].skip)
      continue;

    let limit = Math.min(i + heuristicRange, rules.length);

    for (let j = i + 1; j < limit; j++)
    {
      if (rulesInfo[j].skip)
        continue;

      // Check if the rules are identical except for the URL filter.
      if (rulesInfo[i].stringifiedWithoutURLFilter !=
          rulesInfo[j].stringifiedWithoutURLFilter)
      {
        continue;
      }

      let source = rules[i].trigger["url-filter"];
      let target = rules[j].trigger["url-filter"];

      let edit = closeMatch(source, target, {multi});
      if (!edit)
        continue;

      // match.index is the position in rules of the rule to be merged into
      // the rule that the match gets recorded on.
      let urlFilter, ruleInfo, match = {edit};

      if (edit.type == "insert")
      {
        // Convert the insertion into a deletion and stick it on the target
        // rule instead. We can only group deletions and substitutions;
        // therefore insertions must be treated as deletions on the target
        // rule, to be dealt with later.
        urlFilter = target;
        ruleInfo = rulesInfo[j];
        match.index = i;
        edit.type = "delete";
      }
      else
      {
        urlFilter = source;
        ruleInfo = rulesInfo[i];
        match.index = j;
      }

      if (edit.closeIndex)
      {
        // Edits spanning multiple characters cannot be grouped with one
        // another, so we only remember the first one.
        if (!ruleInfo.multiEditMatch)
          ruleInfo.multiEditMatch = match;

        continue;
      }

      // Single-character edits are grouped by the index they occur at.
      if (!ruleInfo.matches)
        ruleInfo.matches = new Array(urlFilter.length + 1);

      let matchesForIndex = ruleInfo.matches[edit.index];

      if (matchesForIndex)
      {
        matchesForIndex.push(match);
      }
      else
      {
        matchesForIndex = [match];
        ruleInfo.matches[edit.index] = matchesForIndex;
      }

      if (!ruleInfo.bestMatches ||
          matchesForIndex.length > ruleInfo.bestMatches.length)
      {
        ruleInfo.bestMatches = matchesForIndex;
      }
    }
  }

  let candidateRulesInfo = rulesInfo.filter(
    ruleInfo => ruleInfo.bestMatches || ruleInfo.multiEditMatch
  );

  // For best results, we have to sort the candidates by the number of matches.
  // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate
  // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3
  // rules).
  candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
  {
    let weight1 = ruleInfo1.bestMatches ? ruleInfo1.bestMatches.length : 1;
    let weight2 = ruleInfo2.bestMatches ? ruleInfo2.bestMatches.length : 1;

    return weight2 - weight1;
  });

  // A rule can take part in a merge only if it has neither already been
  // merged into another rule nor had other rules merged into it.
  let isAvailable = index => !rulesInfo[index].merged &&
                             !rulesInfo[index].mergedInto;

  // Step 2: Do the actual merging, most promising candidates first.
  for (let ruleInfo of candidateRulesInfo)
  {
    if (ruleInfo.merged)
      continue;

    // Find the best set of rules to group, which is simply the largest set.
    // Note that reduce skips the indices for which there are no matches.
    let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
    {
      let available = (matchesForIndex || []).filter(
        match => isAvailable(match.index)
      );

      return available.length > best.length ? available : best;
    },
    []);

    // Fall back to the multi-character edit, if any.
    let useMultiEdit = false;
    if (best.length == 0 && ruleInfo.multiEditMatch &&
        isAvailable(ruleInfo.multiEditMatch.index))
    {
      best = [ruleInfo.multiEditMatch];
      useMultiEdit = true;
    }

    if (best.length == 0)
      continue;

    let rule = ruleInfo.rule;
    let urlFilter = rule.trigger["url-filter"];
    let editIndex = best[0].edit.index;

    if (useMultiEdit)
    {
      // Make the span that the other rule lacks optional.
      let editCloseIndex = best[0].edit.closeIndex;

      rulesInfo[best[0].index].merged = true;

      urlFilter = urlFilter.substring(0, editIndex) + "(" +
                  urlFilter.substring(editIndex, editCloseIndex) + ")?" +
                  urlFilter.substring(editCloseIndex);
    }
    else
    {
      // Merge all the matching rules into this one.
      let characters = [];
      let quantifier = "";

      for (let match of best)
      {
        if (match.edit.type == "delete")
        {
          // The other rule lacks this character, so it becomes optional.
          quantifier = "?";
        }
        else
        {
          characters.push(
            rules[match.index].trigger["url-filter"][editIndex]
          );
        }

        rulesInfo[match.index].merged = true;
      }

      let atom = urlFilter[editIndex];
      if (characters.length > 0)
        atom = makeCharacterClass([atom].concat(characters));

      urlFilter = urlFilter.substring(0, editIndex) + atom + quantifier +
                  urlFilter.substring(editIndex + 1);
    }

    rule.trigger["url-filter"] = urlFilter;

    ruleInfo.mergedInto = true;
  }

  return rules.filter((rule, index) => !rulesInfo[index].merged);
}
662
369 let ContentBlockerList = 663 let ContentBlockerList =
370 /** 664 /**
371 * Create a new Adblock Plus filter to content blocker list converter 665 * Create a new Adblock Plus filter to content blocker list converter
372 * 666 *
373 * @constructor 667 * @constructor
374 */ 668 */
375 exports.ContentBlockerList = function () 669 exports.ContentBlockerList = function ()
376 { 670 {
377 this.requestFilters = []; 671 this.requestFilters = [];
378 this.requestExceptions = []; 672 this.requestExceptions = [];
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
417 711
418 parseDomains(filter.domains, domains, []); 712 parseDomains(filter.domains, domains, []);
419 } 713 }
420 }; 714 };
421 715
422 /** 716 /**
423 * Generate content blocker list for all filters that were added 717 * Generate content blocker list for all filters that were added
424 * 718 *
425 * @returns {Filter} filter Filter to convert 719 * @returns {Filter} filter Filter to convert
426 */ 720 */
427 ContentBlockerList.prototype.generateRules = function(filter) 721 ContentBlockerList.prototype.generateRules = function(
722 {merge = false, multiMerge = false} = {})
428 { 723 {
429 let rules = []; 724 let rules = [];
430 725
431 let groupedElemhideFilters = new Map(); 726 let groupedElemhideFilters = new Map();
432 for (let filter of this.elemhideFilters) 727 for (let filter of this.elemhideFilters)
433 { 728 {
434 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); 729 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions);
435 if (!result) 730 if (!result)
436 continue; 731 continue;
437 732
(...skipping 27 matching lines...) Expand all
465 } 760 }
466 }); 761 });
467 762
468 for (let filter of this.elemhideExceptions) 763 for (let filter of this.elemhideExceptions)
469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); 764 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
470 for (let filter of this.requestFilters) 765 for (let filter of this.requestFilters)
471 convertFilterAddRules(rules, filter, "block", true); 766 convertFilterAddRules(rules, filter, "block", true);
472 for (let filter of this.requestExceptions) 767 for (let filter of this.requestExceptions)
473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 768 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
474 769
475 return rules.filter(rule => !hasNonASCI(rule)); 770 rules = rules.filter(rule => !hasNonASCI(rule));
771
772 if (merge)
773 rules = mergeCloselyMatchingRules(rules, {multi: multiMerge});
774
775 return rules;
476 }; 776 };
OLDNEW
« no previous file with comments | « abp2blocklist.js ('k') | test/abp2blocklist.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld