Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
359 { | 359 { |
360 newSelector.push(selector.substring(i, pos.start)); | 360 newSelector.push(selector.substring(i, pos.start)); |
361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
362 i = pos.end; | 362 i = pos.end; |
363 } | 363 } |
364 newSelector.push(selector.substring(i)); | 364 newSelector.push(selector.substring(i)); |
365 | 365 |
366 return newSelector.join(""); | 366 return newSelector.join(""); |
367 } | 367 } |
368 | 368 |
/**
 * Determines whether one string can be turned into another by a single,
 * contiguous edit, and if so describes that edit.
 *
 * The edit is located by taking the longest common prefix of both strings and
 * then requiring that everything after the edited span is identical. Because
 * the strings are swapped when the target is longer, the returned indices
 * always refer to the longer of the two strings (i.e. to the target string
 * for an "insert").
 *
 * @param {string} s Source string
 * @param {string} t Target string
 * @param {object} [options]
 * @param {boolean} [options.multi=false] Whether an insertion or deletion may
 *   span more than one character. Substitutions are always a single character.
 * @returns {?object} <code>null</code> if the strings are identical, if they
 *   are not exactly one edit apart, or if a regular expression special
 *   character is involved in the edit. Otherwise an object with the
 *   properties <code>type</code> ("substitute", "delete" or "insert"),
 *   <code>index</code> (where the edit starts) and, only for edits spanning
 *   more than one character, <code>closeIndex</code> (index just past the
 *   edited span).
 */
function closeMatch(s, t, {multi = false} = {})
{
  // Characters that carry a meaning in a regular expression. We don't deal
  // with edits touching any of these, since grouping or quantifying them
  // would change what the expression matches.
  const specialCharacters = "\\^$.*+?()[]{}|";

  // Note that the character may be undefined when we have run past the end of
  // the shorter string.
  let isSpecial = character => character != null &&
                               specialCharacters.indexOf(character) != -1;

  let diff = s.length - t.length;

  // If the string lengths differ by more than one character, we cannot arrive
  // at target from source in a single-character edit operation.
  if (!multi && (diff < -1 || diff > 1))
    return null;

  // If target is longer than source, swap them for the purpose of our
  // calculation. The sign of diff is kept so we still know it's an insertion.
  if (diff < 0)
  {
    let tmp = s;
    s = t;
    t = tmp;
  }

  // The edit starts where the common prefix ends.
  let index = 0;
  while (index < t.length && s[index] == t[index])
    index++;

  // Identical strings require no edit at all.
  if (diff == 0 && index == s.length)
    return null;

  // Number of characters of the (longer) string covered by the edit.
  let span = diff == 0 ? 1 : Math.abs(diff);

  // Since we want one and only one edit operation, everything following the
  // edited span must be the same in both strings.
  let tail = t.substring(diff == 0 ? index + 1 : index);
  if (s.substring(index + span) != tail)
    return null;

  // The character in the shorter string at the edit position must not be
  // special either, e.g. deleting "b" from "ab?c" to get "a?c" would leave us
  // with a quantifier applying to something else.
  if (isSpecial(t[index]))
    return null;

  for (let i = index; i < index + span; i++)
  {
    if (isSpecial(s[i]))
      return null;
  }

  let type;
  if (diff == 0)
  {
    // If both strings are equal in length, this is a substitution.
    type = "substitute";
  }
  else if (diff > 0)
  {
    // If the source string is longer, this is a deletion.
    type = "delete";
  }
  else
  {
    type = "insert";
  }

  let edit = {type: type, index: index};

  // Only edits spanning multiple characters carry a closing index.
  if (span > 1)
    edit.closeIndex = index + span;

  return edit;
}
451 | |
/**
 * Creates a copy of a rule that lacks the URL filter.
 *
 * Only the <code>trigger</code> and <code>action</code> properties are carried
 * over, each as a shallow copy, so the original rule is left untouched.
 *
 * @param {object} rule Rule with <code>trigger</code> and <code>action</code>
 * @returns {object} New rule object without <code>trigger["url-filter"]</code>
 */
function ruleWithoutURLFilter(rule)
{
  // Work on a shallow copy so that removing the URL filter doesn't affect the
  // rule that was passed in.
  let triggerCopy = Object.assign({}, rule.trigger);
  delete triggerCopy["url-filter"];

  let actionCopy = Object.assign({}, rule.action);

  return {trigger: triggerCopy, action: actionCopy};
}
463 | |
/**
 * Merges rules that are identical except for a small difference in their URL
 * filter into a single rule with a combined URL filter.
 *
 * Rules differing by a one-character substitution are combined using a
 * character class (e.g. "ads" and "bds" become "[ab]ds"), rules differing by a
 * one-character deletion using the "?" quantifier, and, if
 * <code>multi</code> is set, rules differing by a multi-character deletion
 * using an optional group (e.g. "a(bc)?d").
 *
 * Note that the rule objects passed in are modified: the URL filter of a rule
 * that other rules got merged into is rewritten, and rules that got merged
 * into another one are flagged with a <code>_merged</code> property.
 *
 * @param {object[]} rules Rules, each with <code>trigger["url-filter"]</code>
 *   and <code>action.type</code>
 * @param {object} [options]
 * @param {boolean} [options.multi=false] Passed on to <code>closeMatch</code>
 *   to allow multi-character insertions and deletions
 * @returns {object[]} The rules that remain after merging, in original order
 */
function mergeCloselyMatchingRules(rules, {multi = false} = {})
{
  // Closely matching rules are likely to be within a certain range. We only
  // look for matches within this range. If we increase this value, it can give
  // us more matches and a smaller resulting rule set, but possibly at a
  // significant performance cost.
  const heuristicRange = 100;

  // Builds a character class matching any of the given characters literally.
  // A hyphen in between two characters would be interpreted as a range (e.g.
  // "1", "-" and "9" would give us "[1-9]") and a leading caret would negate
  // the class, therefore any hyphen goes first and any caret goes last.
  function characterClass(characters)
  {
    let hyphen = "";
    let caret = "";
    let others = [];

    for (let character of characters)
    {
      if (character == "-")
        hyphen = "-";
      else if (character == "^")
        caret = "^";
      else if (others.indexOf(character) == -1)
        others.push(character);
    }

    return "[" + hyphen + others.join("") + caret + "]";
  }

  let rulesInfo = rules.map(rule =>
  {
    let ruleInfo = {rule};

    if (rule.action.type == "ignore-previous-rules")
    {
      ruleInfo.skip = true;
    }
    else
    {
      // Save a stringified version of the rule, but without the URL filter. We
      // use this for comparison later.
      ruleInfo.stringifiedWithoutURLFilter =
        JSON.stringify(ruleWithoutURLFilter(rule));
    }

    return ruleInfo;
  });

  for (let i = 0; i < rules.length; i++)
  {
    if (rulesInfo[i].skip)
      continue;

    for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++)
    {
      if (rulesInfo[j].skip)
        continue;

      // Check if the rules are identical except for the URL filter.
      if (rulesInfo[i].stringifiedWithoutURLFilter !=
          rulesInfo[j].stringifiedWithoutURLFilter)
        continue;

      let source = rules[i].trigger["url-filter"];
      let target = rules[j].trigger["url-filter"];

      let edit = closeMatch(source, target, {multi});
      if (!edit)
        continue;

      let urlFilter, ruleInfo, match = {edit};

      if (edit.type == "insert")
      {
        // Convert the insertion into a deletion and stick it on the target
        // rule instead. We can only group deletions and substitutions;
        // therefore insertions must be treated as deletions on the target
        // rule, to be dealt with later.
        urlFilter = target;
        ruleInfo = rulesInfo[j];
        match.index = i;
        edit.type = "delete";
      }
      else
      {
        urlFilter = source;
        ruleInfo = rulesInfo[i];
        match.index = j;
      }

      if (edit.closeIndex)
      {
        // Only the first multi-character match is remembered for a rule.
        if (!ruleInfo.multiEditMatch)
          ruleInfo.multiEditMatch = match;
      }
      else
      {
        // Single-character matches are grouped by the index of the edit, since
        // only edits at the same index can be merged together.
        if (!ruleInfo.matches)
          ruleInfo.matches = new Array(urlFilter.length + 1);

        let matchesForIndex = ruleInfo.matches[edit.index];

        if (matchesForIndex)
        {
          matchesForIndex.push(match);
        }
        else
        {
          matchesForIndex = [match];
          ruleInfo.matches[edit.index] = matchesForIndex;
        }

        if (!ruleInfo.bestMatches ||
            matchesForIndex.length > ruleInfo.bestMatches.length)
        {
          ruleInfo.bestMatches = matchesForIndex;
        }
      }
    }
  }

  let candidateRulesInfo = rulesInfo.filter(
    ruleInfo => ruleInfo.bestMatches || ruleInfo.multiEditMatch
  );

  // For best results, we have to sort the candidates by the number of matches.
  // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate
  // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3
  // rules).
  candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
  {
    let weight1 = 1;
    let weight2 = 1;

    if (ruleInfo1.bestMatches)
      weight1 = ruleInfo1.bestMatches.length;

    if (ruleInfo2.bestMatches)
      weight2 = ruleInfo2.bestMatches.length;

    return weight2 - weight1;
  });

  // A rule is no longer available for merging if it has either already been
  // merged into another rule or has had other rules merged into it.
  let isAvailable = match => !rules[match.index]._merged &&
                             !rulesInfo[match.index].mergedInto;

  for (let ruleInfo of candidateRulesInfo)
  {
    let rule = ruleInfo.rule;

    if (rule._merged)
      continue;

    // Find the best set of rules to group, which is simply the largest set.
    let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
    {
      matchesForIndex = (matchesForIndex || []).filter(isAvailable);

      return matchesForIndex.length > best.length ? matchesForIndex : best;
    },
    []);

    // Fall back to the multi-character match if there's nothing else.
    if (best.length == 0 && ruleInfo.multiEditMatch &&
        isAvailable(ruleInfo.multiEditMatch))
    {
      best = [ruleInfo.multiEditMatch];
    }

    if (best.length == 0)
      continue;

    let urlFilter = rule.trigger["url-filter"];

    let editIndex = best[0].edit.index;

    if (best[0] != ruleInfo.multiEditMatch)
    {
      // Merge all the matching rules into this one.

      let characters = [urlFilter[editIndex]];
      let quantifier = "";

      for (let match of best)
      {
        if (match.edit.type == "delete")
          quantifier = "?";
        else
          characters.push(rules[match.index].trigger["url-filter"][editIndex]);

        rules[match.index]._merged = true;
      }

      // Without any substitutions the character stays as it is and merely
      // becomes optional.
      let replacement = characters.length > 1 ? characterClass(characters) :
                                                urlFilter[editIndex];

      urlFilter = urlFilter.substring(0, editIndex) + replacement +
                  quantifier + urlFilter.substring(editIndex + 1);
    }
    else
    {
      let editCloseIndex = best[0].edit.closeIndex;

      rules[best[0].index]._merged = true;

      // Make the whole span optional.
      urlFilter = urlFilter.substring(0, editIndex) + "(" +
                  urlFilter.substring(editIndex, editCloseIndex) + ")?" +
                  urlFilter.substring(editCloseIndex);
    }

    rule.trigger["url-filter"] = urlFilter;

    ruleInfo.mergedInto = true;
  }

  return rules.filter(rule => !rule._merged);
}
662 | |
369 let ContentBlockerList = | 663 let ContentBlockerList = |
370 /** | 664 /** |
371 * Create a new Adblock Plus filter to content blocker list converter | 665 * Create a new Adblock Plus filter to content blocker list converter |
372 * | 666 * |
373 * @constructor | 667 * @constructor |
374 */ | 668 */ |
375 exports.ContentBlockerList = function () | 669 exports.ContentBlockerList = function () |
376 { | 670 { |
377 this.requestFilters = []; | 671 this.requestFilters = []; |
378 this.requestExceptions = []; | 672 this.requestExceptions = []; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
417 | 711 |
418 parseDomains(filter.domains, domains, []); | 712 parseDomains(filter.domains, domains, []); |
419 } | 713 } |
420 }; | 714 }; |
421 | 715 |
422 /** | 716 /** |
423 * Generate content blocker list for all filters that were added | 717 * Generate content blocker list for all filters that were added |
424 * | 718 * |
425 * @returns {Filter} filter Filter to convert | 719 * @returns {Filter} filter Filter to convert |
426 */ | 720 */ |
427 ContentBlockerList.prototype.generateRules = function(filter) | 721 ContentBlockerList.prototype.generateRules = function( |
722 {merge = false, multiMerge = false} = {}) | |
428 { | 723 { |
429 let rules = []; | 724 let rules = []; |
430 | 725 |
431 let groupedElemhideFilters = new Map(); | 726 let groupedElemhideFilters = new Map(); |
432 for (let filter of this.elemhideFilters) | 727 for (let filter of this.elemhideFilters) |
433 { | 728 { |
434 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 729 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
435 if (!result) | 730 if (!result) |
436 continue; | 731 continue; |
437 | 732 |
(...skipping 27 matching lines...) Expand all Loading... | |
465 } | 760 } |
466 }); | 761 }); |
467 | 762 |
468 for (let filter of this.elemhideExceptions) | 763 for (let filter of this.elemhideExceptions) |
469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 764 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
470 for (let filter of this.requestFilters) | 765 for (let filter of this.requestFilters) |
471 convertFilterAddRules(rules, filter, "block", true); | 766 convertFilterAddRules(rules, filter, "block", true); |
472 for (let filter of this.requestExceptions) | 767 for (let filter of this.requestExceptions) |
473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 768 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
474 | 769 |
475 return rules.filter(rule => !hasNonASCI(rule)); | 770 rules = rules.filter(rule => !hasNonASCI(rule)); |
771 | |
772 if (merge) | |
773 rules = mergeCloselyMatchingRules(rules, {multi: multiMerge}); | |
774 | |
775 return rules; | |
476 }; | 776 }; |
OLD | NEW |