| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 359 { | 359 { |
| 360 newSelector.push(selector.substring(i, pos.start)); | 360 newSelector.push(selector.substring(i, pos.start)); |
| 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
| 362 i = pos.end; | 362 i = pos.end; |
| 363 } | 363 } |
| 364 newSelector.push(selector.substring(i)); | 364 newSelector.push(selector.substring(i)); |
| 365 | 365 |
| 366 return newSelector.join(""); | 366 return newSelector.join(""); |
| 367 } | 367 } |
| 368 | 368 |
| 369 function closeMatch(s, t, {multi = false} = {}) | |
|
kzar
2017/05/03 11:17:24
I've not seen this syntax before `{multi = false}`...
Manish Jethani
2017/05/03 14:41:54
This:
function func(param1, param2, {option1 =
kzar
2017/05/03 15:19:04
Acknowledged.
| |
| 370 { | |
| 371 // This function returns an edit operation (one of "substitute", "delete", | |
| 372 // and "insert") along with an index in the source string where the edit | |
| 373 // should occur in order to arrive at the target string. | |
| 374 | |
| 375 let diff = s.length - t.length; | |
| 376 | |
| 377 // If the string lengths differ by more than one character, we cannot arrive | |
|
kzar
2017/05/03 11:17:24
Nit: Typo "lenghts".
Manish Jethani
2017/05/04 02:49:32
Done.
| |
| 378 // at target from source in a single edit operation. | |
| 379 if (!multi && (diff < -1 || diff > 1)) | |
| 380 return null; | |
| 381 | |
| 382 // If target is longer than source, swap them for the purpose of our | |
| 383 // calculation. | |
| 384 if (diff < 0) | |
| 385 { | |
| 386 let tmp = s; | |
| 387 s = t; | |
| 388 t = tmp; | |
| 389 } | |
| 390 | |
| 391 let edit = null; | |
| 392 let multiEdit = false; | |
| 393 | |
| 394 let j = 0; | |
| 395 | |
| 396 for (let i = 0; i < s.length; i++) | |
| 397 { | |
| 398 if (s[i] == t[j]) | |
| 399 { | |
| 400 j++; | |
| 401 | |
| 402 if (edit && multiEdit && !edit.closeIndex) | |
| 403 edit.closeIndex = i; | |
| 404 } | |
| 405 else if (edit && (!multi || diff == 0 || edit.closeIndex)) | |
| 406 { | |
| 407 // Since we want one and only one edit operation, we must bail here. | |
| 408 return null; | |
| 409 } | |
| 410 else if ((s[i] == "." || s[i] == "+" || s[i] == "$" || s[i] == "?" || | |
| 411 s[i] == "{" || s[i] == "}" || s[i] == "(" || s[i] == ")" || | |
| 412 s[i] == "[" || s[i] == "]" || s[i] == "\\") || | |
| 413 (t[j] == "." || t[j] == "+" || t[j] == "$" || t[j] == "?" || | |
| 414 t[j] == "{" || t[j] == "}" || t[j] == "(" || t[j] == ")" || | |
| 415 t[j] == "[" || t[j] == "]" || t[j] == "\\")) | |
| 416 { | |
| 417 // We don't deal with special characters for now. | |
|
kzar
2017/05/03 11:17:24
So we skip special characters in the url-filter re
Manish Jethani
2017/05/03 14:41:54
The above is not a good example because this is no
kzar
2017/05/03 15:19:04
Maybe add a comment explaining that assumption?
Manish Jethani
2017/05/04 02:49:32
Added a comment to explain this.
| |
| 418 return null; | |
| 419 } | |
| 420 else | |
| 421 { | |
| 422 if (diff == 0) | |
|
kzar
2017/05/03 11:17:24
Nit: Couldn't this be an `else if` too?
Manish Jethani
2017/05/04 02:49:32
Done.
| |
| 423 { | |
| 424 // If both strings are equal in length, this is a substitution. | |
| 425 edit = {type: "substitute", index: i}; | |
| 426 j++; | |
| 427 } | |
| 428 else | |
| 429 { | |
| 430 if (edit) | |
| 431 multiEdit = true; | |
| 432 else if (diff > 0) | |
|
kzar
2017/05/03 11:17:24
Nit: Please use braces since the clause spans mult
Manish Jethani
2017/05/04 02:49:32
Done.
| |
| 433 // If the source string is longer, this is a deletion. | |
| 434 edit = {type: "delete", index: i}; | |
| 435 else | |
| 436 edit = {type: "insert", index: i}; | |
| 437 } | |
| 438 } | |
| 439 } | |
| 440 | |
| 441 if (edit && multiEdit && !edit.closeIndex) | |
| 442 { | |
| 443 if (j < t.length) | |
| 444 return null; | |
| 445 | |
| 446 edit.closeIndex = s.length; | |
| 447 } | |
| 448 | |
| 449 return edit; | |
| 450 } | |
| 451 | |
| 452 function ruleWithoutURLFilter(rule) | |
| 453 { | |
| 454 let copy = { | |
|
kzar
2017/05/03 15:19:04
How about `return Object.create(rule, {"url-filter
Manish Jethani
2017/05/04 02:49:31
That would not work for multiple reasons, but most
| |
| 455 trigger: Object.assign({}, rule.trigger), | |
| 456 action: Object.assign({}, rule.action) | |
| 457 }; | |
| 458 | |
| 459 delete copy.trigger["url-filter"]; | |
| 460 | |
| 461 return copy; | |
| 462 } | |
| 463 | |
| 464 function mergeCloselyMatchingRules(rules, {multi = false} = {}) | |
| 465 { | |
| 466 // Closely matching rules are likely to be within a certain range. We only | |
| 467 // look for matches within this range. If we increase this value, it can give | |
| 468 // us more matches and a smaller resulting rule set, but possibly at a | |
| 469 // significant performance cost. | |
| 470 const heuristicRange = 100; | |
|
kzar
2017/05/03 15:19:04
Since the code either runs in a place where speed
Manish Jethani
2017/05/04 02:49:32
In the latest update the generateRules function ta
| |
| 471 | |
| 472 let rulesInfo = new Array(rules.length); | |
| 473 | |
| 474 rules.forEach((rule, index) => | |
| 475 { | |
| 476 rulesInfo[index] = {rule}; | |
|
kzar
2017/05/03 11:17:24
I'm not sure syntax like this will work for Safari
Manish Jethani
2017/05/03 14:41:54
I'll check, but if it doesn't work then I'll have
kzar
2017/05/08 08:13:02
You mentioned testing the code on Safari now, but
Manish Jethani
2017/05/08 14:03:58
I've been testing with Safari 10.
Anyway, this is
kzar
2017/05/09 10:05:46
I think you should test with Safari 9 at least onc
Manish Jethani
2017/05/09 15:52:46
"{rule: rule}" ought to work in every single JS en
| |
| 477 | |
| 478 if (rule.action.type == "ignore-previous-rules") | |
| 479 { | |
| 480 rulesInfo[index].skip = true; | |
| 481 } | |
| 482 else | |
| 483 { | |
| 484 // Save a stringified version of the rule, but without the URL filter. We | |
| 485 // use this for comparison later. | |
| 486 rulesInfo[index].stringifiedWithoutURLFilter = | |
| 487 JSON.stringify(ruleWithoutURLFilter(rule)); | |
| 488 } | |
| 489 }); | |
| 490 | |
| 491 for (let i = 0; i < rules.length; i++) | |
| 492 { | |
| 493 if (rulesInfo[i].skip) | |
| 494 continue; | |
| 495 | |
| 496 for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++) | |
| 497 { | |
| 498 if (rulesInfo[j].skip) | |
| 499 continue; | |
| 500 | |
| 501 // Check if the rules are identical except for the URL filter. | |
| 502 if (rulesInfo[i].stringifiedWithoutURLFilter == | |
|
kzar
2017/05/03 15:19:04
I wonder if we could create a lookup table stringi
Manish Jethani
2017/05/04 02:49:32
I'm not sure what the benefit of that would be.
W
| |
| 503 rulesInfo[j].stringifiedWithoutURLFilter) | |
| 504 { | |
| 505 let source = rules[i].trigger["url-filter"]; | |
| 506 let target = rules[j].trigger["url-filter"]; | |
| 507 | |
| 508 let edit = closeMatch(source, target, {multi}); | |
| 509 | |
| 510 if (edit) | |
| 511 { | |
| 512 let urlFilter, ruleInfo, match = {edit}; | |
| 513 | |
| 514 if (edit.type == "insert") | |
| 515 { | |
| 516 // Convert the insertion into a deletion and stick it on the target | |
| 517 // rule instead. We can only group deletions and substitutions; | |
| 518 // therefore insertions must be treated as deletions on the target | |
| 519 // rule, to be dealt with later. | |
| 520 urlFilter = target; | |
| 521 ruleInfo = rulesInfo[j]; | |
| 522 match.index = i; | |
| 523 edit.type = "delete"; | |
| 524 } | |
| 525 else | |
| 526 { | |
| 527 urlFilter = source; | |
| 528 ruleInfo = rulesInfo[i]; | |
| 529 match.index = j; | |
| 530 } | |
| 531 | |
| 532 if (edit.closeIndex) | |
| 533 { | |
| 534 if (!ruleInfo.multiEditMatch) | |
| 535 ruleInfo.multiEditMatch = match; | |
| 536 } | |
| 537 else | |
| 538 { | |
| 539 if (!ruleInfo.matches) | |
| 540 ruleInfo.matches = new Array(urlFilter.length + 1); | |
| 541 | |
| 542 let matchesForIndex = ruleInfo.matches[edit.index]; | |
| 543 | |
| 544 if (matchesForIndex) | |
| 545 { | |
| 546 matchesForIndex.push(match); | |
| 547 } | |
| 548 else | |
| 549 { | |
| 550 matchesForIndex = [match]; | |
| 551 ruleInfo.matches[edit.index] = matchesForIndex; | |
| 552 } | |
| 553 | |
| 554 if (!ruleInfo.bestMatches || | |
| 555 matchesForIndex.length > ruleInfo.bestMatches.length) | |
| 556 ruleInfo.bestMatches = matchesForIndex; | |
| 557 } | |
| 558 } | |
| 559 } | |
| 560 } | |
| 561 } | |
| 562 | |
| 563 let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches || | |
| 564 ruleInfo.multiEditMatch) ; | |
|
kzar
2017/05/03 11:17:24
Nit: Long line.
Manish Jethani
2017/05/04 02:49:31
Done.
| |
| 565 | |
| 566 // For best results, we have to sort the candidates by the number of matches. | |
| 567 // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate | |
| 568 // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3 | |
| 569 // rules). | |
| 570 candidateRulesInfo.sort((ruleInfo1, ruleInfo2) => | |
| 571 { | |
| 572 let weight1 = 1; | |
| 573 let weight2 = 1; | |
| 574 | |
| 575 if (ruleInfo1.bestMatches) | |
| 576 weight1 = ruleInfo1.bestMatches.length; | |
| 577 | |
| 578 if (ruleInfo2.bestMatches) | |
| 579 weight2 = ruleInfo2.bestMatches.length; | |
| 580 | |
| 581 return weight2 - weight1; | |
| 582 }); | |
| 583 | |
| 584 for (let ruleInfo of candidateRulesInfo) | |
| 585 { | |
| 586 let rule = ruleInfo.rule; | |
| 587 | |
| 588 if (rule._merged) | |
| 589 continue; | |
| 590 | |
| 591 // Find the best set of rules to group, which is simply the largest set. | |
| 592 let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) => | |
| 593 { | |
| 594 matchesForIndex = (matchesForIndex || []).filter(match => | |
| 595 { | |
| 596 // Filter out rules that have either already been merged into other | |
| 597 // rules or have had other rules merged into them. | |
| 598 return !rules[match.index]._merged && | |
| 599 !rulesInfo[match.index].mergedInto; | |
| 600 }); | |
| 601 | |
| 602 return matchesForIndex.length > best.length ? matchesForIndex : best; | |
| 603 }, | |
| 604 []); | |
| 605 | |
| 606 if (best.length == 0 && ruleInfo.multiEditMatch && | |
| 607 !rules[ruleInfo.multiEditMatch.index]._merged && | |
| 608 !rulesInfo[ruleInfo.multiEditMatch.index].mergedInto) | |
| 609 best = [ruleInfo.multiEditMatch]; | |
| 610 | |
| 611 if (best.length > 0) | |
| 612 { | |
| 613 let urlFilter = rule.trigger["url-filter"]; | |
| 614 | |
| 615 let editIndex = best[0].edit.index; | |
| 616 | |
| 617 if (best[0] != ruleInfo.multiEditMatch) | |
| 618 { | |
| 619 // Merge all the matching rules into this one. | |
| 620 | |
| 621 let characters = []; | |
| 622 let quantifier = ""; | |
| 623 | |
| 624 for (let match of best) | |
| 625 { | |
| 626 if (match.edit.type == "delete") | |
| 627 quantifier = "?"; | |
| 628 else | |
| 629 characters.push(rules[match.index].trigger["url-filter"][editIndex]) ; | |
| 630 | |
| 631 rules[match.index]._merged = true; | |
| 632 } | |
| 633 | |
| 634 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier + | |
| 635 urlFilter.substring(editIndex + 1); | |
| 636 if (characters.length > 0) | |
| 637 { | |
| 638 urlFilter = urlFilter.substring(0, editIndex) + "[" + | |
| 639 urlFilter[editIndex] + characters.join("") + "]" + | |
| 640 urlFilter.substring(editIndex + 1); | |
| 641 } | |
| 642 } | |
| 643 else | |
| 644 { | |
| 645 let editCloseIndex = best[0].edit.closeIndex; | |
| 646 | |
| 647 rules[best[0].index]._merged = true; | |
| 648 | |
| 649 urlFilter = urlFilter.substring(0, editIndex) + "(" + | |
| 650 urlFilter.substring(editIndex, editCloseIndex) + ")?" + | |
| 651 urlFilter.substring(editCloseIndex); | |
| 652 } | |
| 653 | |
| 654 rule.trigger["url-filter"] = urlFilter; | |
| 655 | |
| 656 ruleInfo.mergedInto = true; | |
| 657 } | |
| 658 } | |
| 659 | |
| 660 return rules.filter(rule => !rule._merged); | |
| 661 } | |
| 662 | |
| 369 let ContentBlockerList = | 663 let ContentBlockerList = |
| 370 /** | 664 /** |
| 371 * Create a new Adblock Plus filter to content blocker list converter | 665 * Create a new Adblock Plus filter to content blocker list converter |
| 372 * | 666 * |
| 373 * @constructor | 667 * @constructor |
| 374 */ | 668 */ |
| 375 exports.ContentBlockerList = function () | 669 exports.ContentBlockerList = function () |
| 376 { | 670 { |
| 377 this.requestFilters = []; | 671 this.requestFilters = []; |
| 378 this.requestExceptions = []; | 672 this.requestExceptions = []; |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 417 | 711 |
| 418 parseDomains(filter.domains, domains, []); | 712 parseDomains(filter.domains, domains, []); |
| 419 } | 713 } |
| 420 }; | 714 }; |
| 421 | 715 |
| 422 /** | 716 /** |
| 423 * Generate content blocker list for all filters that were added | 717 * Generate content blocker list for all filters that were added |
| 424 * | 718 * |
| 425 * @returns {Object[]} The generated content blocker rules | 719 * @returns {Object[]} The generated content blocker rules |
| 426 */ | 720 */ |
| 427 ContentBlockerList.prototype.generateRules = function(filter) | 721 ContentBlockerList.prototype.generateRules = function( |
| 722 {merge = false, multiMerge = false} = {}) | |
| 428 { | 723 { |
| 429 let rules = []; | 724 let rules = []; |
| 430 | 725 |
| 431 let groupedElemhideFilters = new Map(); | 726 let groupedElemhideFilters = new Map(); |
| 432 for (let filter of this.elemhideFilters) | 727 for (let filter of this.elemhideFilters) |
| 433 { | 728 { |
| 434 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 729 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
| 435 if (!result) | 730 if (!result) |
| 436 continue; | 731 continue; |
| 437 | 732 |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 465 } | 760 } |
| 466 }); | 761 }); |
| 467 | 762 |
| 468 for (let filter of this.elemhideExceptions) | 763 for (let filter of this.elemhideExceptions) |
| 469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 764 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
| 470 for (let filter of this.requestFilters) | 765 for (let filter of this.requestFilters) |
| 471 convertFilterAddRules(rules, filter, "block", true); | 766 convertFilterAddRules(rules, filter, "block", true); |
| 472 for (let filter of this.requestExceptions) | 767 for (let filter of this.requestExceptions) |
| 473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 768 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
| 474 | 769 |
| 475 return rules.filter(rule => !hasNonASCI(rule)); | 770 rules = rules.filter(rule => !hasNonASCI(rule)); |
| 771 | |
| 772 if (merge) | |
| 773 rules = mergeCloselyMatchingRules(rules, {multi: multiMerge}); | |
| 774 | |
| 775 return rules; | |
| 476 }; | 776 }; |
| OLD | NEW |