OLD | NEW |
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2017 eyeo GmbH | 3 * Copyright (C) 2006-2017 eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
359 { | 359 { |
360 newSelector.push(selector.substring(i, pos.start)); | 360 newSelector.push(selector.substring(i, pos.start)); |
361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); | 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); |
362 i = pos.end; | 362 i = pos.end; |
363 } | 363 } |
364 newSelector.push(selector.substring(i)); | 364 newSelector.push(selector.substring(i)); |
365 | 365 |
366 return newSelector.join(""); | 366 return newSelector.join(""); |
367 } | 367 } |
368 | 368 |
// Characters on which closeMatch() refuses to base an edit. Besides the usual
// regular expression metacharacters this includes "-" and "^", because the
// edit position may end up inside a "[...]" character class, where "-" forms
// a range and a leading "^" negates the class.
const closeMatchSpecialCharacters = new Set([
  ".", "+", "$", "?", "{", "}", "(", ")", "[", "]", "\\",
  "*", "|", "^", "-"
]);

/**
 * Determines whether the target string can be reached from the source string
 * with exactly one single-character edit.
 *
 * @param {string} s The source string
 * @param {string} t The target string
 * @returns {?Object} An object of the form {type, index}, where type is one
 *   of "substitute", "delete" and "insert"; or null if the strings are
 *   identical, need more than one edit, or the edit would touch a special
 *   or escaped character. For "substitute" and "delete" the index refers to
 *   the source string; for "insert" it refers to the target string, i.e. the
 *   position at which the extra character sits in the longer string.
 */
function closeMatch(s, t)
{
  let diff = s.length - t.length;

  // If the string lengths differ by more than one character, we cannot arrive
  // at target from source in a single edit operation.
  if (diff < -1 || diff > 1)
    return null;

  // If target is longer than source, swap them for the purpose of our
  // calculation, so that s is never the shorter string. Note that diff keeps
  // its original sign, which is how an insertion is recognized below.
  if (diff == -1)
    [s, t] = [t, s];

  let edit = null;

  for (let i = 0, j = 0; i < s.length; i++)
  {
    if (s[i] == t[j])
    {
      j++;
      continue;
    }

    // Since we want one and only one edit operation, we must bail here.
    if (edit)
      return null;

    // We don't deal with special characters for now. t[j] is undefined when
    // the mismatch is at the very end of the longer string; that is fine.
    if (closeMatchSpecialCharacters.has(s[i]) ||
        closeMatchSpecialCharacters.has(t[j]))
      return null;

    // Everything before the first mismatch is identical in both strings, so
    // it is enough to look at s. An odd number of backslashes right before
    // the edit position means the character is part of an escape sequence
    // (e.g. "\d"), which cannot be edited in isolation.
    let backslashes = 0;
    for (let k = i - 1; k >= 0 && s[k] == "\\"; k--)
      backslashes++;
    if (backslashes % 2 == 1)
      return null;

    switch (diff)
    {
      case 0:
        // If both strings are equal in length, this is a substitution.
        edit = {type: "substitute", index: i};
        j++;
        break;
      case 1:
        // If the source string is longer, this is a deletion.
        edit = {type: "delete", index: i};
        break;
      default:
        // The target string is longer (the strings were swapped above), so
        // this is an insertion.
        edit = {type: "insert", index: i};
    }
  }

  return edit;
}
| 435 |
/**
 * Creates a copy of a rule with the "url-filter" property of its trigger
 * left out. The trigger and action objects are copied shallowly; the given
 * rule itself is not modified.
 *
 * @param {Object} rule A rule with trigger and action properties
 * @returns {Object} A new object containing the copied trigger (without
 *   "url-filter") and the copied action
 */
function ruleWithoutURLFilter(rule)
{
  let trigger = Object.assign({}, rule.trigger);
  delete trigger["url-filter"];

  let action = Object.assign({}, rule.action);

  return {trigger, action};
}
| 447 |
/**
 * Merges rules that are identical except for a single-character difference
 * in their "url-filter" into one rule, by turning the differing position
 * into a character class and/or making it optional. For example "ads" and
 * "adv" become "ad[sv]", and "ads" and "ad" become "ads?".
 *
 * The "url-filter" of a rule that absorbs other rules is updated in place on
 * the given rule object; apart from that the given rule objects are left
 * untouched.
 *
 * NOTE(review): rules with action type "ignore-previous-rules" are only
 * skipped, not treated as a boundary, so two mergeable rules on either side
 * of such a rule can still be merged. Confirm that callers never interleave
 * them that way.
 *
 * @param {Object[]} rules The rules to merge
 * @returns {Object[]} The given rules, in their original order, without
 *   those that have been merged into another rule
 */
function mergeCloselyMatchingRules(rules)
{
  // Closely matching rules are likely to be within a certain range. We only
  // look for matches within this range. If we increase this value, it can give
  // us more matches and a smaller resulting rule set, but possibly at a
  // significant performance cost.
  const heuristicRange = 100;

  // All bookkeeping lives in these private records rather than on the rule
  // objects themselves, which belong to the caller.
  let rulesInfo = rules.map(rule =>
  {
    let ruleInfo = {rule, merged: false, mergedInto: false};

    if (rule.action.type == "ignore-previous-rules")
    {
      ruleInfo.skip = true;
    }
    else
    {
      // Save a stringified version of the rule, but without the URL filter. We
      // use this for comparison later.
      ruleInfo.stringifiedWithoutURLFilter =
        JSON.stringify(ruleWithoutURLFilter(rule));
    }

    return ruleInfo;
  });

  for (let i = 0; i < rules.length; i++)
  {
    if (rulesInfo[i].skip)
      continue;

    for (let j = i + 1; j < i + heuristicRange && j < rules.length; j++)
    {
      if (rulesInfo[j].skip)
        continue;

      // Only rules that are identical except for the URL filter qualify.
      if (rulesInfo[i].stringifiedWithoutURLFilter !=
          rulesInfo[j].stringifiedWithoutURLFilter)
        continue;

      let source = rules[i].trigger["url-filter"];
      let target = rules[j].trigger["url-filter"];

      // Find out if the Levenshtein distance between the rules is 1.
      let edit = closeMatch(source, target);
      if (!edit)
        continue;

      let urlFilter, ruleInfo, match = {edit};

      if (edit.type == "insert")
      {
        // Convert the insertion into a deletion and stick it on the target
        // rule instead. We can only group deletions and substitutions;
        // therefore insertions must be treated as deletions on the target
        // rule, to be dealt with later.
        urlFilter = target;
        ruleInfo = rulesInfo[j];
        match.index = i;
        edit.type = "delete";
      }
      else
      {
        urlFilter = source;
        ruleInfo = rulesInfo[i];
        match.index = j;
      }

      // Matches are grouped by the position of the edit, since only edits at
      // the same position can be folded into the same rule.
      if (!ruleInfo.matches)
        ruleInfo.matches = new Array(urlFilter.length + 1);

      let matchesForIndex = ruleInfo.matches[edit.index];

      if (matchesForIndex)
      {
        matchesForIndex.push(match);
      }
      else
      {
        matchesForIndex = [match];
        ruleInfo.matches[edit.index] = matchesForIndex;
      }

      if (!ruleInfo.bestMatches ||
          matchesForIndex.length > ruleInfo.bestMatches.length)
        ruleInfo.bestMatches = matchesForIndex;
    }
  }

  let candidateRulesInfo = rulesInfo.filter(ruleInfo => ruleInfo.bestMatches);

  // For best results, we have to sort the candidates by the number of matches.
  // For example, we want "ads", "bds", "adv", "bdv", and "bdx" to generate
  // "ad[sv]" and "bd[svx]" (2 rules), not "[ab]ds", "[ab]dv", and "bdx" (3
  // rules).
  candidateRulesInfo.sort((ruleInfo1, ruleInfo2) =>
  {
    return ruleInfo2.bestMatches.length - ruleInfo1.bestMatches.length;
  });

  for (let ruleInfo of candidateRulesInfo)
  {
    // This rule has already been absorbed by another one.
    if (ruleInfo.merged)
      continue;

    // Find the best set of rules to group, which is simply the largest set.
    let best = (ruleInfo.matches || []).reduce((best, matchesForIndex) =>
    {
      matchesForIndex = (matchesForIndex || []).filter(match =>
      {
        // Filter out rules that have either already been merged into other
        // rules or have had other rules merged into them.
        let matchInfo = rulesInfo[match.index];
        return !matchInfo.merged && !matchInfo.mergedInto;
      });

      return matchesForIndex.length > best.length ? matchesForIndex : best;
    },
    []);

    if (best.length == 0)
      continue;

    // Merge all the matching rules into this one.

    let rule = ruleInfo.rule;
    let editIndex = best[0].edit.index;

    let characters = [];
    let quantifier = "";

    for (let match of best)
    {
      if (match.edit.type == "delete")
      {
        // The matching rule lacks this character, so make it optional.
        quantifier = "?";
      }
      else
      {
        // The matching rule has a different character at this position.
        // Identical rules contribute the same character; list it only once.
        let character = rules[match.index].trigger["url-filter"][editIndex];
        if (characters.indexOf(character) == -1)
          characters.push(character);
      }

      rulesInfo[match.index].merged = true;
    }

    let urlFilter = rule.trigger["url-filter"];

    let prefix = urlFilter.substring(0, editIndex);
    let atom = urlFilter[editIndex];
    let suffix = urlFilter.substring(editIndex + 1);

    if (characters.length > 0)
      atom = "[" + atom + characters.join("") + "]";

    rule.trigger["url-filter"] = prefix + atom + quantifier + suffix;

    ruleInfo.mergedInto = true;
  }

  return rules.filter((rule, index) => !rulesInfo[index].merged);
}
| 611 |
369 let ContentBlockerList = | 612 let ContentBlockerList = |
370 /** | 613 /** |
371 * Create a new Adblock Plus filter to content blocker list converter | 614 * Create a new Adblock Plus filter to content blocker list converter |
372 * | 615 * |
373 * @constructor | 616 * @constructor |
374 */ | 617 */ |
375 exports.ContentBlockerList = function () | 618 exports.ContentBlockerList = function () |
376 { | 619 { |
377 this.requestFilters = []; | 620 this.requestFilters = []; |
378 this.requestExceptions = []; | 621 this.requestExceptions = []; |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
417 | 660 |
418 parseDomains(filter.domains, domains, []); | 661 parseDomains(filter.domains, domains, []); |
419 } | 662 } |
420 }; | 663 }; |
421 | 664 |
422 /** | 665 /** |
423 * Generate content blocker list for all filters that were added | 666 * Generate content blocker list for all filters that were added |
424 * | 667 * |
425 * @returns {Filter} filter Filter to convert | 668 * @returns {Filter} filter Filter to convert |
426 */ | 669 */ |
427 ContentBlockerList.prototype.generateRules = function(filter) | 670 ContentBlockerList.prototype.generateRules = function({merge = false} = {}) |
428 { | 671 { |
429 let rules = []; | 672 let rules = []; |
430 | 673 |
431 let groupedElemhideFilters = new Map(); | 674 let groupedElemhideFilters = new Map(); |
432 for (let filter of this.elemhideFilters) | 675 for (let filter of this.elemhideFilters) |
433 { | 676 { |
434 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 677 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
435 if (!result) | 678 if (!result) |
436 continue; | 679 continue; |
437 | 680 |
(...skipping 27 matching lines...) Expand all Loading... |
465 } | 708 } |
466 }); | 709 }); |
467 | 710 |
468 for (let filter of this.elemhideExceptions) | 711 for (let filter of this.elemhideExceptions) |
469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); | 712 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
470 for (let filter of this.requestFilters) | 713 for (let filter of this.requestFilters) |
471 convertFilterAddRules(rules, filter, "block", true); | 714 convertFilterAddRules(rules, filter, "block", true); |
472 for (let filter of this.requestExceptions) | 715 for (let filter of this.requestExceptions) |
473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); | 716 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
474 | 717 |
475 return rules.filter(rule => !hasNonASCI(rule)); | 718 rules = rules.filter(rule => !hasNonASCI(rule)); |
| 719 |
| 720 if (merge) |
| 721 rules = mergeCloselyMatchingRules(rules); |
| 722 |
| 723 return rules; |
476 }; | 724 }; |
OLD | NEW |