Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/abp2blocklist.js

Issue 29426594: Issue 3673 - Merge closely matching rules (Closed) Base URL: https://hg.adblockplus.org/abp2blocklist
Patch Set: Created May 1, 2017, 7:37 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-2017 eyeo GmbH 3 * Copyright (C) 2006-2017 eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after
359 { 359 {
360 newSelector.push(selector.substring(i, pos.start)); 360 newSelector.push(selector.substring(i, pos.start));
361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']'); 361 newSelector.push('[id=', selector.substring(pos.start + 1, pos.end), ']');
362 i = pos.end; 362 i = pos.end;
363 } 363 }
364 newSelector.push(selector.substring(i)); 364 newSelector.push(selector.substring(i));
365 365
366 return newSelector.join(""); 366 return newSelector.join("");
367 } 367 }
368 368
369 function closeMatch(s, t)
370 {
371 // This function returns an edit operation (one of "substitute", "delete",
372 // and "insert") along with an index in the source string where the edit
373 // should occur in order to arrive at the target string.
374
375 let diff = s.length - t.length;
376
377 // If the string lenghts differ by more than one character, we cannot arrive
378 // at target from source in a single edit operation.
379 if (diff < -1 || diff > 1)
380 return null;
381
382 // If target is longer than source, swap them for the purpose of our
383 // calculation.
384 if (diff == -1)
385 {
386 let tmp = s;
387 s = t;
388 t = tmp;
389 }
390
391 let edit = null;
392
393 for (let i = 0, j = 0; i < s.length; i++)
394 {
395 if (s[i] == t[j])
396 {
397 j++;
398 }
399 else if (edit)
400 {
401 // Since we want one and only one edit operation, we must bail here.
402 return null;
403 }
404 else
405 {
406 switch (diff)
407 {
408 case 0:
409 // If both strings are equal in length, this is a substitution.
410 edit = {type: "substitute", index: i};
411 j++;
412 break;
413 case 1:
414 // If the source string is longer, this is a deletion.
415 edit = {type: "delete", index: i};
416 break;
417 default:
418 edit = {type: "insert", index: i};
419 }
420 }
421 }
422
423 return edit;
424 }
425
426 function ruleWithoutURLFilter(rule)
427 {
428 let copy = {
429 trigger: Object.assign({}, rule.trigger),
430 action: Object.assign({}, rule.action)
431 };
432
433 delete copy.trigger["url-filter"];
434
435 return copy;
436 }
437
438 function mergeCloselyMatchingRules(rules)
439 {
440 let rulesInfo = new Array(rules.length);
441
442 rules.forEach((rule, index) =>
443 {
444 let skip = false;
445
446 // For these rules we're getting dimishing returns. i.e. there aren't too
447 // many of these that qualify anyway, they just slow us down.
448 if (rule.action.type == "css-display-none" ||
449 rule.action.type == "ignore-previous-rules" ||
450 rule.trigger["url-filter-is-case-sensitive"] ||
451 rule.trigger["load-type"] ||
452 rule.trigger["if-domain"] ||
453 rule.trigger["unless-domain"])
454 skip = true;
455
456 if (skip)
457 {
458 rulesInfo[index] = {skip: true};
459 }
460 else
461 {
462 // Save a stringified version of the rule, but without the URL filter. We
463 // use this for comparison later.
464 rulesInfo[index] = {
465 stringifiedWithoutURLFilter: JSON.stringify(ruleWithoutURLFilter(rule))
466 };
467 }
468 });
469
470 for (let i = 0; i < rules.length; i++)
471 {
472 if (rulesInfo[i].skip)
473 continue;
474
475 for (let j = i + 1; j < rules.length; j++)
476 {
477 if (rulesInfo[j].skip)
478 continue;
479
480 // Check if the rules are identical except for the URL filter.
481 if (rulesInfo[i].stringifiedWithoutURLFilter ==
482 rulesInfo[j].stringifiedWithoutURLFilter)
483 {
484 let source = rules[i].trigger["url-filter"];
485 let target = rules[j].trigger["url-filter"];
486
487 // Find out if the Levenshtein distance between the rules is 1.
488 let edit = closeMatch(source, target);
489
490 if (edit)
491 {
492 let urlFilter, ruleInfo, match = {edit};
493
494 if (edit.type == "insert")
495 {
496 // Convert the insertion into a deletion and stick it on the target
497 // rule instead. We can only group deletions and substitutions;
498 // therefore insertions must be treated as deletions on the target
499 // rule, to be dealt with later.
500 urlFilter = target;
501 ruleInfo = rulesInfo[j];
502 match.index = i;
503 edit.type = "delete";
504 }
505 else
506 {
507 urlFilter = source;
508 ruleInfo = rulesInfo[i];
509 match.index = j;
510 }
511
512 if (!ruleInfo.matches)
513 ruleInfo.matches = new Array(urlFilter.length + 1);
514
515 let matchesForIndex = ruleInfo.matches[edit.index];
516
517 if (matchesForIndex)
518 matchesForIndex.push(match);
519 else
520 ruleInfo.matches[edit.index] = [match];
521 }
522 }
523 }
524 }
525
526 let mergedRules = [];
527
528 rules.forEach((rule, index) =>
529 {
530 let ruleInfo = rulesInfo[index];
531
532 if (ruleInfo.merged)
533 return;
534
535 // Find the best set of rules to group, which is simply the largest set.
536 let best = null;
537 for (let matchesForIndex of ruleInfo.matches || [])
538 {
539 if (matchesForIndex && (!best || matchesForIndex.length > best.length))
540 best = matchesForIndex;
541 }
542
543 if (best)
544 {
545 // Merge all the matching rules into this one.
546
547 let editIndex = best[0].edit.index;
548
549 let characters = [];
550 let quantifier = "";
551
552 for (let match of best)
553 {
554 if (match.edit.type == "delete")
555 quantifier = "?";
556 else
557 characters.push(rules[match.index].trigger["url-filter"][editIndex]);
558
559 rulesInfo[match.index].merged = true;
560 }
561
562 let urlFilter = rule.trigger["url-filter"];
563
564 urlFilter = urlFilter.substring(0, editIndex + 1) + quantifier +
565 urlFilter.substring(editIndex + 1);
566 if (characters.length > 0)
567 {
568 urlFilter = urlFilter.substring(0, editIndex) + "[" +
569 urlFilter[editIndex] + characters.join("") + "]" +
570 urlFilter.substring(editIndex + 1);
571 }
572
573 rule.trigger["url-filter"] = urlFilter;
574 }
575
576 mergedRules.push(rule);
577 });
578
579 return mergedRules;
580 }
581
369 let ContentBlockerList = 582 let ContentBlockerList =
370 /** 583 /**
371 * Create a new Adblock Plus filter to content blocker list converter 584 * Create a new Adblock Plus filter to content blocker list converter
372 * 585 *
373 * @constructor 586 * @constructor
374 */ 587 */
375 exports.ContentBlockerList = function () 588 exports.ContentBlockerList = function ()
376 { 589 {
377 this.requestFilters = []; 590 this.requestFilters = [];
378 this.requestExceptions = []; 591 this.requestExceptions = [];
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 } 678 }
466 }); 679 });
467 680
468 for (let filter of this.elemhideExceptions) 681 for (let filter of this.elemhideExceptions)
469 convertFilterAddRules(rules, filter, "ignore-previous-rules", false); 682 convertFilterAddRules(rules, filter, "ignore-previous-rules", false);
470 for (let filter of this.requestFilters) 683 for (let filter of this.requestFilters)
471 convertFilterAddRules(rules, filter, "block", true); 684 convertFilterAddRules(rules, filter, "block", true);
472 for (let filter of this.requestExceptions) 685 for (let filter of this.requestExceptions)
473 convertFilterAddRules(rules, filter, "ignore-previous-rules", true); 686 convertFilterAddRules(rules, filter, "ignore-previous-rules", true);
474 687
475 return rules.filter(rule => !hasNonASCI(rule)); 688 rules = rules.filter(rule => !hasNonASCI(rule));
689
690 rules = mergeCloselyMatchingRules(rules);
691
692 return rules;
476 }; 693 };
OLDNEW
« no previous file with comments | « no previous file | test/abp2blocklist.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld