| Index: lib/filterText.js |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/lib/filterText.js |
| @@ -0,0 +1,158 @@ |
| +/* |
| + * This file is part of Adblock Plus <https://adblockplus.org/>, |
| + * Copyright (C) 2006-present eyeo GmbH |
| + * |
| + * Adblock Plus is free software: you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License version 3 as |
| + * published by the Free Software Foundation. |
| + * |
| + * Adblock Plus is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + * You should have received a copy of the GNU General Public License |
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + */ |
| + |
| +"use strict"; |
| + |
| +/** |
| + * @fileOverview Memory optimizations for filter text and its parts. |
| + */ |
| + |
| +// The memory optimizations in this file work as of V8 7.1.314. |
| + |
| +/** |
| + * The minimum length (inclusive) that the domains part of a filter must have |
| + * for the optimizations in this file to apply; shorter ones are left alone. |
| + * @type {number} |
| + */ |
| +const LONG_DOMAINS_THRESHOLD = 1000; |
| + |
| +/** |
| + * Cache of long domains strings, each mapped to itself; never pruned here. |
| + * @type {Map.<string,string>} |
| + */ |
| +let strings = new Map(); |
| + |
| +/** |
| + * Returns a copy of a string that is meant to share no memory with any |
| + * internal parent string, thus allowing the parent's memory to be freed up. |
| + * |
| + * JavaScript engines like V8 tend to hold on to the parent of a sliced string |
| + * even if there are no references to the parent string. While this |
| + * optimization is enormously beneficial in general, in certain cases it has |
| + * the opposite effect of needlessly holding on to large amounts of unused |
| + * memory (V8 issue #2869). To get an entirely new internal string, this |
| + * function serializes the value with JSON.stringify() and parses the result |
| + * back with JSON.parse(). |
| + * |
| + * Note: This is a relatively expensive operation and should be used only when |
| + * the benefit outweighs the cost. |
| + * |
| + * @param {string} string The string to copy. |
| + * |
| + * @returns {string} A new string with the same value as the original. |
| + */ |
| +function trueSlice(string) |
| +{ |
| +  return JSON.parse(JSON.stringify(string)); |
| +} |
| + |
| +/** |
| + * Deduplicates the long domains part of content filter text to save memory. |
| + * |
| + * @param {string} text The filter text. |
| + * @param {?string} [domains] The domains part of the filter text. |
| + * @param {?string} [type] The type part of the filter text, either |
| + *   <code>null</code> or <code>undefined</code>, or one of <code>@</code>, |
| + *   <code>?</code>, and <code>$</code>. |
| + * @param {string} body The body part of the filter text. |
| + * |
| + * @returns {Array.<?string>} <code>[text, domains, type, body]</code>, rebuilt |
| + *   only if an equal long domains string was cached by an earlier call. |
| + */ |
| +function optimizeContentFilterText(text, domains, type, body) |
| +{ |
| +  // In EasyList+AA there are a handful of filters with very long domain |
| +  // strings of 1,000 characters and even up to 100,000 characters. These tend |
| +  // to take up a lot of memory. The first filter seen with such a string only |
| +  // caches it; later filters with an equal string are rebuilt around the copy. |
| +  if (domains && domains.length >= LONG_DOMAINS_THRESHOLD) |
| +  { |
| +    let copy = strings.get(domains); |
| +    if (copy) |
| +    { |
| +      // Share the cached copy instead of this filter's own domains string. |
| +      domains = copy; |
| + |
| +      // V8 tends to hold on to the parent of a sliced string even if there are |
| +      // no references to it, so the remaining parts are copied out of the |
| +      // original filter text to let it be freed up (type may be absent). |
| +      // https://bugs.chromium.org/p/v8/issues/detail?id=2869 |
| +      if (typeof type == "string") |
| +        type = trueSlice(type); |
| +      body = trueSlice(body); |
| + |
| +      // Rebuild the text from the shared domains and the copied parts. |
| +      text = domains + "#" + (type || "") + "#" + body; |
| +    } |
| +    else |
| +    { |
| +      strings.set(domains, domains); |
| +    } |
| +  } |
| + |
| +  return [text, domains, type, body]; |
| +} |
| + |
| +exports.optimizeContentFilterText = optimizeContentFilterText; |
| + |
| +/** |
| + * Deduplicates the long domains part of blocking and whitelist filter text. |
| + * |
| + * @param {string} text The filter text. |
| + * @param {string} pattern The pattern part of the filter text, assumed to |
| + *   start the text directly or right after a leading <code>@@</code>. |
| + * @param {?string} [domains] The domains part of the filter text. |
| + * @param {?string} [sitekeys] The sitekeys part; if non-empty, nothing changes. |
| + * @param {?string} [csp] The CSP part; if non-empty, nothing changes. |
| + * @param {?string} [rewrite] The rewrite part; if non-empty, nothing changes. |
| + * |
| + * @returns {Array.<?string>} The six arguments in order, optimized if possible. |
| + */ |
| +function optimizeRegExpFilterText(text, pattern, domains, sitekeys, csp, |
| +                                  rewrite) |
| +{ |
| +  if (!sitekeys && !csp && !rewrite && |
| +      domains && domains.length >= LONG_DOMAINS_THRESHOLD && |
| +      text.endsWith(domains)) |
| +  { |
| +    let copy = strings.get(domains); |
| +    if (copy) |
| +    { |
| +      domains = copy; |
| + |
| +      text = trueSlice(text.substring(0, text.length - domains.length)); |
| + |
| +      if (text[0] == "@" && text[1] == "@") |
| +        pattern = text.substring(2, 2 + pattern.length); |
| +      else |
| +        pattern = text.substring(0, pattern.length); |
| + |
| +      // Append the cached domains last: for this optimization to work, this |
| +      // must be the final operation on the text, as any further operation on |
| +      // the text may undo the optimization. |
| +      text += domains; |
| +    } |
| +    else |
| +    { |
| +      strings.set(domains, domains); |
| +    } |
| +  } |
| + |
| +  return [text, pattern, domains, sitekeys, csp, rewrite]; |
| +} |
| + |
| +exports.optimizeRegExpFilterText = optimizeRegExpFilterText; |