Index: lib/filterText.js |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/lib/filterText.js |
@@ -0,0 +1,158 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-present eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+"use strict"; |
+ |
+/** |
+ * @fileOverview Filter text operations. |
+ */ |
+ |
+// The memory optimizations in this file work as of V8 7.1.314. |
+ |
+/** |
+ * The minimum length of a domain string for it to be considered long enough to |
+ * be eligible for optimization. |
+ * |
+ * Both optimizeContentFilterText() and optimizeRegExpFilterText() compare the |
+ * length of the domains part against this value with <code>&gt;=</code>, so a |
+ * domains string of exactly this many characters is already eligible. |
+ * @type {number} |
+ */ |
+const LONG_DOMAINS_THRESHOLD = 1000; |
+ |
+/** |
+ * Cached strings. |
+ * |
+ * Each entry maps a long domains string to itself. The optimize functions in |
+ * this file add an entry the first time they see a given long domains string |
+ * and look it up on later calls, so that every later filter text with an equal |
+ * domains string is rebuilt around the one cached string. Nothing in this file |
+ * removes entries from the map. |
+ * @type {Map.<string,string>} |
+ */ |
+let strings = new Map(); |
+ |
+/** |
+ * Slices a string out of its internal parent string, thus allowing the memory |
+ * occupied by the parent string to be freed up. |
+ * |
+ * JavaScript engines like V8 tend to hold on to the parent of a sliced string |
+ * even if there are no references to the parent string. While this |
+ * optimization is enormously beneficial in general, in certain cases it has |
+ * the opposite effect of needlessly holding on to large amounts of unused |
+ * memory (V8 issue #2869). This operation creates an entirely new internal |
+ * string with its own copy of the original string's memory, thus allowing the |
+ * original string and any of its parents to be freed up. |
+ * |
+ * The copy is made by serializing the string to JSON and parsing the result |
+ * back, which yields a string with the same contents. The round trip is |
+ * deliberate: it is what forces the new allocation, so it should not be |
+ * replaced with a cheaper-looking equivalent without re-checking the memory |
+ * behavior on the target engine. |
+ * |
+ * Note: This is a relatively expensive operation and should be used only when |
+ * the benefit outweighs the cost. |
+ * |
+ * @param {string} string The string to slice. |
+ * |
+ * @returns {string} An entirely new copy of the original string. |
+ */ |
+function trueSlice(string) |
+{ |
+ return JSON.parse(JSON.stringify(string)); |
+} |
+ |
+/** |
+ * Optimizes content filter text. |
+ * |
+ * The optimization only applies when the domains part is non-empty and at |
+ * least <code>LONG_DOMAINS_THRESHOLD</code> characters long; otherwise all |
+ * arguments are returned unchanged. |
+ * |
+ * The first time a given long domains string is seen it is stored in the |
+ * string cache and the arguments are again returned unchanged. On any later |
+ * call with an equal domains string, the domains part is replaced with the |
+ * cached string, the type (when it is a string) and the body are copied out |
+ * of the original text, and the text is rebuilt as the cached domains, a |
+ * <code>#</code>, the type (or nothing if there is no type), another |
+ * <code>#</code> and the body. |
+ * |
+ * NOTE(review): the rebuilt text presumes that the original text has exactly |
+ * this domains-#-type-#-body layout; this function does not verify it, so |
+ * confirm against the caller. |
+ * |
+ * @param {string} text The filter text. |
+ * @param {string} domains The domains part of the filter text. |
+ * @param {?string} [type] The type part of the filter text, either |
+ * <code>null</code> or <code>undefined</code>, or one of <code>@</code>, |
+ * <code>?</code>, and <code>$</code>. |
+ * @param {string} body The body part of the filter text. |
+ * |
+ * @returns {Array.<?string>} An array containing the optimized filter text |
+ * and its optimized parts, in the order text, domains, type, body. The type |
+ * entry is passed through as given when it is not a string. |
+ */ |
+function optimizeContentFilterText(text, domains, type, body) |
+{ |
+ // In EasyList+AA there are a handful of filters with very long domain |
+ // strings of 1,000 characters and even up to 100,000 characters. These tend |
+ // to take up a lot of memory. We can restructure the text here to optimize |
+ // for better memory usage on V8. |
+ if (domains && domains.length >= LONG_DOMAINS_THRESHOLD) |
+ { |
+ let copy = strings.get(domains); |
+ if (copy) |
+ { |
+ // Point to the cached copy. |
+ domains = copy; |
+ |
+ // V8 tends to hold on to the parent of a sliced string even if there are |
+ // no references to it. We must "slice" out these strings properly so the |
+ // original filter text is freed up. |
+ // https://bugs.chromium.org/p/v8/issues/detail?id=2869 |
+ if (typeof type == "string") |
+ type = trueSlice(type); |
+ body = trueSlice(body); |
+ |
+ // Reconstruct the text with an optimized layout. |
+ text = domains + "#" + (type || "") + "#" + body; |
+ } |
+ else |
+ { |
+ strings.set(domains, domains); |
+ } |
+ } |
+ |
+ return [text, domains, type, body]; |
+} |
+ |
+exports.optimizeContentFilterText = optimizeContentFilterText; |
+ |
+/** |
+ * Optimizes blocking and whitelist filter text. |
+ * |
+ * The optimization only applies when there are no sitekeys, no CSP and no |
+ * rewrite pattern, the domains part is at least |
+ * <code>LONG_DOMAINS_THRESHOLD</code> characters long, and the filter text |
+ * ends with the domains part. In every other case all arguments are returned |
+ * unchanged. |
+ * |
+ * The first time a given long domains string is seen it is stored in the |
+ * string cache and the arguments are again returned unchanged. On any later |
+ * call with an equal domains string, the domains part is replaced with the |
+ * cached string, the portion of the text that precedes the domains part is |
+ * copied into a new string, the pattern is re-read from that copy (skipping a |
+ * leading <code>@@</code> if the copy starts with one), and finally the |
+ * cached domains string is appended to the copy to form the new text. |
+ * |
+ * NOTE(review): re-reading the pattern by length presumes that the pattern |
+ * sits at the very start of the text, directly after the optional |
+ * <code>@@</code>; this function does not verify it, so confirm against the |
+ * caller. |
+ * |
+ * @param {string} text The filter text. |
+ * @param {string} pattern The pattern part of the filter text. |
+ * @param {?string} [domains] The domains part of the filter text. |
+ * @param {?string} [sitekeys] The sitekeys part of the filter text. |
+ * @param {?string} [csp] The CSP part of the filter text. |
+ * @param {?string} [rewrite] The rewrite pattern part of the filter text. |
+ * |
+ * @returns {Array.<?string>} An array containing the optimized filter text |
+ * and its optimized parts, in the order text, pattern, domains, sitekeys, |
+ * csp, rewrite. The sitekeys, csp and rewrite entries are always passed |
+ * through as given. |
+ */ |
+function optimizeRegExpFilterText(text, pattern, domains, sitekeys, csp, |
+ rewrite) |
+{ |
+ if (!sitekeys && !csp && !rewrite && |
+ domains && domains.length >= LONG_DOMAINS_THRESHOLD && |
+ text.endsWith(domains)) |
+ { |
+ let copy = strings.get(domains); |
+ if (copy) |
+ { |
+ domains = copy; |
+ |
+ text = trueSlice(text.substring(0, text.length - domains.length)); |
+ |
+ if (text[0] == "@" && text[1] == "@") |
+ pattern = text.substring(2, 2 + pattern.length); |
+ else |
+ pattern = text.substring(0, pattern.length); |
+ |
+ // Note: This must be the last operation on the text in order for this |
+ // optimization to work. Any further operations on the text may undo the |
+ // optimization. |
+ text += domains; |
+ } |
+ else |
+ { |
+ strings.set(domains, domains); |
+ } |
+ } |
+ |
+ return [text, pattern, domains, sitekeys, csp, rewrite]; |
+} |
+ |
+exports.optimizeRegExpFilterText = optimizeRegExpFilterText; |