Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/filterText.js

Issue 29909576: [experiment] Issue 7045 - Optimize V8 memory layout of filter text Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Remove check in lib/filterClasses.js Created Oct. 15, 2018, 9:25 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterText.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/filterText.js
===================================================================
new file mode 100644
--- /dev/null
+++ b/lib/filterText.js
@@ -0,0 +1,158 @@
+/*
+ * This file is part of Adblock Plus <https://adblockplus.org/>,
+ * Copyright (C) 2006-present eyeo GmbH
+ *
+ * Adblock Plus is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * Adblock Plus is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+"use strict";
+
+/**
+ * @fileOverview Filter text operations.
+ */
+
+// The memory optimizations in this file work as of V8 7.1.314.
+
+/**
+ * The minimum length of a domain string for it to be considered long enough to
+ * be eligible for optimization.
+ * @type {number}
+ */
+const LONG_DOMAINS_THRESHOLD = 1000;
+
+/**
+ * Cached strings.
+ * @type {Map.<string,string>}
+ */
+let strings = new Map();
+
+/**
+ * Slices a string out of its internal parent string, thus allowing the memory
+ * occupied by the parent string to be freed up.
+ *
+ * JavaScript engines like V8 tend to hold on to the parent of a sliced string
+ * even if there are no references to the parent string. While this
+ * optimization is enormously beneficial in general, in certain cases it has
+ * the opposite effect of needlessly holding on to large amounts of unused
+ * memory (V8 issue #2869). This operation creates an entirely new internal
+ * string with its own copy of the original string's memory, thus allowing the
+ * original string and any of its parents to be freed up.
+ *
+ * Note: This is a relatively expensive operation and should be used only when
+ * the benefit outweighs the cost.
+ *
+ * @param {string} string The string to slice.
+ *
+ * @returns {string} An entirely new copy of the original string.
+ */
+function trueSlice(string)
+{
+ return JSON.parse(JSON.stringify(string));
+}
+
+/**
+ * Optimizes content filter text.
+ *
+ * @param {string} text The filter text.
+ * @param {string} domains The domains part of the filter text.
+ * @param {?string} [type] The type part of the filter text, either
+ * <code>null</code> or <code>undefined</code>, or one of <code>@</code>,
+ * <code>?</code>, and <code>$</code>.
+ * @param {string} body The body part of the filter text.
+ *
+ * @returns {Array.<string>} An array containing the optimized filter text and
+ * its optimized parts.
+ */
+function optimizeContentFilterText(text, domains, type, body)
+{
+ // In EasyList+AA there are a handful of filters with very long domain
+ // strings of 1,000 characters and even up to 100,000 characters. These tend
+ // to take up a lot of memory. We can restructure the text here to optimize
+ // for better memory usage on V8.
+ if (domains && domains.length >= LONG_DOMAINS_THRESHOLD)
+ {
+ let copy = strings.get(domains);
+ if (copy)
+ {
+ // Point to the cached copy.
+ domains = copy;
+
+ // V8 tends to hold on to the parent of a sliced string even if there are
+ // no references to it. We must "slice" out these strings properly so the
+ // original filter text is freed up.
+ // https://bugs.chromium.org/p/v8/issues/detail?id=2869
+ if (typeof type == "string")
+ type = trueSlice(type);
+ body = trueSlice(body);
+
+ // Reconstruct the text with an optimized layout.
+ text = domains + "#" + (type || "") + "#" + body;
+ }
+ else
+ {
+ strings.set(domains, domains);
+ }
+ }
+
+ return [text, domains, type, body];
+}
+
+exports.optimizeContentFilterText = optimizeContentFilterText;
+
+/**
+ * Optimizes blocking and whitelist filter text.
+ *
+ * @param {string} text The filter text.
+ * @param {string} pattern The pattern part of the filter text.
+ * @param {?string} [domains] The domains part of the filter text.
+ * @param {?string} [sitekeys] The sitekeys part of the filter text.
+ * @param {?string} [csp] The CSP part of the filter text.
+ * @param {?string} [rewrite] The rewrite pattern part of the filter text.
+ *
+ * @returns {Array.<string>} An array containing the optimized filter text and
+ * its optimized parts.
+ */
+function optimizeRegExpFilterText(text, pattern, domains, sitekeys, csp,
+ rewrite)
+{
+ if (!sitekeys && !csp && !rewrite &&
+ domains && domains.length >= LONG_DOMAINS_THRESHOLD &&
+ text.endsWith(domains))
+ {
+ let copy = strings.get(domains);
+ if (copy)
+ {
+ domains = copy;
+
+ text = trueSlice(text.substring(0, text.length - domains.length));
+
+ if (text[0] == "@" && text[1] == "@")
+ pattern = text.substring(2, 2 + pattern.length);
+ else
+ pattern = text.substring(0, pattern.length);
+
+ // Note: This must be the last operation on the text in order for this
+ // optimization to work. Any further operations on the text may undo the
+ // optimization.
+ text += domains;
+ }
+ else
+ {
+ strings.set(domains, domains);
+ }
+ }
+
+ return [text, pattern, domains, sitekeys, csp, rewrite];
+}
+
+exports.optimizeRegExpFilterText = optimizeRegExpFilterText;
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterText.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld