lib/filterText.js - Issue 29909576: [experiment] Issue 7045 - Optimize V8 memory layout of filter text

Unified Diff: lib/filterText.js

Issue 29909576: [experiment] Issue 7045 - Optimize V8 memory layout of filter text Base URL: https://hg.adblockplus.org/adblockpluscore/

Patch Set: Created Oct. 15, 2018, 5:04 a.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: lib/filterText.js

===================================================================

new file mode 100644

--- /dev/null

+++ b/lib/filterText.js

@@ -0,0 +1,158 @@

+/*

+ * This file is part of Adblock Plus <https://adblockplus.org/>,

+ *

+ * Adblock Plus is free software: you can redistribute it and/or modify

+ * it under the terms of the GNU General Public License version 3 as

+ * published by the Free Software Foundation.

+ *

+ * Adblock Plus is distributed in the hope that it will be useful,

+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+ * GNU General Public License for more details.

+ *

+ * You should have received a copy of the GNU General Public License

+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

+ */

+"use strict";

+/**

+ * @fileOverview Filter text operations.

+ */

+// The memory optimizations in this file work as of V8 7.1.314.

+/**

+ * The minimum length of a domain string for it to be considered long enough to

+ * be eligible for optimization.

+ * @type {number}

+ */

+const LONG_DOMAINS_THRESHOLD = 1000;

+/**

+ * Cached strings.

+ * @type {Map.<string,string>}

+ */

+let strings = new Map();

+/**

+ * Slices a string out of its internal parent string, thus allowing the memory

+ * occupied by the parent string to be freed up.

+ *

+ * JavaScript engines like V8 tend to hold on to the parent of a sliced string

+ * even if there are no references to the parent string. While this

+ * optimization is enormously beneficial in general, in certain cases it has

+ * the opposite effect of needlessly holding on to large amounts of unused

+ * memory (V8 issue #2869). This operation creates an entirely new internal

+ * string with its own copy of the original string's memory, thus allowing the

+ * original string and any of its parents to be freed up.

+ *

+ * Note: This is a relatively expensive operation and should be used only when

+ * the benefit outweighs the cost.

+ *

+ * @param {string} string The string to slice.

+ *

+ * @returns {string} An entirely new copy of the original string.

+ */

+function trueSlice(string)

+ return JSON.parse(JSON.stringify(string));

+/**

+ * Optimizes content filter text.

+ *

+ * @param {string} text The filter text.

+ * @param {string} domains The domains part of the filter text.

+ * @param {?string} [type] The type part of the filter text, either

+ * <code>null</code> or <code>undefined</code>, or one of <code>@</code>,

+ * <code>?</code>, and <code>$</code>.

+ * @param {string} body The body part of the filter text.

+ *

+ * @returns {Array.<string>} An array containing the optimized filter text and

+ * its optimized parts.

+ */

+function optimizeContentFilterText(text, domains, type, body)

+ // In EasyList+AA there are a handful of filters with very long domain

+ // strings of 1,000 characters and even up to 100,000 characters. These tend

+ // to take up a lot of memory. We can restructure the text here to optimize

+ // for better memory usage on V8.

+ if (domains && domains.length >= LONG_DOMAINS_THRESHOLD)

+ {

+ let copy = strings.get(domains);

+ if (copy)

+ {

+ // Point to the cached copy.

+ domains = copy;

+ // V8 tends to hold on to the parent of a sliced string even if there are

+ // no references to it. We must "slice" out these strings properly so the

+ // original filter text is freed up.

+ // https://bugs.chromium.org/p/v8/issues/detail?id=2869

+ if (typeof type == "string")

+ type = trueSlice(type);

+ body = trueSlice(body);

+ // Reconstruct the text with an optimized layout.

+ text = domains + "#" + (type || "") + "#" + body;

+ }

+ else

+ {

+ strings.set(domains, domains);

+ }

+ return [text, domains, type, body];

+exports.optimizeContentFilterText = optimizeContentFilterText;

+/**

+ * Optimizes blocking and whitelist filter text.

+ *

+ * @param {string} text The filter text.

+ * @param {string} pattern The pattern part of the filter text.

+ * @param {?string} [domains] The domains part of the filter text.

+ * @param {?string} [sitekeys] The sitekeys part of the filter text.

+ * @param {?string} [csp] The CSP part of the filter text.

+ * @param {?string} [rewrite] The rewrite pattern part of the filter text.

+ *

+ * @returns {Array.<string>} An array containing the optimized filter text and

+ * its optimized parts.

+ */

+function optimizeRegExpFilterText(text, pattern, domains, sitekeys, csp,

+ rewrite)

+ if (!sitekeys && !csp && !rewrite &&

+ domains && domains.length >= LONG_DOMAINS_THRESHOLD &&

+ text.endsWith(domains))

+ {

+ let copy = strings.get(domains);

+ if (copy)

+ {

+ domains = copy;

+ text = trueSlice(text.substring(0, text.length - domains.length));

+ if (text[0] == "@" && text[1] == "@")

+ pattern = text.substring(2, 2 + pattern.length);

+ else

+ pattern = text.substring(0, pattern.length);

+ // Note: This must be the last operation on the text in order for this

+ // optimization to work. Any further operations on the text may undo the

+ // optimization.

+ text += domains;

+ }

+ else

+ {

+ strings.set(domains, domains);

+ }

+ return [text, pattern, domains, sitekeys, csp, rewrite];

+exports.optimizeRegExpFilterText = optimizeRegExpFilterText;

« no previous file with comments | « lib/filterClasses.js ('k') | test/filterText.js » ('j') | no next file with comments »