Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: csv-export.js

Issue 29636585: Issue 6171 - create CSV exporter and importer for translations (Closed)
Patch Set: Added the copyright header Created Dec. 20, 2017, 5:46 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « README.md ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: csv-export.js
===================================================================
new file mode 100644
--- /dev/null
+++ b/csv-export.js
@@ -0,0 +1,477 @@
+/*
Thomas Greiner 2018/01/22 19:49:52 While we're waiting for the final results from the
saroyanm 2018/02/28 20:57:33 I'll address this in a separate patch. Moving this
+ * This file is part of Adblock Plus <https://adblockplus.org/>,
+ * Copyright (C) 2006-present eyeo GmbH
+ *
+ * Adblock Plus is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 as
+ * published by the Free Software Foundation.
+ *
+ * Adblock Plus is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+const fs = require("fs");
+const {exec} = require("child_process");
+
+// Directory that holds one sub-folder per locale code
+const localesDir = "locale";
+// Locale whose files define which strings exist and which carries descriptions
+const defaultLocale = "en_US";
+
+let filesNames = []; // ex.: desktop-options.json
Thomas Greiner 2018/01/22 19:49:52 Detail: Usually you'd call this variable "fileName
saroyanm 2018/02/28 20:48:26 Done.
+let locales = []; // List of all available locale codes
Thomas Greiner 2018/01/22 19:49:53 Detail: We tend to put comments in their own line
saroyanm 2018/02/28 20:48:25 Done.
+// Fixed leading CSV columns; the other locale codes are appended on export
+let headers = ["StringID", "Description", "Placeholders", defaultLocale];
+// Output name template; {repo} and {hash} are substituted on export and the
+// -o command line option replaces the whole template
+let outputFileName = "translations-{repo}-{hash}.csv";
Thomas Greiner 2018/01/22 19:49:53 Detail: This custom template seems redundant since
saroyanm 2018/02/28 20:48:22 We can't pass Template literals using CLI, until I
Thomas Greiner 2018/03/19 18:28:01 This is not a template literal. What I meant is t
+
+/**
+ * Export existing translation files into CSV file
+ * @param {[type]} filesFilter Optional parameter which allow include only
Thomas Greiner 2018/01/22 19:49:49 Suggestion: What I tend to do to visually differen
Thomas Greiner 2018/01/22 19:49:50 Detail: No need to include the text "Optional" bec
Thomas Greiner 2018/01/22 19:49:51 Detail: "[type]" is not a valid type. Also applie
saroyanm 2018/02/28 20:48:24 Right, it's the default value generated by DocBloc
saroyanm 2018/02/28 20:48:27 Done.
saroyanm 2018/02/28 20:48:31 Done.
+ * fileNames in the array, if ommited all files
Thomas Greiner 2018/01/22 19:49:51 Typo: Replace "ommited" with "omitted" Also appli
saroyanm 2018/02/28 20:48:30 Done.
+ * will be exported
+ */
+function exportTranslations(filesFilter)
+{
+ let mercurialCommands = [];
+ mercurialCommands.push(executeMercurial("hg id -i")); // Get Hash
+ mercurialCommands.push(executeMercurial("hg paths default")); // Get repo path
+ Promise.all(mercurialCommands).then((outputs) =>
Thomas Greiner 2018/01/22 19:49:50 You've nicely split up each step in the process in
saroyanm 2018/02/28 20:48:23 I have simplified this function, can try to simpli
Thomas Greiner 2018/03/19 18:54:12 That'd be great, thanks. The more we can disconnec
+ {
+ // Remove line endings and "+" sign from the end of the hash
Thomas Greiner 2018/01/22 19:49:49 Mercurial revision IDs are written in hexadecimal
saroyanm 2018/02/28 20:48:32 I do use "hg id -i" on my local machine and it ret
Thomas Greiner 2018/03/19 18:28:01 According to `hg id --help`: Print a summary
+ let [hash, path] = outputs.map((item) => item.replace(/\+\n|\n$/, ""));
Thomas Greiner 2018/01/22 19:49:52 Detail: Seems like you'd want to use `item.trim()`
saroyanm 2018/02/28 20:48:24 If not the "+" sign the trim would have worked her
+ // Update name of the file to be outputted
Thomas Greiner 2018/01/22 19:49:54 Typo: Replace "outputted" with "output"
saroyanm 2018/02/28 20:48:24 Done.
+ outputFileName = outputFileName.replace("{hash}", hash);
+ outputFileName = outputFileName.replace("{repo}", path.split("/").pop());
Thomas Greiner 2018/01/22 19:49:48 This code is OS-specific so let's instead use Node
saroyanm 2018/02/28 20:48:24 Done.
+
+ // Prepare to read all available locales and default files
+ let readDirectories = [];
Thomas Greiner 2018/01/22 19:49:51 Suggestion: You can avoid this temporary variable
saroyanm 2018/02/28 20:48:28 Done.
+ readDirectories.push(readDir(`${localesDir}/${defaultLocale}`));
Thomas Greiner 2018/01/22 19:49:52 This code is OS-specific so let's instead use Node
saroyanm 2018/02/28 20:48:23 Done.
+ readDirectories.push(readDir(localesDir));
+ return Promise.all(readDirectories);
+ }).then((files) =>
Thomas Greiner 2018/01/22 19:49:48 Detail: This variable is redundant because you cou
saroyanm 2018/02/28 20:48:32 I think I can't as I assigning the variables to th
Thomas Greiner 2018/03/19 18:28:01 Then you should use a different name for one of th
+ {
+ [filesNames, locales] = files;
+ // Filter files
+ if (filesFilter.length)
+ filesNames = filesNames.filter((item) => filesFilter.includes(item));
+
+ let readJsonPromises = [];
+ for(let file of filesNames)
Thomas Greiner 2018/01/22 19:49:50 Coding style: Technically, this is not a violation
saroyanm 2018/02/28 20:48:31 Done.
+ for(let locale of locales)
+ readJsonPromises.push(readJson(locale, file));
+
+ // Reading all existing translations files
+ return Promise.all(readJsonPromises);
+ }).then((fileObjects) =>
+ {
+ // Create Object tree from the Objects array, for easier search
+ // ex.: {dektop-options.json: {en_US: {...}, {de: {...}, {ru: {...}}}
+ let dataTreeObj = fileObjects.reduce((acc, fileObject) =>
Thomas Greiner 2018/01/22 19:49:54 Detail: Please use descriptive variable names beca
saroyanm 2018/02/28 20:48:29 Done.
+ {
+ if (!fileObject)
+ return acc;
+
+ let filename = fileObject.filename;
+ let locale = fileObject.locale;
Thomas Greiner 2018/01/22 19:49:50 Detail: This becomes a bit simpler when using dest
saroyanm 2018/02/28 20:48:31 Done.
+ if (!acc[filename])
+ {
+ acc[filename] = {};
+ }
+ acc[filename][locale] = fileObject.strings;
+ return acc;
+ }, {});
+
+ // Create two dimentional strings array that reflects CSV structure
Thomas Greiner 2018/01/22 19:49:49 Typo: Replace "dimentional" with "dimensional" Al
saroyanm 2018/02/28 20:48:25 Done.
+ let localesWithoutDefault = locales.filter((item) => item != defaultLocale);
Thomas Greiner 2018/01/22 19:49:54 Detail: Why do you call it "item" here when you kn
saroyanm 2018/02/28 20:48:31 Done.
+ let csvArray = [headers.concat(localesWithoutDefault)];
+ for (let file of filesNames)
Thomas Greiner 2018/01/22 19:49:53 Detail: "file" is a bit ambiguous since it could r
saroyanm 2018/02/28 20:48:23 Done.
+ {
+ csvArray.push([file]);
+ for (let stringID in dataTreeObj[file][defaultLocale])
+ {
+ let fileObj = dataTreeObj[file];
+ let stringObj = fileObj[defaultLocale][stringID];
+ let {description, message, placeholders} = stringObj;
+
+ // Use yaml-like format for easy extraction, rather sensitive char hacks
Thomas Greiner 2018/01/22 19:49:54 Instead of going with a custom syntax, we could si
saroyanm 2018/02/28 20:48:32 Done.
+ let yamlPlaceholder = "";
+ for (let placeholder in placeholders)
+ {
+ yamlPlaceholder += `${placeholder}:\n`;
+ let {content, example} = placeholders[placeholder];
+ yamlPlaceholder += ` content: ${content}\n`;
+ yamlPlaceholder += ` example: ${example}\n`;
+ }
+
+ let row = [stringID, description || "", yamlPlaceholder, message];
+ for (let locale of localesWithoutDefault)
+ {
+ let localeFileObj = fileObj[locale];
+ let isTranslated = localeFileObj && localeFileObj[stringID];
Thomas Greiner 2018/01/22 19:49:53 Detail: The name "isTranslated" implies that its v
saroyanm 2018/02/28 20:48:26 Done.
+ row.push(isTranslated ? localeFileObj[stringID].message : "");
+ }
+ csvArray.push(row);
+ }
+ }
+ arrayToCsv(csvArray); // Convert matrix to CSV
+ });
+}
+
+/**
+ * Import strings from the CSV file
+ * @param {[type]} filePath CSV file path to import from
+ */
+function importTranslations(filePath)
+{
+ readCsv(filePath).then((fileObjects) =>
+ {
+ let dataMatrix = csvToArray(fileObjects);
Thomas Greiner 2018/01/22 19:49:51 Let's investigate whether there's a Node module al
saroyanm 2018/02/28 20:48:26 Done, with finding csvToArray module. Haven't thou
+ let headers = dataMatrix.splice(0, 1)[0];
Thomas Greiner 2018/01/22 19:49:51 Detail: This is equivalent to `dataMatrix.shift()`
saroyanm 2018/02/28 20:48:30 Done.
+ let dataTreeObj = {};
+ let currentFilename = "";
+ for(let rowId in dataMatrix)
+ {
+ let row = dataMatrix[rowId];
+ let [stringId, description, placeholder] = row;
+ if (!stringId)
+ continue;
+
+ stringId = stringId.trim();
+ if (stringId.endsWith(".json")) // Check if it's the filename row
Thomas Greiner 2018/01/22 19:49:52 A note for later: We probably want to retrieve the
saroyanm 2018/02/28 20:48:22 I agree, but you are right let's first address all
+ {
+ currentFilename = stringId;
+ dataTreeObj[currentFilename] = {};
+ continue;
+ }
+
+ description = description.trim();
+ placeholder = placeholder.trim();
+ for (let i = 3; i < headers.length; i++)
Thomas Greiner 2018/01/22 19:49:50 This value depends on how many columns precede the
saroyanm 2018/02/28 20:48:25 Done.
+ {
+ let locale = headers[i].trim();
+ let message = row[i].trim();
+ if (!message)
+ continue;
+
+ // Create Object tree from the Objects array, for easier search
+ // ex.: {dektop-options.json: {en_US: {...}, {de: {...}, {ru: {...}}}
+ if (!dataTreeObj[currentFilename][locale])
+ dataTreeObj[currentFilename][locale] = {};
+
+ let localeObj = dataTreeObj[currentFilename][locale];
+ localeObj[stringId] = {};
+
+ // We keep string descriptions only in default locale files
+ if (locale == defaultLocale)
+ localeObj[stringId].description = description;
+
+ localeObj[stringId].message = message;
+ if (placeholder)
+ {
+ let placeholders = placeholder.split("\n");
+ let placeholderName = "";
+ localeObj[stringId].placeholders = placeholders.reduce((acc, item) =>
+ {
+ /*
+ Placeholders use YAML like syntax in CSV files, ex:
+ tracking:
+ content: $1
+ example: Block additional tracking
+ acceptableAds:
+ content: $2
+ example: Allow Acceptable Ads
+ */
+ if (item.startsWith(" "))
+ {
+ let [key, value] = item.trim().split(":");
+ acc[placeholderName][key] = value.trim();
+ }
+ else
+ {
+ placeholderName = item.trim().replace(":", "");
+ acc[placeholderName] = {};
+ }
+ return acc;
+ }, {});
+ }
+ }
+ }
+ writeJson(dataTreeObj);
+ });
+}
+
+/**
+ * Write locale files according to dataTreeObj which look like:
Thomas Greiner 2018/01/22 19:49:49 Detail: There's a redundant "which look like:" in
saroyanm 2018/02/28 20:48:30 Done.
+ * @param {Object} dataTreeObj which look like:
+ * {dektop-options.json: {en_US: {...}, {de: {...}, {ru: {...}}}
+ *
+ * One file is written per filename/locale pair to
+ * <localesDir>/<locale>/<filename>. The writes are started without waiting
+ * for each other and every outcome is logged from its own callback.
+ */
+function writeJson(dataTreeObj)
+{
+ for (let filename in dataTreeObj)
+ {
+ for (let locale in dataTreeObj[filename])
+ {
+ let path = `${localesDir}/${locale}/${filename}`;
+ // Serialize with two-space indentation
+ let fileString = JSON.stringify(dataTreeObj[filename][locale], null, 2);
+ fileString += "\n"; // Newline at end of file to match Coding Style
+ // NOTE(review): fs.writeFile does not create missing directories, so a
+ // locale folder that does not exist yet presumably ends in the error branch
+ fs.writeFile(path, fileString, 'utf8', (err)=>
Thomas Greiner 2018/01/22 19:49:52 Coding style: "arrow-spacing" ESLint rule violatio
Thomas Greiner 2018/01/22 19:49:54 Coding style: "Double-quoted strings (e.g. "foo")
saroyanm 2018/02/28 20:48:26 Done.
saroyanm 2018/02/28 20:48:30 Done.
+ {
+ if (!err)
Thomas Greiner 2018/01/22 19:49:54 Detail: It's usually simpler to avoid negation whe
saroyanm 2018/02/28 20:48:31 Done.
+ {
+ console.log(`Updated: ${path}`);
+ }
+ else
+ {
+ console.log(err);
Thomas Greiner 2018/01/22 19:49:53 You're outputting an error here so why not use `co
saroyanm 2018/02/28 20:48:27 Done.
+ }
+ });
+ }
+ }
+}
+
+/**
+ * Parse CSV string and return array
+ * @param {String} csvText Array to convert from
Thomas Greiner 2018/01/22 19:49:49 Detail: We're writing native types in lower-case i
saroyanm 2018/02/28 20:48:30 Done.
+ * @return {Array} two dimentional array
Thomas Greiner 2018/01/22 19:49:50 Detail: We're defining arrays using the alternativ
saroyanm 2018/02/28 20:48:30 Done.
+ */
+function csvToArray(csvText)
+{
+ let previouseChar = "";
Thomas Greiner 2018/01/22 19:49:50 Typo: Replace "previouseChar" with "previousChar".
saroyanm 2018/02/28 20:48:25 Done.
+ let row = []; // Holds parsed CSV data representing a row/line
+ let column = 0; // Pointer of the column in the row
+ let csvArray = []; // Two dimentional array that holds rows
+ let parseSpecialChars = true; // Like comma(,) and quotation(")
Thomas Greiner 2018/01/22 19:49:49 I doubt that we want to get involved into sanitizi
saroyanm 2018/02/28 20:48:23 Done.
+ for (let charIndex in csvText)
+ {
+ currentChar = csvText[charIndex];
+ if (!row[column])
+ row[column] = "";
+
+ if ('"' == currentChar)
+ {
+ // Double quote is like escaping quote char in CSV
+ if (currentChar === previouseChar && parseSpecialChars)
+ row[column] += currentChar;
+
+ parseSpecialChars = !parseSpecialChars;
+ }
+ else if (currentChar == "," && parseSpecialChars)
+ {
+ currentChar = "";
+ column++; // Update columns, because comma(,) separates columns
+ }
+ else if (currentChar == "\n" && parseSpecialChars)
+ {
+ if ("\r" === previouseChar) // In case of \r\n
+ row[column] = row[column].slice(0, -1);
+
+ csvArray.push(row);
+ // Reset pointers for the new row
+ row = [];
+ column = 0;
+ currentChar = "";
+ }
+ else
+ {
+ row[column] += currentChar;
+ }
+ previouseChar = currentChar;
+ }
+ csvArray.push(row);
+ return csvArray;
+}
+
+
+/**
+ * Convert two dimentional array to the CSV file
+ * @param {Array} csvArray Array to convert from
+ */
+function arrayToCsv(csvArray)
+{
+ let dataToWrite = "";
+ for (let row of csvArray)
+ {
+ let columnString = row.reduce((accum, col) =>
+ {
+ // Escape single quote with quote before
+ accum += `","${col.replace(/\"/g, '""')}`;
+ return accum;
+ });
+ dataToWrite += `"${columnString}"\r\n`;
+ }
+ dataToWrite += "\r\n";
+ fs.writeFile(outputFileName, dataToWrite, "utf8", function (err)
+ {
+ if (!err)
+ console.log(`${outputFileName} is created`);
+ });
+}
+
+/**
+ * Reads JSON file and assign filename and locale to it
+ * @param {String} locale ex.: "en_US", "de"...
+ * @param {String} fileName ex.: "desktop-options.json"
+ * @return {Promise} Promise object
Thomas Greiner 2018/01/22 19:49:51 Detail: This is very non-descriptive because it co
saroyanm 2018/02/28 20:48:32 Done.
+ */
+function readJson(locale, file)
Thomas Greiner 2018/01/22 19:49:51 Detail: "file" is not what you call it in the JSDo
saroyanm 2018/02/28 20:48:26 Done.
+{
+ let path = `${localesDir}/${locale}/${file}`;
Thomas Greiner 2018/01/22 19:49:49 Detail: This variable is only being used once beca
saroyanm 2018/02/28 20:48:25 We are already using path module here, so I think
Thomas Greiner 2018/03/19 18:28:01 Acknowledged.
+ return new Promise((resolve, reject) =>
+ {
+ fs.readFile(path, (err, data) => {
Thomas Greiner 2018/01/22 19:49:52 Coding style: "Opening braces always go on their o
saroyanm 2018/02/28 20:48:27 Done.
+ if (err)
+ {
+ reject(err);
+ }
+ else
+ {
+ let json = {};
+ json.filename = file;
+ json.locale = locale;
+ json.strings = JSON.parse(data);
+ resolve(json);
Thomas Greiner 2018/01/22 19:49:50 Detail: Let's simplify this a bit and avoid this t
saroyanm 2018/02/28 20:48:32 Done.
+ }
+ });
+ }).catch(reason => // Continue Promise.All even if rejected.
Thomas Greiner 2018/01/22 19:49:52 This function should not be aware of where it's ca
saroyanm 2018/02/28 20:48:24 Not sure if it's yet necessary, while we are not o
+ {
+ // Commented out log not to spam the output.
+ // TODO: Think about more meaningful output without spaming
+ // console.log(`Reading ${path} was rejected: ${reason}`);
Thomas Greiner 2018/01/22 19:49:49 Coding style: "Don't leave debug printfs or dumps
saroyanm 2018/02/28 20:48:28 Done.
+ });
+}
+
+/**
+ * Reads a file as UTF-8 text
+ * @param {String} filePath path of the file to read
+ * @return {Promise} resolves with the file content as a string, rejects with
+ * the error reported by fs.readFile
+ */
+function readCsv(filePath)
Thomas Greiner 2018/01/22 19:49:51 This name is a bit misleading since it reads any f
saroyanm 2018/02/28 20:48:28 Done.
+{
+ return new Promise((resolve, reject) =>
+ {
+ // Passing "utf8" makes fs.readFile deliver a string instead of a Buffer
+ fs.readFile(filePath, "utf8", (err, data) => {
+ if (err)
+ reject(err);
+ else
+ resolve(data);
+ });
+ });
+}
+
+/**
+ * Read files and folder names inside of the directory
+ * @param {String} dir patch of the folder
Thomas Greiner 2018/01/22 19:49:53 Typo: Replace "patch" with "path"
saroyanm 2018/02/28 20:48:25 Done.
+ * @return {Promise} Promise object
+ */
+function readDir(dir)
+{
+ return new Promise((resolve, reject) =>
+ {
+ fs.readdir(dir, (err, folders) => {
+ if (err)
+ reject(err);
+ else
+ resolve(folders);
+ });
+ });
+}
+
+/**
+ * Executing mercurial commands on the system level
+ * @param {String} command mercurial command ex.:"hg ..."
+ * @return {Promise} Promise object containing output from the command
+ */
+function executeMercurial(command)
+{
+ // Limit only to Mercurial commands to minimize the missuse risk
+ if (command.substring(0, 3) !== "hg ")
+ {
+ console.error("You are only allowed to run Mercurial commands('hg ...')");
Thomas Greiner 2018/01/22 19:49:53 Why do you allow to pass arbitrary commands if you
saroyanm 2018/02/28 20:48:30 Agree, done.
+ return;
+ }
+
+ return new Promise((resolve, reject) =>
+ {
+ exec(command, (err, output) =>
+ {
+ if (err)
+ reject(err);
+ else
+ resolve(output);
+ });
+ });
+}
+
+// CLI
+let helpText = `
+About: This script exports locales into .csv format
Thomas Greiner 2018/01/22 19:49:48 It can also export locales into JSON format so why
saroyanm 2018/02/28 20:48:27 Done.
+Usage: node csv-export.js [option] [argument]
Thomas Greiner 2018/01/22 19:49:51 Detail: AFAIK it's not common to prefix scripts wi
saroyanm 2018/02/28 20:48:24 Done.
+Options:
+ -f Name of the files to be exported ex.: -f firstRun.json
Thomas Greiner 2018/01/22 19:49:49 Only writing "-f" makes it look like it's a binary
saroyanm 2018/02/28 20:48:27 Done.
+ option can be used multiple timeString.
Thomas Greiner 2018/01/22 19:49:49 Typo: Replace "timeString" with "times".
saroyanm 2018/02/28 20:48:31 Done.
+ If ommited all files are being exported
+
+ -o Output filename ex.:
+ -f firstRun.json -o {hash}-firstRun.csv
+ Placeholders:
+ {hash} - Mercurial current revision hash
+ {repo} - Name of the "Default" repository
Thomas Greiner 2018/01/22 19:49:50 I wouldn't know what "default repository" refers t
saroyanm 2018/02/28 20:48:27 It's the name of the repository set as default in
Thomas Greiner 2018/03/19 18:28:01 Then why not add that to the description? For exam
+ If ommited the output fileName is set to
+ translations-{repo}-{hash}.csv
+
+ -i Import file path ex: -i issue-reporter.csv
Thomas Greiner 2018/01/22 19:49:51 From what I see, there are two parameters for spec
saroyanm 2018/02/28 20:48:29 It's suppose to be used with "-f" or without, but
Thomas Greiner 2018/03/19 18:28:01 Ok, let's tackle this separately.
+`;
+
+let arguments = process.argv.slice(2);
+let stopExportScript = false;
+let filesFilter = []; // Filter to be used export to the fileNames inside
+
+for (let i = 0; i < arguments.length; i++)
+{
+ switch (arguments[i])
+ {
+ case "-h":
+ console.log(helpText);
+ stopExportScript = true;
+ break;
+ case "-f":
+ if (!arguments[i + 1]) // check if argument following option is specified
+ {
+ console.error("Please specify the input filename");
+ stopExportScript = true;
Thomas Greiner 2018/01/22 19:49:52 In Node you can stop the process via `process.exit
saroyanm 2018/02/28 20:48:23 Done, I think the code will become clearer if I se
+ }
+ else
+ {
+ filesFilter.push(arguments[i + 1]);
+ }
+ break;
+ case "-o":
+ if (!arguments[i + 1])
+ {
+ console.error("Please specify the output filename");
+ stopExportScript = true;
+ }
+ else
+ {
+ outputFileName = arguments[i + 1];
+ }
+ break;
+ case "-i":
+ if (!arguments[i + 1])
+ {
+ console.error("Please specify the input filename");
+ }
+ else
+ {
+ let importFile = arguments[i + 1];
+ importTranslations(importFile);
+ }
+ stopExportScript = true;
+ break;
+ }
+}
+
+if (!stopExportScript)
+ exportTranslations(filesFilter);
« no previous file with comments | « README.md ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld