Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: csv-export.js

Issue 29636585: Issue 6171 - create CSV exporter and importer for translations (Closed)
Patch Set: Added the copyright header Created Dec. 20, 2017, 5:46 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
Thomas Greiner 2018/01/22 19:49:52 While we're waiting for the final results from the
saroyanm 2018/02/28 20:57:33 I'll address this in a separate patch. Moving this
2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH
4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation.
8 *
9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 const fs = require("fs");
19 const {exec} = require("child_process");
20
21 const localesDir = "locale";
22 const defaultLocale = "en_US";
23
24 let filesNames = []; // ex.: desktop-options.json
Thomas Greiner 2018/01/22 19:49:52 Detail: Usually you'd call this variable "fileName
saroyanm 2018/02/28 20:48:26 Done.
25 let locales = []; // List of all available locale codes
Thomas Greiner 2018/01/22 19:49:53 Detail: We tend to put comments in their own line
saroyanm 2018/02/28 20:48:25 Done.
26 let headers = ["StringID", "Description", "Placeholders", defaultLocale];
27 let outputFileName = "translations-{repo}-{hash}.csv";
Thomas Greiner 2018/01/22 19:49:53 Detail: This custom template seems redundant since
saroyanm 2018/02/28 20:48:22 We can't pass Template literals using CLI, until I
Thomas Greiner 2018/03/19 18:28:01 This is not a template literal. What I meant is t
28
29 /**
30 * Export existing translation files into CSV file
31 * @param {[type]} filesFilter Optional parameter which allow include only
Thomas Greiner 2018/01/22 19:49:49 Suggestion: What I tend to do to visually differen
Thomas Greiner 2018/01/22 19:49:50 Detail: No need to include the text "Optional" bec
Thomas Greiner 2018/01/22 19:49:51 Detail: "[type]" is not a valid type. Also applie
saroyanm 2018/02/28 20:48:24 Right, it's the default value generated by DocBloc
saroyanm 2018/02/28 20:48:27 Done.
saroyanm 2018/02/28 20:48:31 Done.
32 * fileNames in the array, if ommited all files
Thomas Greiner 2018/01/22 19:49:51 Typo: Replace "ommited" with "omitted" Also appli
saroyanm 2018/02/28 20:48:30 Done.
33 * will be exported
34 */
35 function exportTranslations(filesFilter)
36 {
37 let mercurialCommands = [];
38 mercurialCommands.push(executeMercurial("hg id -i")); // Get Hash
39 mercurialCommands.push(executeMercurial("hg paths default")); // Get repo path
40 Promise.all(mercurialCommands).then((outputs) =>
Thomas Greiner 2018/01/22 19:49:50 You've nicely split up each step in the process in
saroyanm 2018/02/28 20:48:23 I have simplified this function, can try to simpli
Thomas Greiner 2018/03/19 18:54:12 That'd be great, thanks. The more we can disconnec
41 {
42 // Remove line endings and "+" sign from the end of the hash
Thomas Greiner 2018/01/22 19:49:49 Mercurial revision IDs are written in hexadecimal
saroyanm 2018/02/28 20:48:32 I do use "hg id -i" on my local machine and it ret
Thomas Greiner 2018/03/19 18:28:01 According to `hg id --help`: Print a summary
43 let [hash, path] = outputs.map((item) => item.replace(/\+\n|\n$/, ""));
Thomas Greiner 2018/01/22 19:49:52 Detail: Seems like you'd want to use `item.trim()`
saroyanm 2018/02/28 20:48:24 If not the "+" sign the trim would have worked her
44 // Update name of the file to be outputted
Thomas Greiner 2018/01/22 19:49:54 Typo: Replace "outputted" with "output"
saroyanm 2018/02/28 20:48:24 Done.
45 outputFileName = outputFileName.replace("{hash}", hash);
46 outputFileName = outputFileName.replace("{repo}", path.split("/").pop());
Thomas Greiner 2018/01/22 19:49:48 This code is OS-specific so let's instead use Node
saroyanm 2018/02/28 20:48:24 Done.
47
48 // Prepare to read all available locales and default files
49 let readDirectories = [];
Thomas Greiner 2018/01/22 19:49:51 Suggestion: You can avoid this temporary variable
saroyanm 2018/02/28 20:48:28 Done.
50 readDirectories.push(readDir(`${localesDir}/${defaultLocale}`));
Thomas Greiner 2018/01/22 19:49:52 This code is OS-specific so let's instead use Node
saroyanm 2018/02/28 20:48:23 Done.
51 readDirectories.push(readDir(localesDir));
52 return Promise.all(readDirectories);
53 }).then((files) =>
Thomas Greiner 2018/01/22 19:49:48 Detail: This variable is redundant because you cou
saroyanm 2018/02/28 20:48:32 I think I can't as I assigning the variables to th
Thomas Greiner 2018/03/19 18:28:01 Then you should use a different name for one of th
54 {
55 [filesNames, locales] = files;
56 // Filter files
57 if (filesFilter.length)
58 filesNames = filesNames.filter((item) => filesFilter.includes(item));
59
60 let readJsonPromises = [];
61 for(let file of filesNames)
Thomas Greiner 2018/01/22 19:49:50 Coding style: Technically, this is not a violation
saroyanm 2018/02/28 20:48:31 Done.
62 for(let locale of locales)
63 readJsonPromises.push(readJson(locale, file));
64
65 // Reading all existing translations files
66 return Promise.all(readJsonPromises);
67 }).then((fileObjects) =>
68 {
69 // Create Object tree from the Objects array, for easier search
70 // ex.: {desktop-options.json: {en_US: {...}, {de: {...}, {ru: {...}}}
71 let dataTreeObj = fileObjects.reduce((acc, fileObject) =>
Thomas Greiner 2018/01/22 19:49:54 Detail: Please use descriptive variable names beca
saroyanm 2018/02/28 20:48:29 Done.
72 {
73 if (!fileObject)
74 return acc;
75
76 let filename = fileObject.filename;
77 let locale = fileObject.locale;
Thomas Greiner 2018/01/22 19:49:50 Detail: This becomes a bit simpler when using dest
saroyanm 2018/02/28 20:48:31 Done.
78 if (!acc[filename])
79 {
80 acc[filename] = {};
81 }
82 acc[filename][locale] = fileObject.strings;
83 return acc;
84 }, {});
85
86 // Create two dimentional strings array that reflects CSV structure
Thomas Greiner 2018/01/22 19:49:49 Typo: Replace "dimentional" with "dimensional" Al
saroyanm 2018/02/28 20:48:25 Done.
87 let localesWithoutDefault = locales.filter((item) => item != defaultLocale);
Thomas Greiner 2018/01/22 19:49:54 Detail: Why do you call it "item" here when you kn
saroyanm 2018/02/28 20:48:31 Done.
88 let csvArray = [headers.concat(localesWithoutDefault)];
89 for (let file of filesNames)
Thomas Greiner 2018/01/22 19:49:53 Detail: "file" is a bit ambiguous since it could r
saroyanm 2018/02/28 20:48:23 Done.
90 {
91 csvArray.push([file]);
92 for (let stringID in dataTreeObj[file][defaultLocale])
93 {
94 let fileObj = dataTreeObj[file];
95 let stringObj = fileObj[defaultLocale][stringID];
96 let {description, message, placeholders} = stringObj;
97
98 // Use yaml-like format for easy extraction, rather sensitive char hacks
Thomas Greiner 2018/01/22 19:49:54 Instead of going with a custom syntax, we could si
saroyanm 2018/02/28 20:48:32 Done.
99 let yamlPlaceholder = "";
100 for (let placeholder in placeholders)
101 {
102 yamlPlaceholder += `${placeholder}:\n`;
103 let {content, example} = placeholders[placeholder];
104 yamlPlaceholder += ` content: ${content}\n`;
105 yamlPlaceholder += ` example: ${example}\n`;
106 }
107
108 let row = [stringID, description || "", yamlPlaceholder, message];
109 for (let locale of localesWithoutDefault)
110 {
111 let localeFileObj = fileObj[locale];
112 let isTranslated = localeFileObj && localeFileObj[stringID];
Thomas Greiner 2018/01/22 19:49:53 Detail: The name "isTranslated" implies that its v
saroyanm 2018/02/28 20:48:26 Done.
113 row.push(isTranslated ? localeFileObj[stringID].message : "");
114 }
115 csvArray.push(row);
116 }
117 }
118 arrayToCsv(csvArray); // Convert matrix to CSV
119 });
120 }
121
122 /**
123 * Import strings from the CSV file
124 * @param {[type]} filePath CSV file path to import from
125 */
126 function importTranslations(filePath)
127 {
128 readCsv(filePath).then((fileObjects) =>
129 {
130 let dataMatrix = csvToArray(fileObjects);
Thomas Greiner 2018/01/22 19:49:51 Let's investigate whether there's a Node module al
saroyanm 2018/02/28 20:48:26 Done, with finding csvToArray module. Haven't thou
131 let headers = dataMatrix.splice(0, 1)[0];
Thomas Greiner 2018/01/22 19:49:51 Detail: This is equivalent to `dataMatrix.shift()`
saroyanm 2018/02/28 20:48:30 Done.
132 let dataTreeObj = {};
133 let currentFilename = "";
134 for(let rowId in dataMatrix)
135 {
136 let row = dataMatrix[rowId];
137 let [stringId, description, placeholder] = row;
138 if (!stringId)
139 continue;
140
141 stringId = stringId.trim();
142 if (stringId.endsWith(".json")) // Check if it's the filename row
Thomas Greiner 2018/01/22 19:49:52 A note for later: We probably want to retrieve the
saroyanm 2018/02/28 20:48:22 I agree, but you are right let's first address all
143 {
144 currentFilename = stringId;
145 dataTreeObj[currentFilename] = {};
146 continue;
147 }
148
149 description = description.trim();
150 placeholder = placeholder.trim();
151 for (let i = 3; i < headers.length; i++)
Thomas Greiner 2018/01/22 19:49:50 This value depends on how many columns precede the
saroyanm 2018/02/28 20:48:25 Done.
152 {
153 let locale = headers[i].trim();
154 let message = row[i].trim();
155 if (!message)
156 continue;
157
158 // Create Object tree from the Objects array, for easier search
159 // ex.: {desktop-options.json: {en_US: {...}, {de: {...}, {ru: {...}}}
160 if (!dataTreeObj[currentFilename][locale])
161 dataTreeObj[currentFilename][locale] = {};
162
163 let localeObj = dataTreeObj[currentFilename][locale];
164 localeObj[stringId] = {};
165
166 // We keep string descriptions only in default locale files
167 if (locale == defaultLocale)
168 localeObj[stringId].description = description;
169
170 localeObj[stringId].message = message;
171 if (placeholder)
172 {
173 let placeholders = placeholder.split("\n");
174 let placeholderName = "";
175 localeObj[stringId].placeholders = placeholders.reduce((acc, item) =>
176 {
177 /*
178 Placeholders use YAML like syntax in CSV files, ex:
179 tracking:
180 content: $1
181 example: Block additional tracking
182 acceptableAds:
183 content: $2
184 example: Allow Acceptable Ads
185 */
186 if (item.startsWith(" "))
187 {
188 let [key, value] = item.trim().split(":");
189 acc[placeholderName][key] = value.trim();
190 }
191 else
192 {
193 placeholderName = item.trim().replace(":", "");
194 acc[placeholderName] = {};
195 }
196 return acc;
197 }, {});
198 }
199 }
200 }
201 writeJson(dataTreeObj);
202 });
203 }
204
205 /**
206 * Write locale files according to dataTreeObj which look like:
Thomas Greiner 2018/01/22 19:49:49 Detail: There's a redundant "which look like:" in
saroyanm 2018/02/28 20:48:30 Done.
207 * @param {Object} dataTreeObj which look like:
208 * {desktop-options.json: {en_US: {...}, {de: {...}, {ru: {...}}}
209 */
210 function writeJson(dataTreeObj)
211 {
212 for (let filename in dataTreeObj)
213 {
214 for (let locale in dataTreeObj[filename])
215 {
216 let path = `${localesDir}/${locale}/${filename}`;
217 let fileString = JSON.stringify(dataTreeObj[filename][locale], null, 2);
218 fileString += "\n"; // Newline at end of file to match Coding Style
219 fs.writeFile(path, fileString, 'utf8', (err)=>
Thomas Greiner 2018/01/22 19:49:52 Coding style: "arrow-spacing" ESLint rule violatio
Thomas Greiner 2018/01/22 19:49:54 Coding style: "Double-quoted strings (e.g. "foo")
saroyanm 2018/02/28 20:48:26 Done.
saroyanm 2018/02/28 20:48:30 Done.
220 {
221 if (!err)
Thomas Greiner 2018/01/22 19:49:54 Detail: It's usually simpler to avoid negation whe
saroyanm 2018/02/28 20:48:31 Done.
222 {
223 console.log(`Updated: ${path}`);
224 }
225 else
226 {
227 console.log(err);
Thomas Greiner 2018/01/22 19:49:53 You're outputting an error here so why not use `co
saroyanm 2018/02/28 20:48:27 Done.
228 }
229 });
230 }
231 }
232 }
233
234 /**
235 * Parse CSV string and return array
236 * @param {String} csvText CSV text to parse
Thomas Greiner 2018/01/22 19:49:49 Detail: We're writing native types in lower-case i
saroyanm 2018/02/28 20:48:30 Done.
237 * @return {Array} two dimentional array
Thomas Greiner 2018/01/22 19:49:50 Detail: We're defining arrays using the alternativ
saroyanm 2018/02/28 20:48:30 Done.
238 */
239 function csvToArray(csvText)
240 {
241 let previouseChar = "";
Thomas Greiner 2018/01/22 19:49:50 Typo: Replace "previouseChar" with "previousChar".
saroyanm 2018/02/28 20:48:25 Done.
242 let row = []; // Holds parsed CSV data representing a row/line
243 let column = 0; // Pointer of the column in the row
244 let csvArray = []; // Two dimentional array that holds rows
245 let parseSpecialChars = true; // Like comma(,) and quotation(")
Thomas Greiner 2018/01/22 19:49:49 I doubt that we want to get involved into sanitizi
saroyanm 2018/02/28 20:48:23 Done.
246 for (let charIndex in csvText)
247 {
248 currentChar = csvText[charIndex];
249 if (!row[column])
250 row[column] = "";
251
252 if ('"' == currentChar)
253 {
254 // Double quote is like escaping quote char in CSV
255 if (currentChar === previouseChar && parseSpecialChars)
256 row[column] += currentChar;
257
258 parseSpecialChars = !parseSpecialChars;
259 }
260 else if (currentChar == "," && parseSpecialChars)
261 {
262 currentChar = "";
263 column++; // Update columns, because comma(,) separates columns
264 }
265 else if (currentChar == "\n" && parseSpecialChars)
266 {
267 if ("\r" === previouseChar) // In case of \r\n
268 row[column] = row[column].slice(0, -1);
269
270 csvArray.push(row);
271 // Reset pointers for the new row
272 row = [];
273 column = 0;
274 currentChar = "";
275 }
276 else
277 {
278 row[column] += currentChar;
279 }
280 previouseChar = currentChar;
281 }
282 csvArray.push(row);
283 return csvArray;
284 }
285
286
287 /**
288 * Convert two dimentional array to the CSV file
289 * @param {Array} csvArray Array to convert from
290 */
291 function arrayToCsv(csvArray)
292 {
293 let dataToWrite = "";
294 for (let row of csvArray)
295 {
296 let columnString = row.reduce((accum, col) =>
297 {
298 // Escape single quote with quote before
299 accum += `","${col.replace(/\"/g, '""')}`;
300 return accum;
301 });
302 dataToWrite += `"${columnString}"\r\n`;
303 }
304 dataToWrite += "\r\n";
305 fs.writeFile(outputFileName, dataToWrite, "utf8", function (err)
306 {
307 if (!err)
308 console.log(`${outputFileName} is created`);
309 });
310 }
311
312 /**
313 * Reads JSON file and assign filename and locale to it
314 * @param {String} locale ex.: "en_US", "de"...
315 * @param {String} fileName ex.: "desktop-options.json"
316 * @return {Promise} Promise object
Thomas Greiner 2018/01/22 19:49:51 Detail: This is very non-descriptive because it co
saroyanm 2018/02/28 20:48:32 Done.
317 */
318 function readJson(locale, file)
Thomas Greiner 2018/01/22 19:49:51 Detail: "file" is not what you call it in the JSDo
saroyanm 2018/02/28 20:48:26 Done.
319 {
320 let path = `${localesDir}/${locale}/${file}`;
Thomas Greiner 2018/01/22 19:49:49 Detail: This variable is only being used once beca
saroyanm 2018/02/28 20:48:25 We are already using path module here, so I think
Thomas Greiner 2018/03/19 18:28:01 Acknowledged.
321 return new Promise((resolve, reject) =>
322 {
323 fs.readFile(path, (err, data) => {
Thomas Greiner 2018/01/22 19:49:52 Coding style: "Opening braces always go on their o
saroyanm 2018/02/28 20:48:27 Done.
324 if (err)
325 {
326 reject(err);
327 }
328 else
329 {
330 let json = {};
331 json.filename = file;
332 json.locale = locale;
333 json.strings = JSON.parse(data);
334 resolve(json);
Thomas Greiner 2018/01/22 19:49:50 Detail: Let's simplify this a bit and avoid this t
saroyanm 2018/02/28 20:48:32 Done.
335 }
336 });
337 }).catch(reason => // Continue Promise.All even if rejected.
Thomas Greiner 2018/01/22 19:49:52 This function should not be aware of where it's ca
saroyanm 2018/02/28 20:48:24 Not sure if it's yet necessary, while we are not o
338 {
339 // Commented out log not to spam the output.
340 // TODO: Think about more meaningful output without spamming
341 // console.log(`Reading ${path} was rejected: ${reason}`);
Thomas Greiner 2018/01/22 19:49:49 Coding style: "Don't leave debug printfs or dumps
saroyanm 2018/02/28 20:48:28 Done.
342 });
343 }
344
345 /**
346 * Reads CSV file
347 * @param {String} file path
348 * @return {Promise} Promise object
349 */
350 function readCsv(filePath)
Thomas Greiner 2018/01/22 19:49:51 This name is a bit misleading since it reads any f
saroyanm 2018/02/28 20:48:28 Done.
351 {
352 return new Promise((resolve, reject) =>
353 {
354 fs.readFile(filePath, "utf8", (err, data) => {
355 if (err)
356 reject(err);
357 else
358 resolve(data);
359 });
360 });
361 }
362
363 /**
364 * Read files and folder names inside of the directory
365 * @param {String} dir patch of the folder
Thomas Greiner 2018/01/22 19:49:53 Typo: Replace "patch" with "path"
saroyanm 2018/02/28 20:48:25 Done.
366 * @return {Promise} Promise object
367 */
368 function readDir(dir)
369 {
370 return new Promise((resolve, reject) =>
371 {
372 fs.readdir(dir, (err, folders) => {
373 if (err)
374 reject(err);
375 else
376 resolve(folders);
377 });
378 });
379 }
380
381 /**
382 * Executing mercurial commands on the system level
383 * @param {String} command mercurial command ex.:"hg ..."
384 * @return {Promise} Promise object containing output from the command
385 */
386 function executeMercurial(command)
387 {
388 // Limit only to Mercurial commands to minimize the misuse risk
389 if (command.substring(0, 3) !== "hg ")
390 {
391 console.error("You are only allowed to run Mercurial commands('hg ...')");
Thomas Greiner 2018/01/22 19:49:53 Why do you allow to pass arbitrary commands if you
saroyanm 2018/02/28 20:48:30 Agree, done.
392 return;
393 }
394
395 return new Promise((resolve, reject) =>
396 {
397 exec(command, (err, output) =>
398 {
399 if (err)
400 reject(err);
401 else
402 resolve(output);
403 });
404 });
405 }
406
407 // CLI
408 let helpText = `
409 About: This script exports locales into .csv format
Thomas Greiner 2018/01/22 19:49:48 It can also export locales into JSON format so why
saroyanm 2018/02/28 20:48:27 Done.
410 Usage: node csv-export.js [option] [argument]
Thomas Greiner 2018/01/22 19:49:51 Detail: AFAIK it's not common to prefix scripts wi
saroyanm 2018/02/28 20:48:24 Done.
411 Options:
412 -f Name of the files to be exported ex.: -f firstRun.json
Thomas Greiner 2018/01/22 19:49:49 Only writing "-f" makes it look like it's a binary
saroyanm 2018/02/28 20:48:27 Done.
413 option can be used multiple timeString.
Thomas Greiner 2018/01/22 19:49:49 Typo: Replace "timeString" with "times".
saroyanm 2018/02/28 20:48:31 Done.
414 If ommited all files are being exported
415
416 -o Output filename ex.:
417 -f firstRun.json -o {hash}-firstRun.csv
418 Placeholders:
419 {hash} - Mercurial current revision hash
420 {repo} - Name of the "Default" repository
Thomas Greiner 2018/01/22 19:49:50 I wouldn't know what "default repository" refers t
saroyanm 2018/02/28 20:48:27 It's the name of the repository set as default in
Thomas Greiner 2018/03/19 18:28:01 Then why not add that to the description? For exam
421 If ommited the output fileName is set to
422 translations-{repo}-{hash}.csv
423
424 -i Import file path ex: -i issue-reporter.csv
Thomas Greiner 2018/01/22 19:49:51 From what I see, there are two parameters for spec
saroyanm 2018/02/28 20:48:29 It's suppose to be used with "-f" or without, but
Thomas Greiner 2018/03/19 18:28:01 Ok, let's tackle this separately.
425 `;
426
427 let arguments = process.argv.slice(2);
428 let stopExportScript = false;
429 let filesFilter = []; // Filter to be used export to the fileNames inside
430
431 for (let i = 0; i < arguments.length; i++)
432 {
433 switch (arguments[i])
434 {
435 case "-h":
436 console.log(helpText);
437 stopExportScript = true;
438 break;
439 case "-f":
440 if (!arguments[i + 1]) // check if argument following option is specified
441 {
442 console.error("Please specify the input filename");
443 stopExportScript = true;
Thomas Greiner 2018/01/22 19:49:52 In Node you can stop the process via `process.exit
saroyanm 2018/02/28 20:48:23 Done, I think the code will become clearer if I se
444 }
445 else
446 {
447 filesFilter.push(arguments[i + 1]);
448 }
449 break;
450 case "-o":
451 if (!arguments[i + 1])
452 {
453 console.error("Please specify the output filename");
454 stopExportScript = true;
455 }
456 else
457 {
458 outputFileName = arguments[i + 1];
459 }
460 break;
461 case "-i":
462 if (!arguments[i + 1])
463 {
464 console.error("Please specify the input filename");
465 }
466 else
467 {
468 let importFile = arguments[i + 1];
469 importTranslations(importFile);
470 }
471 stopExportScript = true;
472 break;
473 }
474 }
475
476 if (!stopExportScript)
477 exportTranslations(filesFilter);
OLDNEW
« no previous file with comments | « README.md ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld