Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Patch Set: Addressed Wladimir's feedback Created July 2, 2015, 12:29 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | cms/converters.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # coding: utf-8
2
3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH
5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation.
9 #
10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17
import collections
import io
import itertools
import json
import logging
import os
import posixpath
import sys
import zipfile

import requests
Sebastian Noack 2015/07/08 13:03:19 I see, we use "requests", because urllib doesn't h
Sebastian Noack 2015/07/08 14:23:00 I just realized that simply using built-in urllib2
kzar 2015/07/08 15:26:41 I discussed using the requests library with Wladim
Sebastian Noack 2015/07/08 15:43:50 I'm certainly curious about the reason, you decide
Wladimir Palant 2015/07/08 23:11:05 Doing multipart encoding manually is very awkward.
Sebastian Noack 2015/07/09 21:26:55 Personally, I'd still prefer to go with built-in u
Wladimir Palant 2015/07/10 21:24:04 Nope, they are both hacky and I'd definitely prefe
kzar 2015/07/11 19:21:17 Done.
27
28 import cms.utils
29 from cms.sources import FileSource
30
31 logger = logging.getLogger("cms.bin.translate")
32
class CrowdinAPI:
    """Thin wrapper around the Crowdin project HTTP API.

    Holds the credentials for one project and issues JSON requests
    against https://api.crowdin.com/api/project/<project_name>/.
    """

    # Chunk size used by the upload helpers in this module when they batch
    # files into a single request.
    FILES_PER_REQUEST = 20

    def __init__(self, api_key, project_name, defaultlocale):
        self.api_key = api_key
        self.project_name = project_name
        # Not read by any method of this class; kept so existing callers
        # that pass or read it continue to work.
        self.defaultlocale = defaultlocale

    def request(self, request_method, api_endpoint, **kwargs):
        """Call an API endpoint and return the decoded JSON response.

        request_method is the HTTP verb, api_endpoint the path component
        below the project URL. Any further keyword arguments are handed to
        requests.request() unchanged (e.g. data=..., files=...).

        Failures are logged and then re-raised unchanged:
        requests.exceptions.RequestException (including HTTPError for
        error status codes) or ValueError if the body isn't valid JSON.
        """
        url = "https://api.crowdin.com/api/project/%s/%s?key=%s&json=1" % (
            self.project_name, api_endpoint, self.api_key
        )

        # RequestException is the base class of everything requests raises
        # (connection errors, timeouts, too many redirects, ...), so no
        # transport failure slips through without being logged.
        try:
            response = requests.request(request_method, url, **kwargs)
        except requests.exceptions.RequestException:
            logger.error("Connection to API failed for endpoint %s" % url)
            raise

        # Checked separately so that `response` is guaranteed to be bound
        # when the error body is logged.
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            logger.error("API call to %s failed:\n%s" % (url, response.text))
            raise

        try:
            return response.json()
        except ValueError:
            logger.error("Invalid response returned by API endpoint %s" % url)
            raise
60
61
def grouper(iterable, n):
    """Yield consecutive tuples of at most n items taken from iterable.

    The final tuple may hold fewer than n items. Nothing is yielded once
    the iterable is exhausted (or if it was empty to begin with).
    """
    remaining = iter(iterable)
    batch = tuple(itertools.islice(remaining, n))
    # An empty tuple means the underlying iterator has run dry
    while batch:
        yield batch
        batch = tuple(itertools.islice(remaining, n))
70
def extract_strings(source, defaultlocale):
    """Collect the default-locale strings of every page in source.

    Each page returned by source.list_pages() is run through
    cms.utils.process_page() with a callback that records the strings.

    Returns a dict mapping page name to an ordered mapping of
    string name -> {"message": ..., "description": ...}; "description" is
    only present when there is a comment or there are fixed strings.
    """
    logger.info("Extracting page strings (please be patient)...")
    page_strings = {}

    def record_string(page, name, default, comment, fixed_strings):
        # Keep strings in the order they are recorded so the JSON that is
        # generated from this mapping doesn't get reshuffled between runs.
        try:
            store = page_strings[page]
        except KeyError:
            store = page_strings[page] = collections.OrderedDict()

        entry = store[name] = {"message": default}

        if fixed_strings:
            # comment may be None, hence the conditional rather than a
            # plain concatenation
            comment = comment + "\n" if comment else ""
            comment += ", ".join("{%d}: %s" % numbered
                                 for numbered in enumerate(fixed_strings, 1))
        if comment:
            entry["description"] = comment

    for page, format in source.list_pages():
        cms.utils.process_page(source, defaultlocale, page,
                               format=format,
                               record_default_strings=record_string)
    return page_strings
90
def configure_locales(crowdin_api, required_locales, enabled_locales,
                      defaultlocale):
    """Work out which locales to sync and enable them for the project.

    Queries the "supported-languages" endpoint, drops any required locale
    whose code isn't among the returned "crowdin_code" values, and issues
    an "edit-project" request if some remaining required locale is not in
    enabled_locales yet.

    Returns a (required_locales, skipped_locales) tuple of sets, where
    skipped_locales holds the unsupported locales plus defaultlocale.
    The sets passed in by the caller are not modified.
    """
    logger.info("Checking which locales are supported by Crowdin...")
    response = crowdin_api.request("GET", "supported-languages")

    supported_locales = {l["crowdin_code"] for l in response}
    skipped_locales = required_locales - supported_locales

    if skipped_locales:
        # Sorted so the message reads the same on every run
        logger.warning("Ignoring locales that Crowdin doesn't support: %s" % (
            ", ".join(sorted(skipped_locales))
        ))
        # Build a new set rather than using -=, which would silently
        # modify the set object owned by the caller.
        required_locales = required_locales - skipped_locales

    # It's useful to have a list of all locales to skip
    skipped_locales.add(defaultlocale)

    if not required_locales.issubset(enabled_locales):
        logger.info("Enabling the required locales for the Crowdin project...")
        crowdin_api.request(
            "POST", "edit-project",
            data={"languages[]": sorted(enabled_locales | required_locales)}
        )

    return required_locales, skipped_locales
116
def list_remote_files(project_info):
    """Flatten the file tree in project_info["files"] into two path sets.

    Nodes are dicts with a "node_type" of "file" or "directory" and a
    "name"; directories may carry their children under "files". Nodes of
    any other type are ignored.

    Returns (remote_files, remote_directories), both sets of "/"-joined
    paths relative to the project root.
    """
    remote_files = set()
    remote_directories = set()

    # Walk the tree with an explicit stack of (node, parent prefix) pairs
    pending = [(node, "") for node in project_info["files"]]
    while pending:
        node, prefix = pending.pop()
        kind = node["node_type"]
        if kind == "file":
            remote_files.add(prefix + node["name"])
        elif kind == "directory":
            directory = prefix + node["name"]
            remote_directories.add(directory)
            child_prefix = directory + "/"
            pending.extend(
                (child, child_prefix) for child in node.get("files", [])
            )

    return remote_files, remote_directories
132
def list_local_files(page_strings):
    """Derive the file and directory paths implied by page_strings.

    page_strings maps page names (using "/" as separator) to their
    strings. Pages without any strings are skipped entirely. Every other
    page contributes "<page>.json" to the file set and each of its parent
    directories to the directory set.

    Returns a (local_files, local_directories) tuple of sets.
    """
    local_files = set()
    local_directories = set()
    # items() rather than iteritems() so this works on Python 2 and 3 alike
    for page, strings in page_strings.items():
        if not strings:
            continue
        local_files.add(page + ".json")
        # Strip one path component at a time to collect all ancestors
        while "/" in page:
            page = page.rsplit("/", 1)[0]
            local_directories.add(page)
    return local_files, local_directories
143
def create_directories(crowdin_api, directories):
    """Issue one "add-directory" request per entry in directories.

    Directories are processed in sorted order: a parent path is a proper
    prefix of its children and therefore always sorts first, so a nested
    directory is never requested before the directory containing it
    (the removal code in this module orders its requests the other way
    round for the same reason).
    """
    for directory in sorted(directories):
        logger.info("Creating directory %s" % directory)
        crowdin_api.request("POST", "add-directory", data={"name": directory})
148
def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
    """Upload the strings of the given files in batches.

    files is an iterable of "<page>.json" names; the JSON sent for each
    one is generated from page_strings[<page>]. message is a format string
    taking the number of files in the batch and is logged before each
    POST to api_endpoint.

    Note that entries are removed from page_strings as they are serialized.
    """
    # Take the batch size from the object we were handed rather than
    # assuming it is a CrowdinAPI instance.
    for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
        payload = {}
        for file_name in group:
            page = os.path.splitext(file_name)[0]
            payload["files[%s]" % file_name] = (
                file_name, json.dumps(page_strings[page])
            )
            # Presumably dropped to release memory once serialized; callers
            # must not expect the entry to still be there afterwards.
            del page_strings[page]
        logger.info(message % len(payload))
        crowdin_api.request("POST", api_endpoint, files=payload)
158
def upload_new_files(crowdin_api, new_files, page_strings):
    """Send the strings of new_files to the "add-file" endpoint."""
    add_update_files(
        crowdin_api,
        api_endpoint="add-file",
        message="Uploading %d new pages...",
        files=new_files,
        page_strings=page_strings,
    )
162
def update_existing_files(crowdin_api, existing_files, page_strings):
    """Send the strings of existing_files to the "update-file" endpoint."""
    add_update_files(
        crowdin_api,
        api_endpoint="update-file",
        message="Updating %d existing pages...",
        files=existing_files,
        page_strings=page_strings,
    )
166
def upload_translations(crowdin_api, source_dir, new_files, required_locales):
    """Upload existing local translations for files that are new remotely.

    For every locale in required_locales, each name in new_files is looked
    up under <source_dir>/locales/<locale>/. Names without a file on disk
    are skipped. The files found are sent to the "upload-translation"
    endpoint in batches of crowdin_api.FILES_PER_REQUEST.
    """
    if not new_files:
        return

    def existing_locale_files(locale):
        # Yields (form field name, path) pairs; nothing is opened here so
        # that no file handle exists outside the try/finally below.
        for file_name in new_files:
            path = os.path.join(source_dir, "locales", locale, file_name)
            if os.path.isfile(path):
                yield "files[%s]" % file_name, path

    for locale in required_locales:
        for chunk in grouper(existing_locale_files(locale),
                             crowdin_api.FILES_PER_REQUEST):
            handles = {}
            try:
                for field, path in chunk:
                    # Binary mode: the bytes are uploaded as they are on disk
                    handles[field] = open(path, "rb")
                logger.info("Uploading %d existing translation "
                            "files for locale %s..." % (len(handles), locale))
                crowdin_api.request("POST", "upload-translation",
                                    files=handles, data={"language": locale})
            finally:
                # Also closes whatever was opened before a failing open()
                for handle in handles.values():
                    handle.close()
186
def remove_old_files(crowdin_api, old_files):
    """Issue one "delete-file" request for each path in old_files."""
    for stale_path in old_files:
        logger.info("Removing old file %s" % stale_path)
        payload = {"file": stale_path}
        crowdin_api.request("POST", "delete-file", data=payload)
191
def remove_old_directories(crowdin_api, old_directories):
    """Issue one "delete-directory" request per path, longest path first."""
    by_length = sorted(old_directories, key=len)
    # Reverse the ascending sort so that the longest paths are handled first
    by_length.reverse()
    for stale_directory in by_length:
        logger.info("Removing old directory %s" % stale_directory)
        payload = {"name": stale_directory}
        crowdin_api.request("POST", "delete-directory", data=payload)
196
def download_translations(crowdin_api, source_dir,
                          skipped_locales, required_locales):
    """Replace the local translation files with a fresh Crowdin export.

    Requests an export, downloads all.zip for the project, deletes the
    existing *.json files below <source_dir>/locales (leaving the
    top-level directories named in skipped_locales untouched) and then
    extracts every *.json archive member whose first path component is in
    required_locales.

    Raises whatever crowdin_api.request() raises, and
    requests.exceptions.HTTPError if the archive download returns an
    error status.
    """
    logger.info("Requesting generation of fresh translations archive...")
    result = crowdin_api.request("GET", "export")
    if result.get("success", {}).get("status") == "skipped":
        logger.warning("Archive generation skipped, either "
                       "no changes or API usage excessive")

    logger.info("Downloading translations archive...")
    # Fetched directly rather than through crowdin_api.request(), which
    # would try to decode the response body as JSON.
    response = requests.get(
        "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % (
            crowdin_api.project_name, crowdin_api.api_key
        )
    )
    response.raise_for_status()

    logger.info("Extracting translations archive...")
    # The archive is opened before anything is deleted, so an unreadable
    # download doesn't cost us the existing files.
    with zipfile.ZipFile(io.BytesIO(response.content), "r") as archive:
        locale_path = os.path.join(source_dir, "locales")
        # First clear existing translation files
        for root, dirs, files in os.walk(locale_path, topdown=True):
            if root == locale_path:
                # Don't delete locale files for unsupported locales or the
                # default; pruning dirs in place stops os.walk descending
                dirs[:] = [d for d in dirs if d not in skipped_locales]
            for f in files:
                if f.endswith(".json"):
                    os.remove(os.path.join(root, f))
        # Then extract the new ones in place. Member names inside a zip
        # archive always use "/" regardless of platform, hence posixpath
        # rather than os.path.
        for member in archive.namelist():
            path, file_name = posixpath.split(member)
            ext = posixpath.splitext(file_name)[1]
            locale = posixpath.normpath(path).split(posixpath.sep)[0]
            if ext == ".json" and locale in required_locales:
                archive.extract(member, locale_path)
Wladimir Palant 2015/07/08 23:11:08 Please use posixpath module here rather than os.pa
kzar 2015/07/11 19:21:16 Done.
230
def crowdin_sync(source_dir, crowdin_api_key):
    """Synchronise the strings of the project in source_dir with Crowdin.

    Reads "defaultlocale" and "crowdin-project-name" from the "general"
    section of the project configuration, then in order: enables required
    locales, creates directories, uploads new files along with their
    existing translations, updates existing files, removes remote files
    and directories that no longer exist locally, and finally downloads
    the translations.

    Exits the process with status 1 if no local strings are found.
    """
    with FileSource(source_dir) as source:
        settings = source.read_config()
        defaultlocale = settings.get("general", "defaultlocale")
        project_name = settings.get("general", "crowdin-project-name")
        api = CrowdinAPI(crowdin_api_key, project_name, defaultlocale)

        logger.info("Requesting project information...")
        project_info = api.request("GET", "info")
        page_strings = extract_strings(source, defaultlocale)

        wanted_locales = set(
            code for code in source.list_locales() if code != defaultlocale
        )
        enabled_locales = set(
            language["code"] for language in project_info["languages"]
        )
        required_locales, skipped_locales = configure_locales(
            api, wanted_locales, enabled_locales, defaultlocale
        )

        remote_files, remote_directories = list_remote_files(project_info)
        local_files, local_directories = list_local_files(page_strings)

        # With nothing found locally, everything remote would be treated as
        # obsolete and deleted further down - bail out instead.
        if not local_files:
            logger.error("No existing strings found, maybe the project "
                         "directory is not set up correctly? Aborting!")
            sys.exit(1)

        # Additions: directories first, then files and their translations
        new_files = local_files - remote_files
        create_directories(api, local_directories - remote_directories)
        upload_new_files(api, new_files, page_strings)
        upload_translations(api, source_dir, new_files, required_locales)

        # Updates: everything local that wasn't just added
        update_existing_files(api, local_files - new_files, page_strings)

        # Removals: files first, then the directories that contained them
        remove_old_files(api, remote_files - local_files)
        remove_old_directories(api, remote_directories - local_directories)

        download_translations(api, source_dir,
                              skipped_locales, required_locales)
        logger.info("Crowdin sync completed.")
276
# Command line entry point:
#   python -m cms.bin.translate www_directory crowdin_project_api_key [level]
if __name__ == "__main__":
    if len(sys.argv) < 3:
        # sys.stderr.write() behaves the same on Python 2 and 3, unlike the
        # print >>stream statement.
        sys.stderr.write(
            "Usage: python -m cms.bin.translate www_directory "
            "crowdin_project_api_key [logging_level]\n"
        )
        sys.exit(1)

    logging.basicConfig()
    # The optional third argument is handed to setLevel() as given;
    # INFO is used when it is absent.
    logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

    source_dir, crowdin_api_key = sys.argv[1:3]
    crowdin_sync(source_dir, crowdin_api_key)
OLDNEW
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | cms/converters.py » ('J')

Powered by Google App Engine
This is Rietveld