Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Patch Set: Addressed even more feedback from Sebastian Created July 15, 2015, 11:07 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
(Empty)
1 # coding: utf-8
2
3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH
5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation.
9 #
10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17
18 import collections
19 import io
20 import itertools
21 import json
22 import logging
23 import os
24 import posixpath
25 import sys
26 import urllib
27 import zipfile
28
29 import urllib3
30
31 import cms.utils
32 from cms.sources import FileSource
33
# Module-level logger shared by every function in this script.
logger = logging.getLogger("cms.bin.translate")
35
class CrowdinAPI(object):
    """Small client for the project endpoints served at api.crowdin.com.

    Every request is made against ``/api/project/<project_name>/<endpoint>``
    and carries the project API key as the ``key`` query parameter.
    """

    # Upper bound on the number of files callers put into a single request.
    FILES_PER_REQUEST = 20

    def __init__(self, api_key, project_name):
        """Remember the credentials and open a connection pool to Crowdin."""
        self.api_key = api_key
        self.project_name = project_name
        self.connection = urllib3.connection_from_url("https://api.crowdin.com/")

    def raw_request(self, request_method, api_endpoint, query_params=None,
                    **kwargs):
        """Perform one API call and return the urllib3 response object.

        request_method -- HTTP verb, e.g. "GET" or "POST"
        api_endpoint   -- path below the project, e.g. "info"
        query_params   -- optional iterable of (name, value) pairs appended
                          to the query string after the API key
        kwargs         -- passed through to the connection's ``request()``

        Raises urllib3.exceptions.HTTPError if the connection fails or the
        response status is outside the 2xx range.
        """
        # Review note: query_params is optional. The default is None rather
        # than [] because a mutable default would be shared between calls.
        query = [("key", self.api_key)]
        if query_params:
            query.extend(query_params)

        url = "/api/project/%s/%s?%s" % (
            urllib.quote(self.project_name),
            urllib.quote(api_endpoint),
            urllib.urlencode(query)
        )
        # Log the endpoint rather than the URL: the URL's query string
        # contains the API key and must not end up in log output.
        try:
            response = self.connection.request(
                request_method, str(url), **kwargs
            )
        except urllib3.exceptions.HTTPError:
            logger.error("Connection to API endpoint %s failed", api_endpoint)
            raise
        if response.status < 200 or response.status >= 300:
            logger.error("API call to %s failed:\n%s", api_endpoint,
                         response.data)
            raise urllib3.exceptions.HTTPError(response.status)
        return response

    def request(self, request_method, api_endpoint, data=None, files=None):
        """Call an endpoint with form fields/files and return the parsed JSON.

        data  -- optional dict; string values become a single field, any
                 other value is iterated and sent as repeated ``name[]``
                 fields
        files -- optional iterable of tuples whose first item is the file
                 name; each tuple is sent as the field ``files[<name>]``

        Raises ValueError if the response body is not valid JSON, and
        whatever raw_request() raises for transport/status errors.
        """
        fields = []
        if data:
            for name, value in data.iteritems():
                if isinstance(value, basestring):
                    fields.append((name, value))
                else:
                    fields.extend((name + "[]", v) for v in value)
        if files:
            fields.extend(("files[%s]" % f[0], f) for f in files)

        # preload_content=False lets json.load() read straight from the
        # response object instead of buffering the body first.
        response = self.raw_request(
            request_method, api_endpoint, [("json", "1")],
            fields=fields, preload_content=False
        )

        try:
            return json.load(response)
        except ValueError:
            # Only the endpoint is known here; the full URL is local to
            # raw_request().
            logger.error("Invalid response returned by API endpoint %s",
                         api_endpoint)
            raise
83
84
def grouper(iterable, n):
    """Yield successive tuples of at most *n* items taken from *iterable*.

    Every chunk except possibly the last holds exactly *n* items; an empty
    iterable yields nothing. Because this is a generator, the size check
    runs when iteration starts.

    Raises ValueError if *n* is smaller than 1, since a chunk size of zero
    would otherwise silently discard every item.
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))

    iterator = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(iterator, n))
        if not chunk:
            return
        yield chunk
92
def extract_strings(source, defaultlocale):
    """Collect the default-locale strings of every page in *source*.

    Each page listed by ``source.list_pages()`` is processed with
    ``cms.utils.process_page``; the strings it reports for *defaultlocale*
    are recorded.

    Returns a dict mapping page name to an OrderedDict of
    ``{string name: {"message": ..., "description": ...}}`` where
    "description" is only present if there is a comment or fixed strings.
    """
    logger.info("Extracting page strings (please be patient)...")
    page_strings = {}

    def record_string(page, locale, name, value, comment, fixed_strings):
        # Only the default locale's strings are recorded.
        if locale != defaultlocale:
            return

        try:
            store = page_strings[page]
        except KeyError:
            store = page_strings[page] = collections.OrderedDict()

        store[name] = {"message": value}

        if fixed_strings:
            # List the fixed strings as "{1}: ..., {2}: ..." on their own
            # line below any existing comment.
            comment = comment + "\n" if comment else ""
            comment += ", ".join("{%d}: %s" % i_s
                                 for i_s in enumerate(fixed_strings, 1))
        if comment:
            store[name]["description"] = comment

    for page, page_format in source.list_pages():
        cms.utils.process_page(source, defaultlocale, page,
                               format=page_format,
                               localized_string_callback=record_string)
    return page_strings
119
def configure_locales(crowdin_api, required_locales, enabled_locales,
                      defaultlocale):
    """Make sure the Crowdin project has the locales this site needs.

    crowdin_api      -- object providing ``request()``
    required_locales -- set of locale codes the site wants translated
    enabled_locales  -- set of locale codes already enabled in the project
    defaultlocale    -- accepted for interface compatibility; not used here

    Locales missing from Crowdin's "supported-languages" response are
    dropped with a warning. If any remaining locale is not yet enabled, the
    project is edited to enable the union of both sets.

    Returns a new set with the supported subset of *required_locales*; the
    set passed in is left untouched.
    """
    logger.info("Checking which locales are supported by Crowdin...")
    response = crowdin_api.request("GET", "supported-languages")

    supported_locales = {language["crowdin_code"] for language in response}
    skipped_locales = required_locales - supported_locales

    if skipped_locales:
        # Sorted so the warning reads the same on every run.
        logger.warning("Ignoring locales that Crowdin doesn't support: %s",
                       ", ".join(sorted(skipped_locales)))

    # Build a fresh set instead of using "-=", which would modify the
    # caller's set in place.
    required_locales = required_locales & supported_locales

    if not required_locales.issubset(enabled_locales):
        logger.info("Enabling the required locales for the Crowdin project...")
        crowdin_api.request(
            "POST", "edit-project",
            data={"languages": enabled_locales | required_locales}
        )

    return required_locales
141
def list_remote_files(project_info):
    """Flatten the file tree in *project_info* into two sets of paths.

    ``project_info["files"]`` is a list of nodes. A node with
    ``node_type == "file"`` contributes its "/"-joined path to the first
    set; a node with ``node_type == "directory"`` contributes its path to
    the second set and its optional "files" list is walked in turn. Nodes
    of any other type are ignored.

    Returns ``(remote_files, remote_directories)``.
    """
    remote_files = set()
    remote_directories = set()

    # Explicit stack of (path prefix, node) pairs, so the nesting depth of
    # the remote tree is not limited by Python's recursion limit.
    pending = [("", node) for node in project_info["files"]]
    while pending:
        prefix, node = pending.pop()
        node_type = node["node_type"]
        if node_type == "file":
            remote_files.add(prefix + node["name"])
        elif node_type == "directory":
            dir_name = prefix + node["name"]
            remote_directories.add(dir_name)
            # "files" may be absent (or empty) for an empty directory.
            children = node.get("files") or []
            pending.extend((dir_name + "/", child) for child in children)

    return remote_files, remote_directories
157
def list_local_files(page_strings):
    """Derive the file and directory paths implied by *page_strings*.

    page_strings -- dict mapping page name (with "/" separators) to its
                    strings

    Every page that has at least one string contributes ``<page>.json`` to
    the file set and each of its parent directories to the directory set.
    Pages without strings contribute nothing.

    Returns ``(local_files, local_directories)``.
    """
    local_files = set()
    local_directories = set()
    # items() rather than iteritems(): it behaves the same here and does
    # not tie the function to Python 2.
    for page, strings in page_strings.items():
        if not strings:
            continue
        local_files.add(page + ".json")
        # Walk up the path one component at a time: "a/b/c" -> "a/b" -> "a".
        parent = page
        while "/" in parent:
            parent = parent.rsplit("/", 1)[0]
            local_directories.add(parent)
    return local_files, local_directories
168
def create_directories(crowdin_api, directories):
    """Create each path in *directories* via the "add-directory" endpoint.

    The paths are processed in sorted order, so a parent such as "a" is
    always requested before its child "a/b". The input is typically a set,
    whose own iteration order gives no such guarantee.
    """
    for directory in sorted(directories):
        logger.info("Creating directory %s", directory)
        crowdin_api.request("POST", "add-directory", data={"name": directory})
173
def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
    """Upload the strings of *files* to *api_endpoint* in batches.

    crowdin_api  -- object providing ``request()`` and FILES_PER_REQUEST
    api_endpoint -- endpoint that receives the files
    message      -- log format string taking the batch size as its one
                    argument
    files        -- iterable of file names of the form ``<page>.json``
    page_strings -- dict of page name -> strings; the entry of every
                    uploaded page is removed from it
    """
    for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
        # A separate name for the batch keeps the "files" argument intact
        # while it is still being consumed by grouper().
        batch = []
        for file_name in group:
            page = os.path.splitext(file_name)[0]
            batch.append((file_name, json.dumps(page_strings[page]),
                          "application/json"))
            # Drop the strings once serialized; callers see the removal.
            del page_strings[page]
        logger.info(message, len(batch))
        crowdin_api.request("POST", api_endpoint, files=batch)
183
def upload_new_files(crowdin_api, new_files, page_strings):
    """Send the pages in *new_files* to the "add-file" endpoint."""
    add_update_files(
        crowdin_api,
        "add-file",
        "Uploading %d new pages...",
        files=new_files,
        page_strings=page_strings,
    )
187
def update_existing_files(crowdin_api, existing_files, page_strings):
    """Send the pages in *existing_files* to the "update-file" endpoint."""
    add_update_files(
        crowdin_api,
        "update-file",
        "Updating %d existing pages...",
        files=existing_files,
        page_strings=page_strings,
    )
191
def upload_translations(crowdin_api, source_dir, new_files, required_locales):
    """Upload the local translations that exist for newly added files.

    For every locale in *required_locales*, each file of *new_files* found
    under ``<source_dir>/locales/<locale>/`` is read and sent to the
    "upload-translation" endpoint, at most
    ``crowdin_api.FILES_PER_REQUEST`` files per request. Nothing happens
    if *new_files* is empty.
    """
    def open_locale_files(locale, files):
        # Lazily yield (name, content, MIME type) for the files that exist.
        locale_dir = os.path.join(source_dir, "locales", locale)
        for file_name in files:
            path = os.path.join(locale_dir, file_name)
            if not os.path.isfile(path):
                continue
            with open(path, "rb") as handle:
                yield (file_name, handle.read(), "application/json")

    if not new_files:
        return

    batch_size = crowdin_api.FILES_PER_REQUEST
    for locale in required_locales:
        for batch in grouper(open_locale_files(locale, new_files), batch_size):
            logger.info("Uploading %d existing translation "
                        "files for locale %s...", len(batch), locale)
            crowdin_api.request("POST", "upload-translation", files=batch,
                                data={"language": locale})
208
def remove_old_files(crowdin_api, old_files):
    """Request deletion of every file in *old_files* via "delete-file"."""
    for stale_file in old_files:
        logger.info("Removing old file %s", stale_file)
        payload = {"file": stale_file}
        crowdin_api.request("POST", "delete-file", data=payload)
213
def remove_old_directories(crowdin_api, old_directories):
    """Request deletion of every path in *old_directories*.

    The paths are handled from longest to shortest, so a nested directory
    is always requested before the directory that contains it.
    """
    shortest_first = sorted(old_directories, key=len)
    for stale_directory in shortest_first[::-1]:
        logger.info("Removing old directory %s", stale_directory)
        payload = {"name": stale_directory}
        crowdin_api.request("POST", "delete-directory", data=payload)
218
def download_translations(crowdin_api, source_dir, required_locales):
    """Replace the local translation files with Crowdin's current export.

    Requests a fresh export, downloads ``download/all.zip``, deletes the
    existing ``.json`` files below ``<source_dir>/locales`` (descending
    only into the directories of *required_locales*) and then extracts the
    archive's ``.json`` members whose first path component is one of
    *required_locales*.
    """
    logger.info("Requesting generation of fresh translations archive...")
    export_result = crowdin_api.request("GET", "export")
    export_status = export_result.get("success", {}).get("status")
    if export_status == "skipped":
        logger.warning("Archive generation skipped, either "
                       "no changes or API usage excessive")

    logger.info("Downloading translations archive...")
    response = crowdin_api.raw_request("GET", "download/all.zip", [])

    logger.info("Extracting translations archive...")
    archive_bytes = io.BytesIO(response.data)
    with zipfile.ZipFile(archive_bytes, "r") as archive:
        locale_path = os.path.join(source_dir, "locales")

        # Step 1: remove the translation files that are already there.
        for current_dir, subdirs, file_names in os.walk(locale_path,
                                                        topdown=True):
            if current_dir == locale_path:
                # Pruning in place restricts the walk to required locales.
                subdirs[:] = [name for name in subdirs
                              if name in required_locales]
            stale = [name for name in file_names
                     if name.lower().endswith(".json")]
            for name in stale:
                os.remove(os.path.join(current_dir, name))

        # Step 2: unpack the fresh files into the same place.
        for member in archive.namelist():
            directory, base_name = posixpath.split(member)
            extension = posixpath.splitext(base_name)[1]
            if extension.lower() != ".json":
                continue
            locale = directory.split(posixpath.sep)[0]
            if locale in required_locales:
                archive.extract(member, locale_path)
246
def crowdin_sync(source_dir, crowdin_api_key):
    """Synchronise the site in *source_dir* with its Crowdin project.

    Reads "defaultlocale" and "crowdin-project-name" from the "general"
    section of the site configuration, then:

    1. extracts the default-locale strings of all pages,
    2. enables the required locales in the Crowdin project,
    3. creates new directories, uploads new pages and their existing
       translations, and updates pages that already exist remotely,
    4. removes remote files and directories that no longer exist locally,
    5. downloads the current translations into ``<source_dir>/locales``.

    Exits the process with status 1 if no local strings are found.
    """
    # NOTE(review): the extent of this "with" block could not be recovered
    # from the flattened source; the whole sync is kept inside it so that
    # "source" is valid wherever it is used.
    with FileSource(source_dir) as source:
        config = source.read_config()
        defaultlocale = config.get("general", "defaultlocale")
        crowdin_project_name = config.get("general", "crowdin-project-name")

        crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name)

        logger.info("Requesting project information...")
        project_info = crowdin_api.request("GET", "info")
        page_strings = extract_strings(source, defaultlocale)

        local_files, local_directories = list_local_files(page_strings)

        # Avoid deleting all remote content if there was a problem listing
        # local files. Checked before anything is changed remotely, so a
        # misconfigured checkout does not even alter the project's locales.
        if not local_files:
            logger.error("No existing strings found, maybe the project directory is "
                         "not set up correctly? Aborting!")
            sys.exit(1)

        required_locales = {l for l in source.list_locales() if l != defaultlocale}
        enabled_locales = {l["code"] for l in project_info["languages"]}

        required_locales = configure_locales(crowdin_api, required_locales,
                                             enabled_locales, defaultlocale)

        remote_files, remote_directories = list_remote_files(project_info)

        new_files = local_files - remote_files
        new_directories = local_directories - remote_directories
        create_directories(crowdin_api, new_directories)
        upload_new_files(crowdin_api, new_files, page_strings)
        upload_translations(crowdin_api, source_dir, new_files, required_locales)

        existing_files = local_files - new_files
        update_existing_files(crowdin_api, existing_files, page_strings)

        old_files = remote_files - local_files
        old_directories = remote_directories - local_directories
        remove_old_files(crowdin_api, old_files)
        remove_old_directories(crowdin_api, old_directories)

        download_translations(crowdin_api, source_dir, required_locales)
        logger.info("Crowdin sync completed.")
290
# Command-line entry point:
#   python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]"
        sys.exit(1)

    logging.basicConfig()
    # The optional third argument is handed to setLevel() as given;
    # without it the script logs at INFO.
    logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

    source_dir, crowdin_api_key = sys.argv[1:3]
    crowdin_sync(source_dir, crowdin_api_key)
OLD | NEW
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld