Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Patch Set: Addressed Sebastian's feedback Created July 14, 2015, 12:50 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
(Empty)
1 # coding: utf-8
2
3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH
5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation.
9 #
10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17
18 import collections
19 import io
20 import itertools
21 import json
22 import logging
23 import os
24 import posixpath
25 import sys
26 import urllib
27 import zipfile
28
29 import urllib3
30
31 import cms.utils
32 from cms.sources import FileSource
33
# Module-level logger used by every function in this script; its level is
# set in the __main__ block below.
logger = logging.getLogger("cms.bin.translate")
35
class CrowdinAPI:
    """Small wrapper around the Crowdin project HTTP API.

    Holds the project name and API key and sends every call through a
    single urllib3 connection pool for https://api.crowdin.com/.
    """

    # Batch size used by the upload helpers when they group files into
    # requests.
    FILES_PER_REQUEST = 20

    def __init__(self, api_key, project_name):
        """Store the credentials and open the connection pool."""
        self.api_key = api_key
        self.project_name = project_name
        self.connection = urllib3.connection_from_url(
            "https://api.crowdin.com/"
        )

    def request(self, request_method, api_endpoint, data=None, files=None):
        """Call a project API endpoint and return the decoded JSON reply.

        request_method -- HTTP verb, e.g. "GET" or "POST"
        api_endpoint   -- endpoint name appended to /api/project/<name>/
        data           -- optional mapping of field name to value; string
                          values are sent as-is, any other iterable is
                          expanded into repeated "name[]" fields
        files          -- optional iterable of (file_name, content,
                          mime_type) tuples, sent as "files[<file_name>]"

        Raises urllib3.exceptions.HTTPError if the request fails or the
        status is not 2xx, and ValueError if the reply is not valid JSON.
        Both are logged before being re-raised.
        """
        url = "/api/project/%s/%s?%s" % (
            urllib.quote(self.project_name), urllib.quote(api_endpoint),
            urllib.urlencode([("key", self.api_key), ("json", "1")])
        )

        fields = []
        if data:
            for name, value in data.iteritems():
                if isinstance(value, basestring):
                    fields.append((name, value))
                else:
                    fields.extend((name + "[]", v) for v in value)
        if files:
            fields.extend(("files[%s]" % f[0], f) for f in files)

        # Pre-bind response so the error handler can tell "bad status"
        # apart from "the request itself raised" (e.g. a connect timeout).
        # Without this the handler would hit an unbound name and hide the
        # real exception.
        response = None
        try:
            response = self.connection.request(
                request_method, str(url), fields=fields,
                timeout=urllib3.Timeout(connect=5)
            )
            if response.status < 200 or response.status >= 300:
                raise urllib3.exceptions.HTTPError(response.status)
        except urllib3.exceptions.HTTPError:
            if response is None:
                logger.error("API call to %s failed: no response received",
                             url)
            else:
                logger.error("API call to %s failed:\n%s", url,
                             response.data)
            raise

        try:
            return json.loads(response.data)
        except ValueError:
            logger.error("Invalid response returned by API endpoint %s", url)
            raise
76
77
def grouper(iterable, n):
    """Yield consecutive tuples of at most n items taken from iterable.

    The final tuple may be shorter than n; an empty iterable yields
    nothing.
    """
    remaining = iter(iterable)
    # iter(callable, sentinel) keeps pulling batches until an empty tuple
    # signals that the underlying iterator is exhausted.
    for batch in iter(lambda: tuple(itertools.islice(remaining, n)), ()):
        yield batch
85
def extract_strings(source, defaultlocale):
    """Collect the default-locale strings of every page in source.

    Runs cms.utils.process_page over each page returned by
    source.list_pages() with a callback that records the localizable
    strings it is handed.

    Returns a dict mapping page name to an OrderedDict of
    string name -> {"message": value[, "description": comment]}.
    """
    logger.info("Extracting page strings (please be patient)...")
    page_strings = {}

    def record_string(page, locale, name, value, comment, fixed_strings):
        # Only the default locale provides source strings.
        if locale != defaultlocale:
            return

        try:
            store = page_strings[page]
        except KeyError:
            store = page_strings[page] = collections.OrderedDict()

        store[name] = {"message": value}

        if fixed_strings:
            # Append a "{1}: ..., {2}: ..." legend for the fixed strings,
            # on its own line after any existing comment.
            comment = comment + "\n" if comment else ""
            comment += ", ".join("{%d}: %s" % i_s
                                 for i_s in enumerate(fixed_strings, 1))
        if comment:
            store[name]["description"] = comment

    # page_format avoids shadowing the builtin format().
    for page, page_format in source.list_pages():
        cms.utils.process_page(source, defaultlocale, page,
                               format=page_format,
                               localized_string_callback=record_string)
    return page_strings
112
def configure_locales(crowdin_api, required_locales, enabled_locales,
                      defaultlocale):
    """Work out which required locales Crowdin can handle and enable them.

    crowdin_api      -- CrowdinAPI instance used for the requests
    required_locales -- set of locale codes the project wants translated
    enabled_locales  -- set of locale codes already enabled on Crowdin
    defaultlocale    -- accepted for interface compatibility; unused here

    Locales missing from Crowdin's "supported-languages" reply are logged
    and dropped. If any remaining locale is not yet enabled, the project is
    edited to enable the union of both sets.

    Returns the set of required locales that Crowdin supports. The
    caller's required_locales set is not modified.
    """
    logger.info("Checking which locales are supported by Crowdin...")
    response = crowdin_api.request("GET", "supported-languages")

    supported_locales = {l["crowdin_code"] for l in response}
    skipped_locales = required_locales - supported_locales

    if skipped_locales:
        # Sorted so the log output is stable between runs.
        logger.warning("Ignoring locales that Crowdin doesn't support: %s",
                       ", ".join(sorted(skipped_locales)))
        # Build a new set instead of using "-=", which would silently
        # mutate the set object owned by the caller.
        required_locales = required_locales - skipped_locales

    if not required_locales.issubset(enabled_locales):
        logger.info("Enabling the required locales for the Crowdin project...")
        crowdin_api.request(
            "POST", "edit-project",
            data={"languages": enabled_locales | required_locales}
        )

    return required_locales
134
def list_remote_files(project_info):
    """Flatten the file tree of a Crowdin "info" reply into path sets.

    Walks project_info["files"], where each node has a "node_type" of
    "file" or "directory", a "name", and for directories an optional
    nested "files" list. Nodes of any other type are ignored.

    Returns (remote_files, remote_directories): two sets of
    "/"-separated paths relative to the project root.
    """
    remote_files = set()
    remote_directories = set()

    # Explicit work list of (node, parent path prefix) pairs.
    pending = [(node, "") for node in project_info["files"]]
    while pending:
        node, prefix = pending.pop()
        kind = node["node_type"]
        if kind == "file":
            remote_files.add(prefix + node["name"])
        elif kind == "directory":
            dir_name = prefix + node["name"]
            remote_directories.add(dir_name)
            child_prefix = dir_name + "/"
            pending.extend(
                (child, child_prefix) for child in node.get("files", [])
            )

    return remote_files, remote_directories
150
def list_local_files(page_strings):
    """Derive the file and directory paths that should exist remotely.

    page_strings maps page name to its extracted strings. Each page with
    at least one string contributes "<page>.json" to the file set and
    every one of its parent directories to the directory set.

    Returns (local_files, local_directories) as two sets.
    """
    local_files = set()
    local_directories = set()
    for page in page_strings:
        # Pages without any strings produce no file and no directories.
        if not page_strings[page]:
            continue
        local_files.add(page + ".json")
        parent = page
        while "/" in parent:
            # Strip the last path component to step up one level.
            parent = parent.rpartition("/")[0]
            local_directories.add(parent)
    return local_files, local_directories
161
def create_directories(crowdin_api, directories):
    """Create each of the given directories in the Crowdin project.

    Sends one "add-directory" request per entry. The paths are sorted
    first: a parent path is a strict prefix of its children and therefore
    sorts before them, so parents are always requested ahead of the
    directories nested inside them, whatever order the input set has.
    """
    for directory in sorted(directories):
        logger.info("Creating directory %s", directory)
        crowdin_api.request("POST", "add-directory", data={"name": directory})
166
def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
    """Send the JSON-encoded strings of the given files in batches.

    crowdin_api  -- CrowdinAPI instance used for the requests
    api_endpoint -- endpoint to POST to ("add-file" or "update-file")
    message      -- log format string taking the batch size as %d
    files        -- iterable of "<page>.json" file names
    page_strings -- dict of page name -> strings; the entry for each page
                    is deleted once it has been serialized

    Files are sent crowdin_api.FILES_PER_REQUEST at a time.
    """
    for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
        # Separate name so the "files" parameter being iterated above is
        # not rebound inside the loop.
        batch = []
        for file_name in group:
            page = os.path.splitext(file_name)[0]
            batch.append((file_name, json.dumps(page_strings[page]),
                          "application/json"))
            del page_strings[page]
        logger.info(message, len(batch))
        crowdin_api.request("POST", api_endpoint, files=batch)
176
def upload_new_files(crowdin_api, new_files, page_strings):
    """Upload pages that do not exist on Crowdin yet via "add-file"."""
    add_update_files(
        crowdin_api,
        api_endpoint="add-file",
        message="Uploading %d new pages...",
        files=new_files,
        page_strings=page_strings,
    )
180
def update_existing_files(crowdin_api, existing_files, page_strings):
    """Refresh pages already present on Crowdin via "update-file"."""
    add_update_files(
        crowdin_api,
        api_endpoint="update-file",
        message="Updating %d existing pages...",
        files=existing_files,
        page_strings=page_strings,
    )
184
def upload_translations(crowdin_api, source_dir, new_files, required_locales):
    """Upload existing local translations for newly added files.

    For every required locale, looks for
    <source_dir>/locales/<locale>/<file_name> for each new file and sends
    the ones that exist to the "upload-translation" endpoint, in batches
    of crowdin_api.FILES_PER_REQUEST. Does nothing if new_files is empty.
    """
    def open_locale_files(locale, files):
        # Lazily yield (name, content, mime type) for each translation
        # file that exists locally for this locale.
        for file_name in files:
            path = os.path.join(source_dir, "locales", locale, file_name)
            if os.path.isfile(path):
                # Binary mode so the content is uploaded byte-for-byte,
                # without newline translation on Windows (requested in
                # review).
                with open(path, "rb") as f:
                    yield (file_name, f.read(), "application/json")

    if new_files:
        for locale in required_locales:
            for files in grouper(open_locale_files(locale, new_files),
                                 crowdin_api.FILES_PER_REQUEST):
                logger.info("Uploading %d existing translation "
                            "files for locale %s...", len(files), locale)
                crowdin_api.request("POST", "upload-translation",
                                    files=files, data={"language": locale})
201
def remove_old_files(crowdin_api, old_files):
    """Delete each given file from the Crowdin project, one request each."""
    for stale_file in old_files:
        logger.info("Removing old file %s", stale_file)
        payload = {"file": stale_file}
        crowdin_api.request("POST", "delete-file", data=payload)
206
def remove_old_directories(crowdin_api, old_directories):
    """Delete the given directories from the Crowdin project.

    Paths are processed longest first. A nested directory's path is always
    longer than its parent's, so children are removed before parents.
    """
    by_length = sorted(old_directories, key=len)
    # Walk the ascending list backwards to get longest-first order.
    for stale_dir in by_length[::-1]:
        logger.info("Removing old directory %s", stale_dir)
        payload = {"name": stale_dir}
        crowdin_api.request("POST", "delete-directory", data=payload)
211
def download_translations(crowdin_api, source_dir, required_locales):
    """Replace the local translation files with a fresh Crowdin export.

    Asks Crowdin to build a new archive ("export"), downloads all.zip,
    removes the existing .json files under <source_dir>/locales for the
    required locales, and extracts the archive's .json members for those
    locales in their place.

    Raises urllib3.exceptions.HTTPError if the download does not return a
    2xx status.
    """
    logger.info("Requesting generation of fresh translations archive...")
    result = crowdin_api.request("GET", "export")
    if result.get("success", {}).get("status") == "skipped":
        logger.warning("Archive generation skipped, either "
                       "no changes or API usage excessive")

    logger.info("Downloading translations archive...")
    # The archive is binary, so it is fetched through the raw connection
    # instead of CrowdinAPI.request(), which decodes the reply as JSON.
    response = crowdin_api.connection.request(
        "GET",
        "/api/project/%s/download/all.zip?%s" % (
            urllib.quote(crowdin_api.project_name),
            urllib.urlencode([("key", crowdin_api.api_key)])
        ), preload_content=False
    )
    if response.status < 200 or response.status >= 300:
        raise urllib3.exceptions.HTTPError(response.status, response.data)

    logger.info("Extracting translations archive...")
    with zipfile.ZipFile(io.BytesIO(response.data), "r") as archive:
        locale_path = os.path.join(source_dir, "locales")
        # First clear existing translation files
        for root, dirs, files in os.walk(locale_path, topdown=True):
            if root == locale_path:
                # Prune the walk in place so only the directories of
                # required locales are descended into.
                dirs[:] = [d for d in dirs if d in required_locales]
            for f in files:
                # Case-insensitive, matching the extraction filter below.
                if f.lower().endswith(".json"):
                    os.remove(os.path.join(root, f))
        # Then extract the new ones in place
        for member in archive.namelist():
            path, file_name = posixpath.split(member)
            ext = posixpath.splitext(file_name)[1]
            # The first path component of an archive member is its locale.
            locale = path.split(posixpath.sep)[0]
            # Extension compared case-insensitively (requested in review).
            if ext.lower() == ".json" and locale in required_locales:
                archive.extract(member, locale_path)
247
def crowdin_sync(source_dir, crowdin_api_key):
    """Synchronise the strings of the website in source_dir with Crowdin.

    Reads "defaultlocale" and "crowdin-project-name" from the [general]
    section of the site configuration, then:

    1. extracts the default-locale strings of every page,
    2. enables any missing required locales on Crowdin,
    3. creates new directories, uploads new pages and their existing
       translations, and updates pages already present,
    4. removes remote files and directories with no local counterpart,
    5. downloads the fresh translations into <source_dir>/locales.

    Exits with status 1 without touching remote content if no local
    strings were found.
    """
    with FileSource(source_dir) as source:
        settings = source.read_config()
        defaultlocale = settings.get("general", "defaultlocale")
        project_name = settings.get("general", "crowdin-project-name")

        api = CrowdinAPI(crowdin_api_key, project_name)

        logger.info("Requesting project information...")
        project_info = api.request("GET", "info")
        page_strings = extract_strings(source, defaultlocale)

        wanted_locales = set(
            l for l in source.list_locales() if l != defaultlocale
        )
        enabled_locales = set(l["code"] for l in project_info["languages"])
        wanted_locales = configure_locales(api, wanted_locales,
                                           enabled_locales, defaultlocale)

        remote_files, remote_directories = list_remote_files(project_info)
        local_files, local_directories = list_local_files(page_strings)

        # Avoid deleting all remote content if there was a problem listing
        # local files
        if not local_files:
            logger.error("No existing strings found, maybe the project directory is "
                         "not set up correctly? Aborting!")
            sys.exit(1)

        # Work out every difference up front; these are pure set operations
        # on values that the API calls below do not modify.
        new_files = local_files - remote_files
        new_directories = local_directories - remote_directories
        existing_files = local_files - new_files
        old_files = remote_files - local_files
        old_directories = remote_directories - local_directories

        # Additions first ...
        create_directories(api, new_directories)
        upload_new_files(api, new_files, page_strings)
        upload_translations(api, source_dir, new_files, wanted_locales)

        # ... then updates ...
        update_existing_files(api, existing_files, page_strings)

        # ... then removals.
        remove_old_files(api, old_files)
        remove_old_directories(api, old_directories)

        download_translations(api, source_dir, wanted_locales)
        logger.info("Crowdin sync completed.")
291
# Command line entry point:
#   python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]
if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stderr.write(
            "Usage: python -m cms.bin.translate www_directory "
            "crowdin_project_api_key [logging_level]\n"
        )
        sys.exit(1)

    logging.basicConfig()
    # The optional third argument overrides the default INFO level.
    logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

    source_dir, crowdin_api_key = sys.argv[1:3]
    crowdin_sync(source_dir, crowdin_api_key)
OLD | NEW
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld