Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Patch Set: Slightly simplified request exception logic Created July 12, 2015, 5:47 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | cms/converters.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # coding: utf-8
2
3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH
5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation.
9 #
10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17
18 import collections
19 import io
20 import itertools
21 import json
22 import logging
23 import os
24 import posixpath
25 import sys
26 import zipfile
27
28 import urllib3
29
30 import cms.utils
31 from cms.sources import FileSource
32
33 logger = logging.getLogger("cms.bin.translate")
34
class CrowdinAPI:
    """Small client for the Crowdin project API.

    All calls go to https://api.crowdin.com/ through a single urllib3
    connection pool and are authenticated by passing the project API key
    as a query parameter.
    """

    # Batch size used by callers when attaching files to one request.
    FILES_PER_REQUEST = 20

    def __init__(self, api_key, project_name):
        """Store the credentials and open the connection pool."""
        self.api_key = api_key
        self.project_name = project_name
        self.connection = urllib3.connection_from_url("https://api.crowdin.com/")

    def request(self, request_method, api_endpoint, data=None, files=None):
        """Call a project API endpoint and return the decoded JSON reply.

        request_method -- HTTP verb, e.g. "GET" or "POST"
        api_endpoint   -- endpoint name below /api/project/<project>/
        data           -- optional dict of form fields; a string value is
                          sent as is, any other value is iterated and sent
                          as repeated "<name>[]" fields
        files          -- optional iterable of (file_name, content,
                          mime_type) tuples, sent as "files[<file_name>]"

        Raises urllib3.exceptions.HTTPError if the request fails or the
        status is outside 2xx, and ValueError if the body is not JSON.
        """
        # Imported locally because the module's import block lacks urllib.
        import urllib

        # Quote every interpolated value so that special characters in the
        # project name or key cannot corrupt the URL.
        url = "/api/project/%s/%s?%s" % (
            urllib.quote(self.project_name),
            urllib.quote(api_endpoint),
            urllib.urlencode([("key", self.api_key), ("json", "1")])
        )

        fields = []
        if data:
            for name, value in data.iteritems():
                if isinstance(value, basestring):
                    fields.append((name, value))
                else:
                    fields.extend((name + "[]", v) for v in value)
        if files:
            # f[0] is the file name; the whole tuple is the field value.
            fields.extend(("files[%s]" % f[0], f) for f in files)

        # Only the endpoint is logged below: the full URL contains the
        # API key, which should not end up in log output.
        try:
            response = self.connection.request(
                request_method, str(url), fields=fields,
                timeout=urllib3.Timeout(connect=5)
            )
        except urllib3.exceptions.HTTPError:
            # The request itself failed, so there is no response to show.
            logger.error("API call to %s failed", api_endpoint)
            raise

        if response.status < 200 or response.status >= 300:
            logger.error("API call to %s failed:\n%s",
                         api_endpoint, response.data)
            raise urllib3.exceptions.HTTPError(response.status)

        try:
            return json.loads(response.data)
        except ValueError:
            logger.error("Invalid response returned by API endpoint %s",
                         api_endpoint)
            raise
74
75
def grouper(iterable, n):
    """Yield consecutive tuples of up to *n* items taken from *iterable*.

    The last tuple is shorter when the number of items is not a multiple
    of *n*; an empty iterable yields nothing. Items are consumed lazily.
    """
    iterator = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(iterator, n))
        if not chunk:
            # The underlying iterator is exhausted.
            return
        yield chunk
83
def extract_strings(source, defaultlocale):
    """Collect the default-locale strings of every page in *source*.

    Each page is run through cms.utils.process_page with a callback that
    records the strings reported for *defaultlocale*.

    Returns a dict mapping page name to an OrderedDict of
    string name -> {"message": value[, "description": comment]}.
    """
    logger.info("Extracting page strings (please be patient)...")
    page_strings = {}

    def record_string(page, locale, name, value, comment, fixed_strings):
        # Only strings of the default locale are collected.
        if locale != defaultlocale:
            return

        try:
            store = page_strings[page]
        except KeyError:
            store = page_strings[page] = collections.OrderedDict()

        store[name] = {"message": value}

        if fixed_strings:
            # Append a "{1}: ..., {2}: ..." list of the fixed strings to
            # the comment, on its own line if a comment already exists.
            comment = comment + "\n" if comment else ""
            comment += ", ".join("{%d}: %s" % i_s
                                 for i_s in enumerate(fixed_strings, 1))
        if comment:
            store[name]["description"] = comment

    for page, page_format in source.list_pages():
        cms.utils.process_page(source, defaultlocale, page,
                               format=page_format,
                               localized_string_callback=record_string)
    return page_strings
110
def configure_locales(crowdin_api, required_locales, enabled_locales,
                      defaultlocale):
    """Restrict the required locales to supported ones and enable them.

    crowdin_api      -- object whose request() method performs API calls
    required_locales -- set of locale codes wanted locally
    enabled_locales  -- set of locale codes already enabled remotely
    defaultlocale    -- accepted for interface compatibility; not used

    Returns a new set with the required locales that Crowdin supports.
    The set passed in as *required_locales* is left unmodified.
    """
    logger.info("Checking which locales are supported by Crowdin...")
    response = crowdin_api.request("GET", "supported-languages")

    supported_locales = {l["crowdin_code"] for l in response}
    skipped_locales = required_locales - supported_locales

    if skipped_locales:
        # Sorted so that the warning text is reproducible between runs.
        logger.warning("Ignoring locales that Crowdin doesn't support: %s",
                       ", ".join(sorted(skipped_locales)))
        # Build a new set rather than mutating the caller's argument.
        required_locales = required_locales - skipped_locales

    if not required_locales.issubset(enabled_locales):
        logger.info("Enabling the required locales for the Crowdin project...")
        crowdin_api.request(
            "POST", "edit-project",
            data={"languages": enabled_locales | required_locales}
        )

    return required_locales
133
def list_remote_files(project_info):
    """Flatten the file tree in *project_info* into two sets of paths.

    project_info["files"] is a list of nodes. A node whose "node_type" is
    "file" contributes its path; one whose "node_type" is "directory"
    contributes its path and is descended into through its optional
    "files" list. Nodes of any other type are ignored. Path components
    are joined with "/".

    Returns a (remote_files, remote_directories) tuple of sets.
    """
    remote_files = set()
    remote_directories = set()

    def parse_file_node(node, path=""):
        node_type = node["node_type"]
        if node_type == "file":
            remote_files.add(path + node["name"])
        elif node_type == "directory":
            dir_name = path + node["name"]
            remote_directories.add(dir_name)
            for child in node.get("files", []):
                parse_file_node(child, dir_name + "/")

    for node in project_info["files"]:
        parse_file_node(node)
    return remote_files, remote_directories
149
def list_local_files(page_strings):
    """Derive the file and directory names implied by *page_strings*.

    Every page that has at least one string yields "<page>.json" plus all
    of its ancestor directories ("a/b/c" yields "a/b" and "a"). Pages
    without strings contribute nothing.

    Returns a (local_files, local_directories) tuple of sets.
    """
    local_files = set()
    local_directories = set()
    for page, strings in page_strings.items():
        if not strings:
            continue
        local_files.add(page + ".json")
        # Walk up the path one component at a time.
        directory = page
        while "/" in directory:
            directory = directory.rsplit("/", 1)[0]
            local_directories.add(directory)
    return local_files, local_directories
160
def create_directories(crowdin_api, directories):
    """Issue one "add-directory" API call per entry of *directories*."""
    # Sorted so that a parent ("a") is always requested before its
    # children ("a/b"), mirroring the deepest-first order used on removal.
    # NOTE(review): assumes the API wants the parent to exist - confirm.
    for directory in sorted(directories):
        logger.info("Creating directory %s", directory)
        crowdin_api.request("POST", "add-directory", data={"name": directory})
165
def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
    """POST the JSON strings of *files* to *api_endpoint* in batches.

    crowdin_api  -- object providing request() and FILES_PER_REQUEST
    api_endpoint -- endpoint name passed through to request()
    message      -- log format with one %d placeholder for the batch size
    files        -- iterable of "<page>.json" file names
    page_strings -- dict mapping page name to its strings; the entry of
                    each uploaded page is removed from this dict
    """
    for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
        batch = []
        for file_name in group:
            page = os.path.splitext(file_name)[0]
            batch.append((file_name, json.dumps(page_strings[page]),
                          "application/json"))
            # The serialized copy is all that is needed from here on.
            del page_strings[page]
        logger.info(message, len(batch))
        crowdin_api.request("POST", api_endpoint, files=batch)
175
def upload_new_files(crowdin_api, new_files, page_strings):
    """Send *new_files* to the "add-file" endpoint via add_update_files."""
    add_update_files(crowdin_api, "add-file", "Uploading %d new pages...",
                     new_files, page_strings)
179
def update_existing_files(crowdin_api, existing_files, page_strings):
    """Send *existing_files* to "update-file" via add_update_files."""
    add_update_files(crowdin_api, "update-file",
                     "Updating %d existing pages...",
                     existing_files, page_strings)
183
def upload_translations(crowdin_api, source_dir, new_files, required_locales):
    """Upload the local translation files that belong to *new_files*.

    For every locale in *required_locales*, each file of *new_files* found
    under <source_dir>/locales/<locale>/ is posted to the
    "upload-translation" endpoint, in batches of at most
    crowdin_api.FILES_PER_REQUEST files. Does nothing if *new_files* is
    empty.
    """
    def open_locale_files(locale, files):
        # Lazily yield (file_name, content, mime_type) for existing files.
        for file_name in files:
            path = os.path.join(source_dir, "locales", locale, file_name)
            if os.path.isfile(path):
                with open(path, "r") as f:
                    yield (file_name, f.read(), "application/json")

    if not new_files:
        return

    for locale in required_locales:
        for files in grouper(open_locale_files(locale, new_files),
                             crowdin_api.FILES_PER_REQUEST):
            logger.info("Uploading %d existing translation "
                        "files for locale %s...", len(files), locale)
            crowdin_api.request("POST", "upload-translation", files=files,
                                data={"language": locale})
200
def remove_old_files(crowdin_api, old_files):
    """Issue one "delete-file" API call per entry of *old_files*."""
    for file_name in old_files:
        logger.info("Removing old file %s", file_name)
        crowdin_api.request("POST", "delete-file", data={"file": file_name})
205
def remove_old_directories(crowdin_api, old_directories):
    """Issue one "delete-directory" API call per old directory."""
    # Longest paths first, so a nested directory is always requested
    # before the (necessarily shorter) path of its parent.
    for directory in sorted(old_directories, key=len, reverse=True):
        logger.info("Removing old directory %s", directory)
        crowdin_api.request("POST", "delete-directory",
                            data={"name": directory})
210
def download_translations(crowdin_api, source_dir, required_locales):
    """Replace the local translation files with a fresh Crowdin export.

    Requests an export, downloads all.zip, deletes the existing *.json
    files below <source_dir>/locales (descending only into directories
    named after a required locale) and then extracts the archive's .json
    members whose first path component is a required locale.

    Raises urllib3.exceptions.HTTPError if the download status is not 2xx.
    """
    # Imported locally because the module's import block lacks urllib.
    import urllib

    logger.info("Requesting generation of fresh translations archive...")
    result = crowdin_api.request("GET", "export")
    if result.get("success", {}).get("status") == "skipped":
        logger.warning("Archive generation skipped, either "
                       "no changes or API usage excessive")

    logger.info("Downloading translations archive...")
    # Quote the interpolated values so they cannot corrupt the URL.
    response = crowdin_api.connection.request(
        "GET",
        "/api/project/%s/download/all.zip?%s" % (
            urllib.quote(crowdin_api.project_name),
            urllib.urlencode([("key", crowdin_api.api_key)])
        ), preload_content=False
    )
    if response.status < 200 or response.status >= 300:
        raise urllib3.exceptions.HTTPError(response.status, response.data)

    logger.info("Extracting translations archive...")
    # ZipFile needs a seekable file object, hence the in-memory buffer.
    with zipfile.ZipFile(io.BytesIO(response.data), "r") as archive:
        locale_path = os.path.join(source_dir, "locales")
        # First clear existing translation files
        for root, dirs, files in os.walk(locale_path, topdown=True):
            if root == locale_path:
                # Prune the walk in place to the required locales only.
                dirs[:] = [d for d in dirs if d in required_locales]
            for f in files:
                if f.endswith(".json"):
                    os.remove(os.path.join(root, f))
        # Then extract the new ones in place
        for member in archive.namelist():
            path, file_name = posixpath.split(member)
            ext = posixpath.splitext(file_name)[1]
            locale = path.split(posixpath.sep)[0]
            if ext == ".json" and locale in required_locales:
                archive.extract(member, locale_path)
245
def crowdin_sync(source_dir, crowdin_api_key):
    """Synchronise the strings of the site in *source_dir* with Crowdin.

    Reads "defaultlocale" and "crowdin-project-name" from the "general"
    section of the site configuration, uploads new and changed pages
    (plus existing translations for new pages), removes remote files and
    directories that no longer exist locally and finally downloads the
    current translations.

    Exits with status 1 if no local strings are found.
    """
    # The source is kept open for the whole run.
    with FileSource(source_dir) as source:
        config = source.read_config()
        defaultlocale = config.get("general", "defaultlocale")
        crowdin_project_name = config.get("general", "crowdin-project-name")

        crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name)

        logger.info("Requesting project information...")
        project_info = crowdin_api.request("GET", "info")
        page_strings = extract_strings(source, defaultlocale)

        required_locales = {l for l in source.list_locales()
                            if l != defaultlocale}
        enabled_locales = {l["code"] for l in project_info["languages"]}

        required_locales = configure_locales(crowdin_api, required_locales,
                                             enabled_locales, defaultlocale)

        remote_files, remote_directories = list_remote_files(project_info)
        local_files, local_directories = list_local_files(page_strings)

        # Avoid deleting all remote content if there was a problem listing
        # local files
        if not local_files:
            logger.error("No existing strings found, maybe the project "
                         "directory is not set up correctly? Aborting!")
            sys.exit(1)

        # Upload what is new locally.
        new_files = local_files - remote_files
        new_directories = local_directories - remote_directories
        create_directories(crowdin_api, new_directories)
        upload_new_files(crowdin_api, new_files, page_strings)
        upload_translations(crowdin_api, source_dir, new_files,
                            required_locales)

        # Refresh what exists on both sides.
        existing_files = local_files - new_files
        update_existing_files(crowdin_api, existing_files, page_strings)

        # Remove what only exists remotely.
        old_files = remote_files - local_files
        old_directories = remote_directories - local_directories
        remove_old_files(crowdin_api, old_files)
        remove_old_directories(crowdin_api, old_directories)

        download_translations(crowdin_api, source_dir, required_locales)
        logger.info("Crowdin sync completed.")
289
# Command line entry point:
#   python -m cms.bin.translate www_directory crowdin_project_api_key
#       [logging_level]
if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: python -m cms.bin.translate www_directory "
                         "crowdin_project_api_key [logging_level]\n")
        sys.exit(1)

    logging.basicConfig()
    # The optional third argument overrides the default INFO level.
    logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

    source_dir, crowdin_api_key = sys.argv[1:3]
    crowdin_sync(source_dir, crowdin_api_key)
OLDNEW
« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | cms/converters.py » ('J')

Powered by Google App Engine
This is Rietveld