Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Left Patch Set: Addressed Wladimir's feedback Created July 2, 2015, 12:29 p.m.
Right Patch Set: Give query_params a default value Created July 16, 2015, 12:47 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « README.md ('k') | cms/converters.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import collections
18 import io 19 import io
19 import itertools 20 import itertools
20 import json 21 import json
21 import logging 22 import logging
22 import os 23 import os
24 import posixpath
23 import sys 25 import sys
26 import urllib
24 import zipfile 27 import zipfile
25 28
26 import requests 29 import urllib3
Sebastian Noack 2015/07/08 13:03:19 I see, we use "requests", because urllib doesn't h
Sebastian Noack 2015/07/08 14:23:00 I just realized that simply using built-in urllib2
kzar 2015/07/08 15:26:41 I discussed using the requests library with Wladim
Sebastian Noack 2015/07/08 15:43:50 I'm certainly curious about the reason, you decide
Wladimir Palant 2015/07/08 23:11:05 Doing multipart encoding manually is very awkward.
Sebastian Noack 2015/07/09 21:26:55 Personally, I'd still prefer to go with built-in u
Wladimir Palant 2015/07/10 21:24:04 Nope, they are both hacky and I'd definitely prefe
kzar 2015/07/11 19:21:17 Done.
27 30
28 import cms.utils 31 import cms.utils
29 from cms.sources import FileSource 32 from cms.sources import FileSource
30 33
31 logger = logging.getLogger("cms.bin.translate") 34 logger = logging.getLogger("cms.bin.translate")
32 35
33 class CrowdinAPI: 36 class CrowdinAPI:
34 FILES_PER_REQUEST = 20 37 FILES_PER_REQUEST = 20
35 38
36 def __init__(self, api_key, project_name, defaultlocale): 39 def __init__(self, api_key, project_name):
37 self.api_key = api_key 40 self.api_key = api_key
38 self.project_name = project_name 41 self.project_name = project_name
39 self.defaultlocale = defaultlocale 42 self.connection = urllib3.connection_from_url("https://api.crowdin.com/")
Wladimir Palant 2015/07/08 23:11:06 It doesn't look like this field is ever used - and
kzar 2015/07/11 19:21:16 Done.
40 43
41 def request(self, request_method, api_endpoint, **kwargs): 44 def raw_request(self, request_method, api_endpoint, query_params=(), **kwargs):
42 url = "https://api.crowdin.com/api/project/%s/%s?key=%s&json=1" % ( 45 url = "/api/project/%s/%s?%s" % (
43 self.project_name, api_endpoint, self.api_key 46 urllib.quote(self.project_name),
47 urllib.quote(api_endpoint),
48 urllib.urlencode((("key", self.api_key),) + query_params)
44 ) 49 )
45 try: 50 try:
46 response = requests.request(request_method, url, **kwargs) 51 response = self.connection.request(
47 response.raise_for_status() 52 request_method, str(url), **kwargs
48 except requests.exceptions.HTTPError as e: 53 )
49 logger.error("API call to %s failed:\n%s" % (url, response.text)) 54 except urllib3.exceptions.HTTPError:
55 logger.error("Connection to API endpoint %s failed", url)
50 raise 56 raise
51 except requests.exceptions.ConnectionError: 57 if response.status < 200 or response.status >= 300:
Wladimir Palant 2015/07/08 23:11:06 There are more exception classes - ConnectionError
kzar 2015/07/11 19:21:17 Done.
52 logger.error("Connection to API failed for endpoint %s" % url) 58 logger.error("API call to %s failed:\n%s", url, response.data)
53 raise 59 raise urllib3.exceptions.HTTPError(response.status)
60 return response
61
62 def request(self, request_method, api_endpoint, data=None, files=None):
63 fields = []
64 if data:
65 for name, value in data.iteritems():
66 if isinstance(value, basestring):
67 fields.append((name, value))
68 else:
69 fields.extend((name + "[]", v) for v in value)
70 if files:
71 fields.extend(("files[%s]" % f[0], f) for f in files)
72
73 response = self.raw_request(
74 request_method, api_endpoint, (("json", "1"),),
75 fields=fields, preload_content=False
76 )
54 77
55 try: 78 try:
56 return response.json() 79 return json.load(response)
57 except ValueError: 80 except ValueError:
58 logger.error("Invalid response returned by API endpoint %s" % url) 81 logger.error("Invalid response returned by API endpoint %s", url)
59 raise 82 raise
60 83
61 84
62 def grouper(iterable, n): 85 def grouper(iterable, n):
63 iterator = iter(iterable) 86 iterator = iter(iterable)
64 while True: 87 while True:
65 chunk = tuple(itertools.islice(iterator, n)) 88 chunk = tuple(itertools.islice(iterator, n))
66 if chunk: 89 if not chunk:
Sebastian Noack 2015/07/08 13:03:19 Nit: You could get rid of the else block: if not
kzar 2015/07/11 19:21:17 Done.
67 yield chunk
68 else:
69 break 90 break
91 yield chunk
70 92
71 def extract_strings(source, defaultlocale): 93 def extract_strings(source, defaultlocale):
72 logger.info("Extracting page strings (please be patient)...") 94 logger.info("Extracting page strings (please be patient)...")
73 page_strings = {} 95 page_strings = {}
74 96
75 def record_string(page, name, default, comment, fixed_strings): 97 def record_string(page, locale, name, value, comment, fixed_strings):
76 store = page_strings.setdefault(page, {}) 98 if locale != defaultlocale:
Sebastian Noack 2015/07/08 13:03:20 Do we care to not unnecessarily change the order o
Wladimir Palant 2015/07/08 23:11:06 Not necessarily relevant for diffs but Crowdin wil
kzar 2015/07/11 19:21:16 Done.
77 store[name] = {"message": default} 99 return
100
101 try:
102 store = page_strings[page]
103 except KeyError:
104 store = page_strings[page] = collections.OrderedDict()
105
106 store[name] = {"message": value}
78 107
79 if fixed_strings: 108 if fixed_strings:
80 comment = comment + "\n" if comment else "" 109 comment = comment + "\n" if comment else ""
Sebastian Noack 2015/07/08 13:03:20 Nit. The ternary operator just adds unneeded comple
kzar 2015/07/11 19:21:17 `comment` might be None, hence this logic.
81 comment += ", ".join("{%d}: %s" % (i, s) 110 comment += ", ".join("{%d}: %s" % i_s
Sebastian Noack 2015/07/08 13:03:20 Nit: No reason to pack/unpack the sequence here:
kzar 2015/07/11 19:21:17 Done.
82 for i, s in enumerate(fixed_strings, 1)) 111 for i_s in enumerate(fixed_strings, 1))
83 if comment: 112 if comment:
84 store[name]["description"] = comment 113 store[name]["description"] = comment
85 114
86 for page, format in source.list_pages(): 115 for page, format in source.list_pages():
87 cms.utils.process_page(source, defaultlocale, page, 116 cms.utils.process_page(source, defaultlocale, page,
88 format=format, record_default_strings=record_string) 117 format=format, localized_string_callback=record_string)
Sebastian Noack 2015/07/09 21:26:55 Recording the default strings is what we do here.
kzar 2015/07/11 19:21:16 Done.
89 return page_strings 118 return page_strings
90 119
91 def configure_locales(crowdin_api, required_locales, enabled_locales, 120 def configure_locales(crowdin_api, required_locales, enabled_locales,
92 defaultlocale): 121 defaultlocale):
93 logger.info("Checking which locales are supported by Crowdin...") 122 logger.info("Checking which locales are supported by Crowdin...")
94 response = crowdin_api.request("GET", "supported-languages") 123 response = crowdin_api.request("GET", "supported-languages")
95 124
96 supported_locales = {l["crowdin_code"] for l in response} 125 supported_locales = {l["crowdin_code"] for l in response}
97 skipped_locales = required_locales - supported_locales 126 skipped_locales = required_locales - supported_locales
98 127
99 if skipped_locales: 128 if skipped_locales:
100 logger.warning("Ignoring locales that Crowdin doesn't support: %s" % ( 129 logger.warning("Ignoring locales that Crowdin doesn't support: %s",
101 ", ".join(skipped_locales) 130 ", ".join(skipped_locales))
102 ))
103 required_locales -= skipped_locales 131 required_locales -= skipped_locales
104
105 # It's useful to have a list of all locales to skip
106 skipped_locales.add(defaultlocale)
107 132
108 if not required_locales.issubset(enabled_locales): 133 if not required_locales.issubset(enabled_locales):
109 logger.info("Enabling the required locales for the Crowdin project...") 134 logger.info("Enabling the required locales for the Crowdin project...")
110 crowdin_api.request( 135 crowdin_api.request(
111 "POST", "edit-project", 136 "POST", "edit-project",
112 data={"languages[]": list(enabled_locales | required_locales)} 137 data={"languages": enabled_locales | required_locales}
113 ) 138 )
114 139
115 return required_locales, skipped_locales 140 return required_locales
116 141
117 def list_remote_files(project_info): 142 def list_remote_files(project_info):
118 def parse_file_node(node, path=""): 143 def parse_file_node(node, path=""):
119 if node["node_type"] == "file": 144 if node["node_type"] == "file":
120 remote_files.add(path + node["name"]) 145 remote_files.add(path + node["name"])
121 elif node["node_type"] == "directory": 146 elif node["node_type"] == "directory":
122 dir_name = path + node["name"] 147 dir_name = path + node["name"]
123 remote_directories.add(dir_name) 148 remote_directories.add(dir_name)
124 for file in node.get("files", []): 149 for file in node.get("files", []):
125 parse_file_node(file, dir_name + "/") 150 parse_file_node(file, dir_name + "/")
(...skipping 10 matching lines...) Expand all
136 for page, strings in page_strings.iteritems(): 161 for page, strings in page_strings.iteritems():
137 if strings: 162 if strings:
138 local_files.add(page + ".json") 163 local_files.add(page + ".json")
139 while "/" in page: 164 while "/" in page:
140 page = page.rsplit("/", 1)[0] 165 page = page.rsplit("/", 1)[0]
141 local_directories.add(page) 166 local_directories.add(page)
142 return local_files, local_directories 167 return local_files, local_directories
143 168
144 def create_directories(crowdin_api, directories): 169 def create_directories(crowdin_api, directories):
145 for directory in directories: 170 for directory in directories:
146 logger.info("Creating directory %s" % directory) 171 logger.info("Creating directory %s", directory)
147 crowdin_api.request("POST", "add-directory", data={"name": directory}) 172 crowdin_api.request("POST", "add-directory", data={"name": directory})
148 173
149 def add_update_files(crowdin_api, api_endpoint, message, files, page_strings): 174 def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
150 for group in grouper(files, CrowdinAPI.FILES_PER_REQUEST): 175 for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
Wladimir Palant 2015/07/08 23:11:08 No need to assume that crowdin_api is a CrowdinAPI
kzar 2015/07/11 19:21:16 Done.
151 files = {} 176 files = []
152 for file_name in group: 177 for file_name in group:
153 page = os.path.splitext(file_name)[0] 178 page = os.path.splitext(file_name)[0]
154 files["files[%s]" % file_name] = (file_name, json.dumps(page_strings[page])) 179 files.append((file_name, json.dumps(page_strings[page]), "application/json"))
155 del page_strings[page] 180 del page_strings[page]
156 logger.info(message % len(files)) 181 logger.info(message, len(files))
157 crowdin_api.request("POST", api_endpoint, files=files) 182 crowdin_api.request("POST", api_endpoint, files=files)
158 183
159 def upload_new_files(crowdin_api, new_files, page_strings): 184 def upload_new_files(crowdin_api, new_files, page_strings):
160 add_update_files(crowdin_api, "add-file", "Uploading %d new pages...", 185 add_update_files(crowdin_api, "add-file", "Uploading %d new pages...",
161 new_files, page_strings) 186 new_files, page_strings)
162 187
163 def update_existing_files(crowdin_api, existing_files, page_strings): 188 def update_existing_files(crowdin_api, existing_files, page_strings):
164 add_update_files(crowdin_api, "update-file", "Updating %d existing pages...", 189 add_update_files(crowdin_api, "update-file", "Updating %d existing pages...",
165 existing_files, page_strings) 190 existing_files, page_strings)
166 191
167 def upload_translations(crowdin_api, source_dir, new_files, required_locales): 192 def upload_translations(crowdin_api, source_dir, new_files, required_locales):
168 def open_locale_files(locale, files): 193 def open_locale_files(locale, files):
169 for file in files: 194 for file_name in files:
170 path = os.path.join(source_dir, "locales", locale, file) 195 path = os.path.join(source_dir, "locales", locale, file_name)
171 if os.path.isfile(path): 196 if os.path.isfile(path):
172 yield ("files[%s]" % file, open(path, "r")) 197 with open(path, "rb") as f:
198 yield (file_name, f.read(), "application/json")
173 199
174 if new_files: 200 if new_files:
175 for locale in required_locales: 201 for locale in required_locales:
176 for files in grouper(open_locale_files(locale, new_files), 202 for files in grouper(open_locale_files(locale, new_files),
Sebastian Noack 2015/07/08 13:03:20 You should better first get the chunk of filenames
kzar 2015/07/11 19:21:16 `open_locale_files` is a generator and I consume t
177 CrowdinAPI.FILES_PER_REQUEST): 203 crowdin_api.FILES_PER_REQUEST):
Wladimir Palant 2015/07/08 23:11:05 As above, crowdin_api.FILES_PER_REQUEST please.
kzar 2015/07/11 19:21:16 Done.
178 try: 204 logger.info("Uploading %d existing translation "
179 logger.info("Uploading %d existing translation " 205 "files for locale %s...", len(files), locale)
180 "files for locale %s..." % (len(files), locale)) 206 crowdin_api.request("POST", "upload-translation", files=files,
181 crowdin_api.request("POST", "upload-translation", files=dict(files), 207 data={"language": locale})
182 data={"language": locale})
183 finally:
184 for file_name, file in files:
185 file.close()
186 208
187 def remove_old_files(crowdin_api, old_files): 209 def remove_old_files(crowdin_api, old_files):
188 for file_name in old_files: 210 for file_name in old_files:
189 logger.info("Removing old file %s" % file_name) 211 logger.info("Removing old file %s", file_name)
190 crowdin_api.request("POST", "delete-file", data={"file": file_name}) 212 crowdin_api.request("POST", "delete-file", data={"file": file_name})
191 213
192 def remove_old_directories(crowdin_api, old_directories): 214 def remove_old_directories(crowdin_api, old_directories):
193 for directory in reversed(sorted(old_directories, key=len)): 215 for directory in reversed(sorted(old_directories, key=len)):
194 logger.info("Removing old directory %s" % directory) 216 logger.info("Removing old directory %s", directory)
195 crowdin_api.request("POST", "delete-directory", data={"name": directory}) 217 crowdin_api.request("POST", "delete-directory", data={"name": directory})
196 218
197 def download_translations(crowdin_api, source_dir, 219 def download_translations(crowdin_api, source_dir, required_locales):
198 skipped_locales, required_locales):
199 logger.info("Requesting generation of fresh translations archive...") 220 logger.info("Requesting generation of fresh translations archive...")
200 result = crowdin_api.request("GET", "export") 221 result = crowdin_api.request("GET", "export")
201 if result.get("success", {}).get("status") == "skipped": 222 if result.get("success", {}).get("status") == "skipped":
202 logger.warning("Archive generation skipped, either " 223 logger.warning("Archive generation skipped, either "
203 "no changes or API usage excessive") 224 "no changes or API usage excessive")
204 225
205 logger.info("Downloading translations archive...") 226 logger.info("Downloading translations archive...")
206 response = requests.get( 227 response = crowdin_api.raw_request("GET", "download/all.zip")
207 "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % ( 228
208 crowdin_api.project_name, crowdin_api.api_key
209 )
210 )
211 response.raise_for_status()
212 logger.info("Extracting translations archive...") 229 logger.info("Extracting translations archive...")
213 with zipfile.ZipFile(io.BytesIO(response.content), "r") as archive: 230 with zipfile.ZipFile(io.BytesIO(response.data), "r") as archive:
214 locale_path = os.path.join(source_dir, "locales") 231 locale_path = os.path.join(source_dir, "locales")
215 # First clear existing translation files 232 # First clear existing translation files
216 for root, dirs, files in os.walk(locale_path, topdown=True): 233 for root, dirs, files in os.walk(locale_path, topdown=True):
217 if root == locale_path: 234 if root == locale_path:
218 # Don't delete locale files for unsupported locales or the default 235 dirs[:] = [d for d in dirs if d in required_locales]
219 dirs[:] = [d for d in dirs if d not in skipped_locales]
220 for f in files: 236 for f in files:
221 if f.endswith(".json"): 237 if f.lower().endswith(".json"):
222 os.remove(os.path.join(root, f)) 238 os.remove(os.path.join(root, f))
223 # Then extract the new ones in place 239 # Then extract the new ones in place
224 for member in archive.namelist(): 240 for member in archive.namelist():
225 path, file_name = os.path.split(member) 241 path, file_name = posixpath.split(member)
226 ext = os.path.splitext(file_name)[1] 242 ext = posixpath.splitext(file_name)[1]
227 locale = os.path.normpath(path).split(os.sep)[0] 243 locale = path.split(posixpath.sep)[0]
228 if ext == ".json" and locale in required_locales: 244 if ext.lower() == ".json" and locale in required_locales:
229 archive.extract(member, locale_path) 245 archive.extract(member, locale_path)
Wladimir Palant 2015/07/08 23:11:08 Please use posixpath module here rather than os.pa
kzar 2015/07/11 19:21:16 Done.
230 246
231 def crowdin_sync(source_dir, crowdin_api_key): 247 def crowdin_sync(source_dir, crowdin_api_key):
232 with FileSource(source_dir) as source: 248 with FileSource(source_dir) as source:
233 config = source.read_config() 249 config = source.read_config()
234 defaultlocale = config.get("general", "defaultlocale") 250 defaultlocale = config.get("general", "defaultlocale")
235 crowdin_project_name = config.get("general", "crowdin-project-name") 251 crowdin_project_name = config.get("general", "crowdin-project-name")
236 252
237 crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name, 253 crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name)
238 defaultlocale)
239 254
240 logger.info("Requesting project information...") 255 logger.info("Requesting project information...")
241 project_info = crowdin_api.request("GET", "info") 256 project_info = crowdin_api.request("GET", "info")
242 page_strings = extract_strings(source, defaultlocale) 257 page_strings = extract_strings(source, defaultlocale)
243 258
244 required_locales = {l for l in source.list_locales() if l != defaultlocale} 259 required_locales = {l for l in source.list_locales() if l != defaultlocale}
245 enabled_locales = {l["code"] for l in project_info["languages"]} 260 enabled_locales = {l["code"] for l in project_info["languages"]}
246 261
247 required_locales, skipped_locales = configure_locales( 262 required_locales = configure_locales(crowdin_api, required_locales,
248 crowdin_api, required_locales, enabled_locales, defaultlocale 263 enabled_locales, defaultlocale)
249 ) 264
250 remote_files, remote_directories = list_remote_files(project_info) 265 remote_files, remote_directories = list_remote_files(project_info)
251 local_files, local_directories = list_local_files(page_strings) 266 local_files, local_directories = list_local_files(page_strings)
252 267
253 # Avoid deleting all remote content if there was a problem listing local files 268 # Avoid deleting all remote content if there was a problem listing local files
254 if not local_files: 269 if not local_files:
255 logger.error("No existing strings found, maybe the project directory is " 270 logger.error("No existing strings found, maybe the project directory is "
256 "not set up correctly? Aborting!") 271 "not set up correctly? Aborting!")
257 sys.exit(1) 272 sys.exit(1)
258 273
259 new_files = local_files - remote_files 274 new_files = local_files - remote_files
260 new_directories = local_directories - remote_directories 275 new_directories = local_directories - remote_directories
261 create_directories(crowdin_api, new_directories) 276 create_directories(crowdin_api, new_directories)
262 upload_new_files(crowdin_api, new_files, page_strings) 277 upload_new_files(crowdin_api, new_files, page_strings)
263 upload_translations(crowdin_api, source_dir, new_files, required_locales) 278 upload_translations(crowdin_api, source_dir, new_files, required_locales)
264 279
265 existing_files = local_files - new_files 280 existing_files = local_files - new_files
266 update_existing_files(crowdin_api, existing_files, page_strings) 281 update_existing_files(crowdin_api, existing_files, page_strings)
267 282
268 old_files = remote_files - local_files 283 old_files = remote_files - local_files
269 old_directories = remote_directories - local_directories 284 old_directories = remote_directories - local_directories
270 remove_old_files(crowdin_api, old_files) 285 remove_old_files(crowdin_api, old_files)
271 remove_old_directories(crowdin_api, old_directories) 286 remove_old_directories(crowdin_api, old_directories)
272 287
273 download_translations(crowdin_api, source_dir, 288 download_translations(crowdin_api, source_dir, required_locales)
274 skipped_locales, required_locales)
275 logger.info("Crowdin sync completed.") 289 logger.info("Crowdin sync completed.")
276 290
277 if __name__ == "__main__": 291 if __name__ == "__main__":
278 if len(sys.argv) < 3: 292 if len(sys.argv) < 3:
279 print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]" 293 print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]"
280 sys.exit(1) 294 sys.exit(1)
281 295
282 logging.basicConfig() 296 logging.basicConfig()
283 logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO) 297 logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)
284 298
285 source_dir, crowdin_api_key = sys.argv[1:3] 299 source_dir, crowdin_api_key = sys.argv[1:3]
286 crowdin_sync(source_dir, crowdin_api_key) 300 crowdin_sync(source_dir, crowdin_api_key)
LEFTRIGHT

Powered by Google App Engine
This is Rietveld