Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Left Patch Set: Created June 15, 2015, 2:12 p.m.
Right Patch Set: Give query_params a default value Created July 16, 2015, 12:47 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « README.md ('k') | cms/converters.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 from io import BytesIO 18 import collections
19 from itertools import islice 19 import io
Wladimir Palant 2015/06/29 19:05:36 Nit: I'm not a big fan of importing individual fun
kzar 2015/07/02 12:33:11 Done.
20 import itertools
20 import json 21 import json
21 import logging 22 import logging
22 import os 23 import os
24 import posixpath
23 import sys 25 import sys
26 import urllib
24 import zipfile 27 import zipfile
25 28
26 import requests 29 import urllib3
27 30
28 import cms.utils 31 import cms.utils
29 from cms.sources import FileSource 32 from cms.sources import FileSource
30 33
31 FILES_PER_REQUEST = 20
32
33 logger = logging.getLogger("cms.bin.translate") 34 logger = logging.getLogger("cms.bin.translate")
34 crowdin_api_key = None 35
35 crowdin_project_name = None 36 class CrowdinAPI:
36 defaultlocale = None 37 FILES_PER_REQUEST = 20
Wladimir Palant 2015/06/29 19:05:37 The three variables above shouldn't be globals - t
kzar 2015/07/02 12:33:11 Done.
38
39 def __init__(self, api_key, project_name):
40 self.api_key = api_key
41 self.project_name = project_name
42 self.connection = urllib3.connection_from_url("https://api.crowdin.com/")
43
44 def raw_request(self, request_method, api_endpoint, query_params=(), **kwargs):
45 url = "/api/project/%s/%s?%s" % (
46 urllib.quote(self.project_name),
47 urllib.quote(api_endpoint),
48 urllib.urlencode((("key", self.api_key),) + query_params)
49 )
50 try:
51 response = self.connection.request(
52 request_method, str(url), **kwargs
53 )
54 except urllib3.exceptions.HTTPError:
55 logger.error("Connection to API endpoint %s failed", url)
56 raise
57 if response.status < 200 or response.status >= 300:
58 logger.error("API call to %s failed:\n%s", url, response.data)
59 raise urllib3.exceptions.HTTPError(response.status)
60 return response
61
62 def request(self, request_method, api_endpoint, data=None, files=None):
63 fields = []
64 if data:
65 for name, value in data.iteritems():
66 if isinstance(value, basestring):
67 fields.append((name, value))
68 else:
69 fields.extend((name + "[]", v) for v in value)
70 if files:
71 fields.extend(("files[%s]" % f[0], f) for f in files)
72
73 response = self.raw_request(
74 request_method, api_endpoint, (("json", "1"),),
75 fields=fields, preload_content=False
76 )
77
78 try:
79 return json.load(response)
80 except ValueError:
81 logger.error("Invalid response returned by API endpoint %s", url)
82 raise
83
37 84
38 def grouper(iterable, n): 85 def grouper(iterable, n):
39 iterator = iter(iterable) 86 iterator = iter(iterable)
40 while True: 87 while True:
41 chunk = tuple(islice(iterator, n)) 88 chunk = tuple(itertools.islice(iterator, n))
42 if chunk: 89 if not chunk:
43 yield chunk
44 else:
45 break 90 break
Wladimir Palant 2015/06/29 19:05:36 Please link to https://stackoverflow.com/questions
kzar 2015/07/02 12:33:10 I originally did take this function from Stackover
46 91 yield chunk
47 def crowdin_request(request_method, api_endpoint, **kwargs):
48 url = "https://api.crowdin.com/api/project/%s/%s?key=%s&json=1" % (
49 crowdin_project_name, api_endpoint, crowdin_api_key
50 )
51 response = requests.request(request_method, url, **kwargs)
Wladimir Palant 2015/06/29 19:05:36 You seem to be assuming that this will not throw a
kzar 2015/07/02 12:33:12 Done.
52 try:
53 response.raise_for_status()
54 except requests.exceptions.HTTPError as e:
55 logger.error("API call to %s failed:\n%s" % (url, response.text))
56 raise
57 return response.json()
Wladimir Palant 2015/06/29 19:05:37 This call might also raise an exception - if JSON
kzar 2015/07/02 12:33:12 Done.
58 92
59 def extract_strings(source, defaultlocale): 93 def extract_strings(source, defaultlocale):
60 logger.info("Extracting page strings (please be patient)...") 94 logger.info("Extracting page strings (please be patient)...")
61 page_strings = {} 95 page_strings = {}
96
97 def record_string(page, locale, name, value, comment, fixed_strings):
98 if locale != defaultlocale:
99 return
100
101 try:
102 store = page_strings[page]
103 except KeyError:
104 store = page_strings[page] = collections.OrderedDict()
105
106 store[name] = {"message": value}
107
108 if fixed_strings:
109 comment = comment + "\n" if comment else ""
110 comment += ", ".join("{%d}: %s" % i_s
111 for i_s in enumerate(fixed_strings, 1))
112 if comment:
113 store[name]["description"] = comment
114
62 for page, format in source.list_pages(): 115 for page, format in source.list_pages():
63 params = cms.utils.get_page_params(source, defaultlocale, page) 116 cms.utils.process_page(source, defaultlocale, page,
64 strings = params["localedata"] 117 format=format, localized_string_callback=record_string)
65 comments = params["localecomments"]
66 for string_name in strings.iterkeys():
67 strings[string_name] = {"message": strings[string_name]}
68 if string_name in comments:
69 strings[string_name]["description"] = comments[string_name]
70 page_strings[page] = strings
71 return page_strings 118 return page_strings
72 119
73 def ensure_required_locales(required_locales, enabled_locales, defaultlocale): 120 def configure_locales(crowdin_api, required_locales, enabled_locales,
Wladimir Palant 2015/06/29 19:05:36 Nit: configure_locales maybe? This is more about c
kzar 2015/07/02 12:33:11 Done.
121 defaultlocale):
74 logger.info("Checking which locales are supported by Crowdin...") 122 logger.info("Checking which locales are supported by Crowdin...")
75 response = crowdin_request("GET", "supported-languages") 123 response = crowdin_api.request("GET", "supported-languages")
76 124
77 supported_locales = {l["crowdin_code"] for l in response} 125 supported_locales = {l["crowdin_code"] for l in response}
78 skipped_locales = list(required_locales.difference(supported_locales)) 126 skipped_locales = required_locales - supported_locales
Wladimir Palant 2015/06/29 19:05:36 Nit: why convert a set to list here? Just change s
kzar 2015/07/02 12:33:12 Done.
79 127
80 if skipped_locales: 128 if skipped_locales:
81 logger.warning("Ignoring locales that Crowdin doesn't support: %s" % ( 129 logger.warning("Ignoring locales that Crowdin doesn't support: %s",
82 ", ".join(skipped_locales) 130 ", ".join(skipped_locales))
83 )) 131 required_locales -= skipped_locales
84 required_locales = required_locales.intersection(supported_locales)
Wladimir Palant 2015/06/29 19:05:36 Nit: required_locales = required_locales & support
kzar 2015/07/02 12:33:12 Done.
85
86 # It's useful to have a list of all locales to skip
87 skipped_locales.append(defaultlocale)
88 132
89 if not required_locales.issubset(enabled_locales): 133 if not required_locales.issubset(enabled_locales):
90 logger.info("Enabling the required locales for the Crowdin project...") 134 logger.info("Enabling the required locales for the Crowdin project...")
91 crowdin_request( 135 crowdin_api.request(
92 "POST", "edit-project", 136 "POST", "edit-project",
93 data={"languages[]": list(enabled_locales.union(required_locales))} 137 data={"languages": enabled_locales | required_locales}
Wladimir Palant 2015/06/29 19:05:36 Nit: list(enabled_locales | required_locales) plea
kzar 2015/07/02 12:33:11 Done.
94 ) 138 )
95 139
96 return required_locales, skipped_locales 140 return required_locales
97 141
98 def list_remote_files(project_info): 142 def list_remote_files(project_info):
99 def parse_file_node(node, path=""): 143 def parse_file_node(node, path=""):
100 if node["node_type"] == "file": 144 if node["node_type"] == "file":
101 remote_files.add(path + node["name"]) 145 remote_files.add(path + node["name"])
102 elif node["node_type"] == "directory": 146 elif node["node_type"] == "directory":
103 dir_name = path + node["name"] 147 dir_name = path + node["name"]
104 remote_directories.add(dir_name) 148 remote_directories.add(dir_name)
105 for file in node.get("files", []): 149 for file in node.get("files", []):
106 parse_file_node(file, dir_name + "/") 150 parse_file_node(file, dir_name + "/")
107 151
108 remote_files = set() 152 remote_files = set()
109 remote_directories = set() 153 remote_directories = set()
110 map(parse_file_node, project_info["files"]) 154 for node in project_info["files"]:
Wladimir Palant 2015/06/29 19:05:37 You are misusing map() here, its callback should n
kzar 2015/07/02 12:33:12 Done.
155 parse_file_node(node)
111 return remote_files, remote_directories 156 return remote_files, remote_directories
112 157
113 def list_local_files(page_strings): 158 def list_local_files(page_strings):
114 local_files = set() 159 local_files = set()
115 local_directories = set() 160 local_directories = set()
116 for page, strings in page_strings.iteritems(): 161 for page, strings in page_strings.iteritems():
117 if strings: 162 if strings:
118 local_files.add(page + ".json") 163 local_files.add(page + ".json")
119 if "/" in page: 164 while "/" in page:
120 parts = page.split("/")[:-1] 165 page = page.rsplit("/", 1)[0]
121 path = "" 166 local_directories.add(page)
122 while parts:
123 path += parts.pop(0)
124 local_directories.add(path)
125 path += "/"
Wladimir Palant 2015/06/29 19:05:36 I think the following should be simpler: local_
kzar 2015/07/02 12:33:10 Done.
126 return local_files, local_directories 167 return local_files, local_directories
127 168
128 def create_directories(directories): 169 def create_directories(crowdin_api, directories):
129 for directory in directories: 170 for directory in directories:
130 logger.info("Creating directory %s" % directory) 171 logger.info("Creating directory %s", directory)
131 crowdin_request("POST", "add-directory", data={"name": directory}) 172 crowdin_api.request("POST", "add-directory", data={"name": directory})
132 173
133 def add_update_files(api_endpoint, message, files, page_strings): 174 def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
134 for group in grouper(files, FILES_PER_REQUEST): 175 for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
135 files = {} 176 files = []
136 for file_name in group: 177 for file_name in group:
137 page = file_name[:-5] 178 page = os.path.splitext(file_name)[0]
Wladimir Palant 2015/06/29 19:05:37 Use os.path.splitext()?
kzar 2015/07/02 12:33:12 Done.
138 files["files[%s]" % file_name] = (file_name, json.dumps(page_strings[page])) 179 files.append((file_name, json.dumps(page_strings[page]), "application/json"))
139 del page_strings[page] 180 del page_strings[page]
Wladimir Palant 2015/06/29 19:05:37 Why delete the strings?
kzar 2015/07/02 12:33:11 We no longer need them, and as they might be quite
140 logger.info(message % len(files)) 181 logger.info(message, len(files))
141 crowdin_request("POST", api_endpoint, files=files) 182 crowdin_api.request("POST", api_endpoint, files=files)
142 183
143 def upload_new_files(new_files, page_strings): 184 def upload_new_files(crowdin_api, new_files, page_strings):
144 add_update_files("add-file", "Uploading %d new pages...", 185 add_update_files(crowdin_api, "add-file", "Uploading %d new pages...",
145 new_files, page_strings) 186 new_files, page_strings)
146 187
147 def update_existing_files(existing_files, page_strings): 188 def update_existing_files(crowdin_api, existing_files, page_strings):
148 add_update_files("update-file", "Updating %d existing pages...", 189 add_update_files(crowdin_api, "update-file", "Updating %d existing pages...",
149 existing_files, page_strings) 190 existing_files, page_strings)
150 191
151 def upload_translations(source_dir, new_files, required_locales): 192 def upload_translations(crowdin_api, source_dir, new_files, required_locales):
152 def open_locale_files(locale, files): 193 def open_locale_files(locale, files):
153 for file in files: 194 for file_name in files:
154 path = os.path.join(source_dir, "locales", locale, file) 195 path = os.path.join(source_dir, "locales", locale, file_name)
155 if os.path.isfile(path): 196 if os.path.isfile(path):
156 yield ("files[%s]" % file, open(path, "r")) 197 with open(path, "rb") as f:
198 yield (file_name, f.read(), "application/json")
157 199
158 if new_files: 200 if new_files:
159 for locale in required_locales: 201 for locale in required_locales:
160 for files in grouper(open_locale_files(locale, new_files), 202 for files in grouper(open_locale_files(locale, new_files),
161 FILES_PER_REQUEST): 203 crowdin_api.FILES_PER_REQUEST):
162 logger.info("Uploading %d existing translation " 204 logger.info("Uploading %d existing translation "
163 "files for locale %s..." % (len(files), locale)) 205 "files for locale %s...", len(files), locale)
Wladimir Palant 2015/06/29 19:05:37 Uploading per locale might result in doing one req
kzar 2015/07/02 12:33:12 This is deliberate, when uploading translations yo
164 crowdin_request("POST", "upload-translation", files=dict(files), 206 crowdin_api.request("POST", "upload-translation", files=files,
165 data={"language": locale}) 207 data={"language": locale})
166 for f in files: 208
167 f[1].close() 209 def remove_old_files(crowdin_api, old_files):
Wladimir Palant 2015/06/29 19:05:37 These files should be closed regardless of whether
kzar 2015/07/02 12:33:10 Done.
168
169 def remove_old_files(old_files):
170 for file_name in old_files: 210 for file_name in old_files:
171 logger.info("Removing old file %s" % file_name) 211 logger.info("Removing old file %s", file_name)
172 crowdin_request("POST", "delete-file", data={"file": file_name}) 212 crowdin_api.request("POST", "delete-file", data={"file": file_name})
173 213
174 def remove_old_directories(old_directories): 214 def remove_old_directories(crowdin_api, old_directories):
175 for directory in reversed(sorted(old_directories, key=len)): 215 for directory in reversed(sorted(old_directories, key=len)):
176 logger.info("Removing old directory %s" % directory) 216 logger.info("Removing old directory %s", directory)
177 crowdin_request("POST", "delete-directory", data={"name": directory}) 217 crowdin_api.request("POST", "delete-directory", data={"name": directory})
178 218
179 def download_translations(source_dir, skipped_locales): 219 def download_translations(crowdin_api, source_dir, required_locales):
180 logger.info("Requesting generation of fresh translations archive...") 220 logger.info("Requesting generation of fresh translations archive...")
181 result = crowdin_request("GET", "export") 221 result = crowdin_api.request("GET", "export")
182 if result.get("success", {}).get("status") == "skipped": 222 if result.get("success", {}).get("status") == "skipped":
183 logger.warning("Archive generation skipped, either " 223 logger.warning("Archive generation skipped, either "
184 "no changes or API usage excessive") 224 "no changes or API usage excessive")
185 225
186 logger.info("Downloading translations archive...") 226 logger.info("Downloading translations archive...")
187 response = requests.get( 227 response = crowdin_api.raw_request("GET", "download/all.zip")
188 "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % ( 228
189 crowdin_project_name, crowdin_api_key
190 )
191 )
192 response.raise_for_status()
193 logger.info("Extracting translations archive...") 229 logger.info("Extracting translations archive...")
194 with zipfile.ZipFile(BytesIO(response.content), "r") as archive: 230 with zipfile.ZipFile(io.BytesIO(response.data), "r") as archive:
195 locale_path = os.path.join(source_dir, "locales") 231 locale_path = os.path.join(source_dir, "locales")
196 # First clear existing translation files 232 # First clear existing translation files
197 for root, dirs, files in os.walk(locale_path, topdown=True): 233 for root, dirs, files in os.walk(locale_path, topdown=True):
198 if root == locale_path: 234 if root == locale_path:
199 # Don't delete locale files for unsupported locales or the default 235 dirs[:] = [d for d in dirs if d in required_locales]
200 dirs[:] = [d for d in dirs if d not in skipped_locales]
201 for f in files: 236 for f in files:
202 if f.endswith(".json"): 237 if f.lower().endswith(".json"):
203 os.remove(os.path.join(root, f)) 238 os.remove(os.path.join(root, f))
204 # Then extract the new ones in place 239 # Then extract the new ones in place
205 archive.extractall(locale_path) 240 for member in archive.namelist():
Wladimir Palant 2015/06/29 19:05:37 Running extractall is pretty dangerous IMHO. I'd s
kzar 2015/07/02 12:33:12 Done.
241 path, file_name = posixpath.split(member)
242 ext = posixpath.splitext(file_name)[1]
243 locale = path.split(posixpath.sep)[0]
244 if ext.lower() == ".json" and locale in required_locales:
245 archive.extract(member, locale_path)
206 246
207 def crowdin_sync(source_dir, crowdin_api_key): 247 def crowdin_sync(source_dir, crowdin_api_key):
208 global crowdin_project_name, defaultlocale
209
210 with FileSource(source_dir) as source: 248 with FileSource(source_dir) as source:
211 config = source.read_config() 249 config = source.read_config()
212 defaultlocale = config.get("general", "defaultlocale") 250 defaultlocale = config.get("general", "defaultlocale")
213 crowdin_project_name = config.get("general", "crowdin-project-name") 251 crowdin_project_name = config.get("general", "crowdin-project-name")
214 252
253 crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name)
254
215 logger.info("Requesting project information...") 255 logger.info("Requesting project information...")
216 project_info = crowdin_request("GET", "info") 256 project_info = crowdin_api.request("GET", "info")
Wladimir Palant 2015/06/29 19:05:37 Nit: The two lines above and setting enabled_local
kzar 2015/07/02 12:33:11 I originally didn't request the project informatio
217 page_strings = extract_strings(source, defaultlocale) 257 page_strings = extract_strings(source, defaultlocale)
218 258
219 required_locales = {l for l in source.list_locales() if l != defaultlocale} 259 required_locales = {l for l in source.list_locales() if l != defaultlocale}
220 enabled_locales = {l["code"] for l in project_info["languages"]} 260 enabled_locales = {l["code"] for l in project_info["languages"]}
221 261
222 required_locales, skipped_locales = ensure_required_locales( 262 required_locales = configure_locales(crowdin_api, required_locales,
Wladimir Palant 2015/06/29 19:05:37 Why do we need to know the skipped locales here? T
kzar 2015/07/02 12:33:11 We need to avoid deleting locale files for skipped
Wladimir Palant 2015/07/08 23:11:05 You can (and should) limit deleting to required_lo
kzar 2015/07/11 19:21:15 Done.
223 required_locales, enabled_locales, defaultlocale 263 enabled_locales, defaultlocale)
224 ) 264
225 remote_files, remote_directories = list_remote_files(project_info) 265 remote_files, remote_directories = list_remote_files(project_info)
226 local_files, local_directories = list_local_files(page_strings) 266 local_files, local_directories = list_local_files(page_strings)
227 267
228 # Avoid deleting all remote content if there was a problem listing local files 268 # Avoid deleting all remote content if there was a problem listing local files
229 if not local_files: 269 if not local_files:
230 logger.error("No page strings found. (Wrong project directory?) Aborting!") 270 logger.error("No existing strings found, maybe the project directory is "
Wladimir Palant 2015/06/29 19:05:37 Nit: The settings.ini file is there, so it cannot
kzar 2015/07/02 12:33:11 Done.
271 "not set up correctly? Aborting!")
231 sys.exit(1) 272 sys.exit(1)
232 273
233 new_files = list(local_files.difference(remote_files)) 274 new_files = local_files - remote_files
234 new_directories = list(local_directories.difference(remote_directories)) 275 new_directories = local_directories - remote_directories
Wladimir Palant 2015/06/29 19:05:37 Nit: I don't see the point converting sets to list
kzar 2015/07/02 12:33:11 (Cool I found this one myself and later noticed yo
235 create_directories(new_directories) 276 create_directories(crowdin_api, new_directories)
236 upload_new_files(new_files, page_strings) 277 upload_new_files(crowdin_api, new_files, page_strings)
237 upload_translations(source_dir, new_files, required_locales) 278 upload_translations(crowdin_api, source_dir, new_files, required_locales)
238 279
239 existing_files = list(local_files.intersection(remote_files)) 280 existing_files = local_files - new_files
Wladimir Palant 2015/06/29 19:05:36 Nit: existing_files = local_files - new_files?
kzar 2015/07/02 12:33:10 Done.
240 update_existing_files(existing_files, page_strings) 281 update_existing_files(crowdin_api, existing_files, page_strings)
241 282
242 old_files = remote_files.difference(local_files) 283 old_files = remote_files - local_files
243 old_directories = remote_directories.difference(local_directories) 284 old_directories = remote_directories - local_directories
244 remove_old_files(old_files) 285 remove_old_files(crowdin_api, old_files)
245 remove_old_directories(old_directories) 286 remove_old_directories(crowdin_api, old_directories)
246 287
247 download_translations(source_dir, skipped_locales) 288 download_translations(crowdin_api, source_dir, required_locales)
248 logger.info("Crowdin sync completed.") 289 logger.info("Crowdin sync completed.")
249 290
250 if __name__ == "__main__": 291 if __name__ == "__main__":
251 if len(sys.argv) < 3: 292 if len(sys.argv) < 3:
252 print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]" 293 print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]"
253 sys.exit(1) 294 sys.exit(1)
254 295
255 logging.basicConfig() 296 logging.basicConfig()
256 logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO) 297 logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)
257 298
258 source_dir, crowdin_api_key = sys.argv[1:3] 299 source_dir, crowdin_api_key = sys.argv[1:3]
259 crowdin_sync(source_dir, crowdin_api_key) 300 crowdin_sync(source_dir, crowdin_api_key)
LEFTRIGHT

Powered by Google App Engine
This is Rietveld