OLD | NEW |
---|---|
(Empty) | |
1 # coding: utf-8 | |
2 | |
3 # This file is part of the Adblock Plus web scripts, | |
4 # Copyright (C) 2006-2015 Eyeo GmbH | |
5 # | |
6 # Adblock Plus is free software: you can redistribute it and/or modify | |
7 # it under the terms of the GNU General Public License version 3 as | |
8 # published by the Free Software Foundation. | |
9 # | |
10 # Adblock Plus is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
17 | |
18 import io | |
19 import itertools | |
20 import json | |
21 import logging | |
22 import os | |
23 import sys | |
24 import zipfile | |
25 | |
26 import requests | |
Sebastian Noack
2015/07/08 13:03:19
I see, we use "requests", because urllib doesn't h
Sebastian Noack
2015/07/08 14:23:00
I just realized that simply using built-in urllib2
kzar
2015/07/08 15:26:41
I discussed using the requests library with Wladimir
Sebastian Noack
2015/07/08 15:43:50
I'm certainly curious about the reason, you decide
Wladimir Palant
2015/07/08 23:11:05
Doing multipart encoding manually is very awkward.
Sebastian Noack
2015/07/09 21:26:55
Personally, I'd still prefer to go with built-in urllib2
Wladimir Palant
2015/07/10 21:24:04
Nope, they are both hacky and I'd definitely prefe
kzar
2015/07/11 19:21:17
Done.
| |
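The thread above turns on multipart/form-data: the Crowdin endpoints used later in this script expect file uploads, which requests encodes automatically, while urllib2 would need the multipart body and boundary built by hand. A minimal sketch of such an upload (the project name, API key and payload below are placeholders, not real credentials):

    # Hypothetical example: uploading a strings file as multipart/form-data.
    # requests builds the multipart body from the "files" mapping; with urllib2
    # the boundary and body would have to be assembled manually.
    import requests

    files = {"files[about.json]": ("about.json", '{"title": {"message": "About"}}')}
    response = requests.post(
        "https://api.crowdin.com/api/project/example-project/add-file"
        "?key=YOUR_API_KEY&json=1",
        files=files)
    response.raise_for_status()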
27 | |
28 import cms.utils | |
29 from cms.sources import FileSource | |
30 | |
31 logger = logging.getLogger("cms.bin.translate") | |
32 | |
33 class CrowdinAPI: | |
34 FILES_PER_REQUEST = 20 | |
35 | |
36 def __init__(self, api_key, project_name, defaultlocale): | |
37 self.api_key = api_key | |
38 self.project_name = project_name | |
39 self.defaultlocale = defaultlocale | |
Wladimir Palant
2015/07/08 23:11:06
It doesn't look like this field is ever used - and
kzar
2015/07/11 19:21:16
Done.
| |
40 | |
41 def request(self, request_method, api_endpoint, **kwargs): | |
42 url = "https://api.crowdin.com/api/project/%s/%s?key=%s&json=1" % ( | |
43 self.project_name, api_endpoint, self.api_key | |
44 ) | |
45 try: | |
46 response = requests.request(request_method, url, **kwargs) | |
47 response.raise_for_status() | |
48 except requests.exceptions.HTTPError as e: | |
49 logger.error("API call to %s failed:\n%s" % (url, response.text)) | |
50 raise | |
51 except requests.exceptions.ConnectionError: | |
Wladimir Palant
2015/07/08 23:11:06
There are more exception classes - ConnectionError
kzar
2015/07/11 19:21:17
Done.
| |
52 logger.error("Connection to API failed for endpoint %s" % url) | |
53 raise | |
54 | |
55 try: | |
56 return response.json() | |
57 except ValueError: | |
58 logger.error("Invalid response returned by API endpoint %s" % url) | |
59 raise | |
60 | |
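Wladimir's remark above about additional exception classes points at the fact that requests can raise more than HTTPError and ConnectionError (timeouts, redirect loops, and so on), all of which share requests.exceptions.RequestException as a base class. A sketch of a single handler covering them, assuming one error path is acceptable here (fetch_json is a made-up helper for illustration):

    # Sketch: RequestException is the base class of ConnectionError, HTTPError,
    # Timeout and the other errors raised by requests, so one handler covers
    # every failure mode of the call.
    import logging
    import requests

    logger = logging.getLogger("cms.bin.translate")

    def fetch_json(url):
        try:
            response = requests.get(url)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error("API call to %s failed: %s", url, e)
            raise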
61 | |
62 def grouper(iterable, n): | |
63 iterator = iter(iterable) | |
64 while True: | |
65 chunk = tuple(itertools.islice(iterator, n)) | |
66 if chunk: | |
Sebastian Noack
2015/07/08 13:03:19
Nit: You could get rid of the else block:
if not
kzar
2015/07/11 19:21:17
Done.
| |
67 yield chunk | |
68 else: | |
69 break | |
70 | |
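Sebastian's nit above is about dropping the else branch by inverting the test; after that change the generator reads roughly like this sketch:

    # Sketch of the suggested shape: exit as soon as the iterator is exhausted,
    # then yield, so no else branch is needed.
    import itertools

    def grouper(iterable, n):
        iterator = iter(iterable)
        while True:
            chunk = tuple(itertools.islice(iterator, n))
            if not chunk:
                break
            yield chunk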
71 def extract_strings(source, defaultlocale): | |
72 logger.info("Extracting page strings (please be patient)...") | |
73 page_strings = {} | |
74 | |
75 def record_string(page, name, default, comment, fixed_strings): | |
76 store = page_strings.setdefault(page, {}) | |
Sebastian Noack
2015/07/08 13:03:20
Do we care to not unnecessarily change the order o
Wladimir Palant
2015/07/08 23:11:06
Not necessarily relevant for diffs but Crowdin wil
kzar
2015/07/11 19:21:16
Done.
| |
77 store[name] = {"message": default} | |
78 | |
79 if fixed_strings: | |
80 comment = comment + "\n" if comment else "" | |
Sebastian Noack
2015/07/08 13:03:20
Nit. The ternary operator just adds unneeded complexity.
kzar
2015/07/11 19:21:17
`comment` might be None, hence this logic.
| |
81 comment += ", ".join("{%d}: %s" % (i, s) | |
Sebastian Noack
2015/07/08 13:03:20
Nit: No reason to pack/unpack the sequence here:
kzar
2015/07/11 19:21:17
Done.
| |
82 for i, s in enumerate(fixed_strings, 1)) | |
83 if comment: | |
84 store[name]["description"] = comment | |
85 | |
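The ordering thread above (the review notes that Crowdin presents strings in the order they appear in the uploaded JSON) is the kind of thing usually handled with collections.OrderedDict; a sketch of how record_string could keep insertion order, with a simplified signature for illustration and assuming that is roughly what the follow-up change did:

    # Sketch: store strings in an ordered mapping so the JSON uploaded to
    # Crowdin lists them in the order they were recorded on the page.
    import json
    from collections import OrderedDict

    page_strings = {}

    def record_string(page, name, default):
        store = page_strings.setdefault(page, OrderedDict())
        store[name] = {"message": default}

    record_string("about", "title", "About us")
    record_string("about", "body", "Who we are and what we do")
    print(json.dumps(page_strings["about"]))
    # keys come out in insertion order: "title" first, then "body"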
86 for page, format in source.list_pages(): | |
87 cms.utils.process_page(source, defaultlocale, page, | |
88 format=format, record_default_strings=record_string) | |
Sebastian Noack
2015/07/09 21:26:55
Recording the default strings is what we do here.
kzar
2015/07/11 19:21:16
Done.
| |
89 return page_strings | |
90 | |
91 def configure_locales(crowdin_api, required_locales, enabled_locales, | |
92 defaultlocale): | |
93 logger.info("Checking which locales are supported by Crowdin...") | |
94 response = crowdin_api.request("GET", "supported-languages") | |
95 | |
96 supported_locales = {l["crowdin_code"] for l in response} | |
97 skipped_locales = required_locales - supported_locales | |
98 | |
99 if skipped_locales: | |
100 logger.warning("Ignoring locales that Crowdin doesn't support: %s" % ( | |
101 ", ".join(skipped_locales) | |
102 )) | |
103 required_locales -= skipped_locales | |
104 | |
105 # It's useful to have a list of all locales to skip | |
106 skipped_locales.add(defaultlocale) | |
107 | |
108 if not required_locales.issubset(enabled_locales): | |
109 logger.info("Enabling the required locales for the Crowdin project...") | |
110 crowdin_api.request( | |
111 "POST", "edit-project", | |
112 data={"languages[]": list(enabled_locales | required_locales)} | |
113 ) | |
114 | |
115 return required_locales, skipped_locales | |
116 | |
117 def list_remote_files(project_info): | |
118 def parse_file_node(node, path=""): | |
119 if node["node_type"] == "file": | |
120 remote_files.add(path + node["name"]) | |
121 elif node["node_type"] == "directory": | |
122 dir_name = path + node["name"] | |
123 remote_directories.add(dir_name) | |
124 for file in node.get("files", []): | |
125 parse_file_node(file, dir_name + "/") | |
126 | |
127 remote_files = set() | |
128 remote_directories = set() | |
129 for node in project_info["files"]: | |
130 parse_file_node(node) | |
131 return remote_files, remote_directories | |
132 | |
133 def list_local_files(page_strings): | |
134 local_files = set() | |
135 local_directories = set() | |
136 for page, strings in page_strings.iteritems(): | |
137 if strings: | |
138 local_files.add(page + ".json") | |
139 while "/" in page: | |
140 page = page.rsplit("/", 1)[0] | |
141 local_directories.add(page) | |
142 return local_files, local_directories | |
143 | |
144 def create_directories(crowdin_api, directories): | |
145 for directory in directories: | |
146 logger.info("Creating directory %s" % directory) | |
147 crowdin_api.request("POST", "add-directory", data={"name": directory}) | |
148 | |
149 def add_update_files(crowdin_api, api_endpoint, message, files, page_strings): | |
150 for group in grouper(files, CrowdinAPI.FILES_PER_REQUEST): | |
Wladimir Palant
2015/07/08 23:11:08
No need to assume that crowdin_api is a CrowdinAPI instance
kzar
2015/07/11 19:21:16
Done.
| |
151 files = {} | |
152 for file_name in group: | |
153 page = os.path.splitext(file_name)[0] | |
154 files["files[%s]" % file_name] = (file_name, json.dumps(page_strings[page])) | |
155 del page_strings[page] | |
156 logger.info(message % len(files)) | |
157 crowdin_api.request("POST", api_endpoint, files=files) | |
158 | |
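The review note above asks for FILES_PER_REQUEST to be read from the object that was passed in rather than from the CrowdinAPI class directly, so the helper only relies on that attribute being present. A tiny sketch of the difference, using a stripped-down stub of the class:

    # Sketch: a class attribute is also reachable through an instance, so the
    # helper does not need to name the CrowdinAPI class at all.
    class CrowdinAPI:
        FILES_PER_REQUEST = 20

    api = CrowdinAPI()
    assert api.FILES_PER_REQUEST == CrowdinAPI.FILES_PER_REQUEST == 20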
159 def upload_new_files(crowdin_api, new_files, page_strings): | |
160 add_update_files(crowdin_api, "add-file", "Uploading %d new pages...", | |
161 new_files, page_strings) | |
162 | |
163 def update_existing_files(crowdin_api, existing_files, page_strings): | |
164 add_update_files(crowdin_api, "update-file", "Updating %d existing pages...", | |
165 existing_files, page_strings) | |
166 | |
167 def upload_translations(crowdin_api, source_dir, new_files, required_locales): | |
168 def open_locale_files(locale, files): | |
169 for file in files: | |
170 path = os.path.join(source_dir, "locales", locale, file) | |
171 if os.path.isfile(path): | |
172 yield ("files[%s]" % file, open(path, "r")) | |
173 | |
174 if new_files: | |
175 for locale in required_locales: | |
176 for files in grouper(open_locale_files(locale, new_files), | |
Sebastian Noack
2015/07/08 13:03:20
You should better first get the chunk of filenames
kzar
2015/07/11 19:21:16
`open_locale_files` is a generator and I consume t
| |
177 CrowdinAPI.FILES_PER_REQUEST): | |
Wladimir Palant
2015/07/08 23:11:05
As above, crowdin_api.FILES_PER_REQUEST please.
kzar
2015/07/11 19:21:16
Done.
| |
178 try: | |
179 logger.info("Uploading %d existing translation " | |
180 "files for locale %s..." % (len(files), locale)) | |
181 crowdin_api.request("POST", "upload-translation", files=dict(files), | |
182 data={"language": locale}) | |
183 finally: | |
184 for file_name, file in files: | |
185 file.close() | |
186 | |
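The exchange above about opening files hinges on grouper() consuming its input lazily: open_locale_files is a generator, so file objects are only created when a chunk is pulled, and at most FILES_PER_REQUEST of them exist before the chunk is uploaded and closed. A small self-contained sketch of that behaviour, where noisy_numbers stands in for the generator that would open files:

    # Sketch: items are produced only when grouper() slices off the next chunk,
    # so a generator that opened files would hold at most n of them at a time.
    import itertools

    def grouper(iterable, n):
        iterator = iter(iterable)
        while True:
            chunk = tuple(itertools.islice(iterator, n))
            if not chunk:
                break
            yield chunk

    def noisy_numbers():
        for i in range(7):
            print("producing %d" % i)  # stands in for open(path)
            yield i

    for chunk in grouper(noisy_numbers(), 3):
        print("got chunk %s" % (chunk,))
    # Three "producing" lines appear before each chunk, showing that nothing
    # is materialized until its chunk is requested.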
187 def remove_old_files(crowdin_api, old_files): | |
188 for file_name in old_files: | |
189 logger.info("Removing old file %s" % file_name) | |
190 crowdin_api.request("POST", "delete-file", data={"file": file_name}) | |
191 | |
192 def remove_old_directories(crowdin_api, old_directories): | |
193 for directory in reversed(sorted(old_directories, key=len)): | |
194 logger.info("Removing old directory %s" % directory) | |
195 crowdin_api.request("POST", "delete-directory", data={"name": directory}) | |
196 | |
197 def download_translations(crowdin_api, source_dir, | |
198 skipped_locales, required_locales): | |
199 logger.info("Requesting generation of fresh translations archive...") | |
200 result = crowdin_api.request("GET", "export") | |
201 if result.get("success", {}).get("status") == "skipped": | |
202 logger.warning("Archive generation skipped, either " | |
203 "no changes or API usage excessive") | |
204 | |
205 logger.info("Downloading translations archive...") | |
206 response = requests.get( | |
207 "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % ( | |
208 crowdin_api.project_name, crowdin_api.api_key | |
209 ) | |
210 ) | |
211 response.raise_for_status() | |
212 logger.info("Extracting translations archive...") | |
213 with zipfile.ZipFile(io.BytesIO(response.content), "r") as archive: | |
214 locale_path = os.path.join(source_dir, "locales") | |
215 # First clear existing translation files | |
216 for root, dirs, files in os.walk(locale_path, topdown=True): | |
217 if root == locale_path: | |
218 # Don't delete locale files for unsupported locales or the default | |
219 dirs[:] = [d for d in dirs if d not in skipped_locales] | |
220 for f in files: | |
221 if f.endswith(".json"): | |
222 os.remove(os.path.join(root, f)) | |
223 # Then extract the new ones in place | |
224 for member in archive.namelist(): | |
225 path, file_name = os.path.split(member) | |
226 ext = os.path.splitext(file_name)[1] | |
227 locale = os.path.normpath(path).split(os.sep)[0] | |
228 if ext == ".json" and locale in required_locales: | |
229 archive.extract(member, locale_path) | |
Wladimir Palant
2015/07/08 23:11:08
Please use posixpath module here rather than os.path
kzar
2015/07/11 19:21:16
Done.
| |
230 | |
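The posixpath request above follows from zip semantics: member names inside an archive always use forward slashes, whatever the host platform, so parsing them with posixpath behaves the same everywhere, while os.path would expect backslashes on Windows. A sketch with a made-up member name:

    # Sketch: parse a zip member name with posixpath so the result does not
    # depend on the operating system's path separator.
    import posixpath

    member = "de/about/team.json"                     # hypothetical archive entry
    path, file_name = posixpath.split(member)         # "de/about", "team.json"
    ext = posixpath.splitext(file_name)[1]            # ".json"
    locale = posixpath.normpath(path).split("/")[0]   # "de"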
231 def crowdin_sync(source_dir, crowdin_api_key): | |
232 with FileSource(source_dir) as source: | |
233 config = source.read_config() | |
234 defaultlocale = config.get("general", "defaultlocale") | |
235 crowdin_project_name = config.get("general", "crowdin-project-name") | |
236 | |
237 crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name, | |
238 defaultlocale) | |
239 | |
240 logger.info("Requesting project information...") | |
241 project_info = crowdin_api.request("GET", "info") | |
242 page_strings = extract_strings(source, defaultlocale) | |
243 | |
244 required_locales = {l for l in source.list_locales() if l != defaultlocale} | |
245 enabled_locales = {l["code"] for l in project_info["languages"]} | |
246 | |
247 required_locales, skipped_locales = configure_locales( | |
248 crowdin_api, required_locales, enabled_locales, defaultlocale | |
249 ) | |
250 remote_files, remote_directories = list_remote_files(project_info) | |
251 local_files, local_directories = list_local_files(page_strings) | |
252 | |
253 # Avoid deleting all remote content if there was a problem listing local files | |
254 if not local_files: | |
255 logger.error("No existing strings found, maybe the project directory is " | |
256 "not set up correctly? Aborting!") | |
257 sys.exit(1) | |
258 | |
259 new_files = local_files - remote_files | |
260 new_directories = local_directories - remote_directories | |
261 create_directories(crowdin_api, new_directories) | |
262 upload_new_files(crowdin_api, new_files, page_strings) | |
263 upload_translations(crowdin_api, source_dir, new_files, required_locales) | |
264 | |
265 existing_files = local_files - new_files | |
266 update_existing_files(crowdin_api, existing_files, page_strings) | |
267 | |
268 old_files = remote_files - local_files | |
269 old_directories = remote_directories - local_directories | |
270 remove_old_files(crowdin_api, old_files) | |
271 remove_old_directories(crowdin_api, old_directories) | |
272 | |
273 download_translations(crowdin_api, source_dir, | |
274 skipped_locales, required_locales) | |
275 logger.info("Crowdin sync completed.") | |
276 | |
277 if __name__ == "__main__": | |
278 if len(sys.argv) < 3: | |
279 print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]" | |
280 sys.exit(1) | |
281 | |
282 logging.basicConfig() | |
283 logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO) | |
284 | |
285 source_dir, crowdin_api_key = sys.argv[1:3] | |
286 crowdin_sync(source_dir, crowdin_api_key) | |