Index: cms/bin/translate.py |
diff --git a/cms/bin/translate.py b/cms/bin/translate.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..857af2508f018332a8bd25305084f5f32e979be5 |
--- /dev/null |
+++ b/cms/bin/translate.py |
@@ -0,0 +1,286 @@ |
+# coding: utf-8 |
+ |
+# This file is part of the Adblock Plus web scripts, |
+# Copyright (C) 2006-2015 Eyeo GmbH |
+# |
+# Adblock Plus is free software: you can redistribute it and/or modify |
+# it under the terms of the GNU General Public License version 3 as |
+# published by the Free Software Foundation. |
+# |
+# Adblock Plus is distributed in the hope that it will be useful, |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+# GNU General Public License for more details. |
+# |
+# You should have received a copy of the GNU General Public License |
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ |
import io
import itertools
import json
import logging
import os
import posixpath
import sys
import zipfile

import requests
Sebastian Noack
2015/07/08 13:03:19
I see, we use "requests", because urllib doesn't h
Sebastian Noack
2015/07/08 14:23:00
I just realized that simply using built-in urllib2
kzar
2015/07/08 15:26:41
I discussed using the requests library with Wladim
Sebastian Noack
2015/07/08 15:43:50
I'm certainly curious about the reason, you decide
Wladimir Palant
2015/07/08 23:11:05
Doing multipart encoding manually is very awkward.
Sebastian Noack
2015/07/09 21:26:55
Personally, I'd still prefer to go with built-in u
Wladimir Palant
2015/07/10 21:24:04
Nope, they are both hacky and I'd definitely prefe
kzar
2015/07/11 19:21:17
Done.
|
+ |
+import cms.utils |
+from cms.sources import FileSource |
+ |
+logger = logging.getLogger("cms.bin.translate") |
+ |
class CrowdinAPI:
  """Thin wrapper around the Crowdin REST API for a single project."""

  # Crowdin limits how many files may be sent in one request.
  FILES_PER_REQUEST = 20

  def __init__(self, api_key, project_name, defaultlocale):
    self.api_key = api_key
    self.project_name = project_name
    self.defaultlocale = defaultlocale

  def request(self, request_method, api_endpoint, **kwargs):
    """Perform an API call and return the decoded JSON response.

    Extra keyword arguments are passed through to requests.request().
    Raises requests.exceptions.RequestException on connection or HTTP
    errors and ValueError for malformed JSON; every failure is logged
    before being re-raised.
    """
    url = "https://api.crowdin.com/api/project/%s/%s?key=%s&json=1" % (
      self.project_name, api_endpoint, self.api_key
    )
    try:
      response = requests.request(request_method, url, **kwargs)
      response.raise_for_status()
    except requests.exceptions.HTTPError:
      # The server answered; include its error text to aid debugging.
      logger.error("API call to %s failed:\n%s", url, response.text)
      raise
    except requests.exceptions.RequestException:
      # Base class covers connection errors, timeouts, redirect loops
      # etc., not just ConnectionError.
      logger.error("Connection to API failed for endpoint %s", url)
      raise

    try:
      return response.json()
    except ValueError:
      logger.error("Invalid response returned by API endpoint %s", url)
      raise
+ |
+ |
def grouper(iterable, n):
  """Yield successive tuples of up to n items from iterable.

  The last chunk may be shorter when the iterable's length isn't a
  multiple of n; nothing is yielded for an empty iterable.
  """
  iterator = iter(iterable)
  while True:
    chunk = tuple(itertools.islice(iterator, n))
    # Guard clause instead of an else block around the yield.
    if not chunk:
      break
    yield chunk
+ |
def extract_strings(source, defaultlocale):
  """Collect the translatable strings of every page in the source.

  Returns a dict mapping page name to a Crowdin-style structure:
  {string_name: {"message": default_text, "description": comment}}.
  NOTE(review): plain dicts don't preserve insertion order here —
  presumably acceptable because Crowdin sorts strings itself; confirm.
  """
  logger.info("Extracting page strings (please be patient)...")
  page_strings = {}

  # Callback handed to process_page(); invoked once per translatable
  # string found on a page.
  def record_string(page, name, default, comment, fixed_strings):
    store = page_strings.setdefault(page, {})
    store[name] = {"message": default}

    if fixed_strings:
      # comment may be None, hence the conditional instead of a plain +=.
      comment = comment + "\n" if comment else ""
      # Describe the fixed substitutions {1}, {2}, ... so translators
      # know what each placeholder stands for.
      comment += ", ".join("{%d}: %s" % (i, s)
                           for i, s in enumerate(fixed_strings, 1))
    if comment:
      store[name]["description"] = comment

  for page, format in source.list_pages():
    cms.utils.process_page(source, defaultlocale, page,
                           format=format, record_default_strings=record_string)
  return page_strings
+ |
def configure_locales(crowdin_api, required_locales, enabled_locales,
                      defaultlocale):
  """Reconcile the locales required locally with the Crowdin project.

  Drops required locales that Crowdin doesn't support (with a warning)
  and enables any missing required locales on the project. Returns the
  (required_locales, skipped_locales) sets; the caller's input sets are
  left unmodified.
  """
  logger.info("Checking which locales are supported by Crowdin...")
  response = crowdin_api.request("GET", "supported-languages")

  supported_locales = {l["crowdin_code"] for l in response}
  skipped_locales = required_locales - supported_locales

  if skipped_locales:
    logger.warning("Ignoring locales that Crowdin doesn't support: %s",
                   ", ".join(skipped_locales))
    # Build a reduced copy instead of mutating the caller's set with -=.
    required_locales = required_locales - skipped_locales

  # It's useful to have a list of all locales to skip
  skipped_locales.add(defaultlocale)

  if not required_locales.issubset(enabled_locales):
    logger.info("Enabling the required locales for the Crowdin project...")
    crowdin_api.request(
      "POST", "edit-project",
      data={"languages[]": list(enabled_locales | required_locales)}
    )

  return required_locales, skipped_locales
+ |
def list_remote_files(project_info):
  """Return the sets of file paths and directory paths that currently
  exist in the Crowdin project, derived from its "info" response.
  """
  remote_files = set()
  remote_directories = set()

  def walk(node, prefix=""):
    # "node_type" distinguishes plain files from directories, which
    # carry their children under the "files" key.
    node_type = node["node_type"]
    if node_type == "file":
      remote_files.add(prefix + node["name"])
    elif node_type == "directory":
      directory = prefix + node["name"]
      remote_directories.add(directory)
      for child in node.get("files", []):
        walk(child, directory + "/")

  for top_level_node in project_info["files"]:
    walk(top_level_node)
  return remote_files, remote_directories
+ |
def list_local_files(page_strings):
  """Return the sets of file paths and directory paths that the
  extracted page strings should occupy on Crowdin.

  Pages without any translatable strings are skipped entirely.
  """
  local_files = set()
  local_directories = set()
  # items() instead of the Python 2-only iteritems() — equivalent here
  # and keeps the code portable.
  for page, strings in page_strings.items():
    if strings:
      local_files.add(page + ".json")
      # Record every ancestor directory of the page.
      while "/" in page:
        page = page.rsplit("/", 1)[0]
        local_directories.add(page)
  return local_files, local_directories
+ |
def create_directories(crowdin_api, directories):
  """Create each of the given directory paths in the Crowdin project."""
  for name in directories:
    logger.info("Creating directory %s" % name)
    crowdin_api.request("POST", "add-directory", data={"name": name})
+ |
def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
  """Upload string files in batches via the given API endpoint.

  files is an iterable of "<page>.json" names; the matching entries are
  consumed (deleted) from page_strings as they are uploaded. message is
  a logging format string taking the batch size.
  """
  # Look the batch limit up on the instance rather than hard-coding the
  # CrowdinAPI class, so any compatible API object works.
  for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
    # Renamed from "files" to avoid shadowing the parameter.
    upload = {}
    for file_name in group:
      page = os.path.splitext(file_name)[0]
      upload["files[%s]" % file_name] = (file_name, json.dumps(page_strings[page]))
      # The strings are no longer needed once scheduled for upload.
      del page_strings[page]
    logger.info(message % len(upload))
    crowdin_api.request("POST", api_endpoint, files=upload)
+ |
def upload_new_files(crowdin_api, new_files, page_strings):
  """Upload pages that don't exist on Crowdin yet via "add-file"."""
  add_update_files(crowdin_api, "add-file", "Uploading %d new pages...",
                   new_files, page_strings)
+ |
def update_existing_files(crowdin_api, existing_files, page_strings):
  """Refresh pages already present on Crowdin via "update-file"."""
  add_update_files(crowdin_api, "update-file", "Updating %d existing pages...",
                   existing_files, page_strings)
+ |
def upload_translations(crowdin_api, source_dir, new_files, required_locales):
  """Upload any local translations we already have for newly added files.

  For every required locale the locale files matching new_files are
  uploaded in batches, so existing translations on disk aren't lost
  when a file is first created on Crowdin.
  """
  def open_locale_files(locale, files):
    # Yields (form_field_name, open_file) pairs lazily; files that
    # don't exist for this locale are silently skipped.
    for file_name in files:
      path = os.path.join(source_dir, "locales", locale, file_name)
      if os.path.isfile(path):
        yield ("files[%s]" % file_name, open(path, "r"))

  if new_files:
    for locale in required_locales:
      # Use the instance's batch limit instead of hard-coding the
      # CrowdinAPI class.
      for files in grouper(open_locale_files(locale, new_files),
                           crowdin_api.FILES_PER_REQUEST):
        try:
          logger.info("Uploading %d existing translation "
                      "files for locale %s..." % (len(files), locale))
          crowdin_api.request("POST", "upload-translation", files=dict(files),
                              data={"language": locale})
        finally:
          # Always close the handles, even when the request fails.
          for field_name, file_handle in files:
            file_handle.close()
+ |
def remove_old_files(crowdin_api, old_files):
  """Delete files from Crowdin that no longer exist locally."""
  for obsolete in old_files:
    logger.info("Removing old file %s" % obsolete)
    crowdin_api.request("POST", "delete-file", data={"file": obsolete})
+ |
def remove_old_directories(crowdin_api, old_directories):
  """Delete obsolete directories from Crowdin, longest paths first so
  children are removed before their parents."""
  for directory in sorted(old_directories, key=len, reverse=True):
    logger.info("Removing old directory %s" % directory)
    crowdin_api.request("POST", "delete-directory", data={"name": directory})
+ |
def download_translations(crowdin_api, source_dir,
                          skipped_locales, required_locales):
  """Fetch the full translations archive from Crowdin and unpack it
  into source_dir/locales, replacing the existing .json files there.

  Locale directories listed in skipped_locales (unsupported locales
  and the default locale) are left untouched.
  """
  logger.info("Requesting generation of fresh translations archive...")
  result = crowdin_api.request("GET", "export")
  if result.get("success", {}).get("status") == "skipped":
    logger.warning("Archive generation skipped, either "
                   "no changes or API usage excessive")

  logger.info("Downloading translations archive...")
  response = requests.get(
    "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % (
      crowdin_api.project_name, crowdin_api.api_key
    )
  )
  response.raise_for_status()
  logger.info("Extracting translations archive...")
  with zipfile.ZipFile(io.BytesIO(response.content), "r") as archive:
    locale_path = os.path.join(source_dir, "locales")
    # First clear existing translation files
    for root, dirs, files in os.walk(locale_path, topdown=True):
      if root == locale_path:
        # Don't delete locale files for unsupported locales or the default
        dirs[:] = [d for d in dirs if d not in skipped_locales]
      for f in files:
        if f.endswith(".json"):
          os.remove(os.path.join(root, f))
    # Then extract the new ones in place. Zip member names always use
    # forward slashes, so parse them with posixpath, not os.path/os.sep.
    for member in archive.namelist():
      path, file_name = posixpath.split(member)
      ext = posixpath.splitext(file_name)[1]
      locale = posixpath.normpath(path).split("/")[0]
      if ext == ".json" and locale in required_locales:
        archive.extract(member, locale_path)
|
+ |
def crowdin_sync(source_dir, crowdin_api_key):
  """Synchronize the website's strings with its Crowdin project.

  Uploads new and changed source strings plus any local translations,
  removes remote files/directories that no longer exist locally and
  finally downloads the latest translations into source_dir.
  """
  with FileSource(source_dir) as source:
    config = source.read_config()
    defaultlocale = config.get("general", "defaultlocale")
    crowdin_project_name = config.get("general", "crowdin-project-name")

    crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name,
                             defaultlocale)

    logger.info("Requesting project information...")
    project_info = crowdin_api.request("GET", "info")
    page_strings = extract_strings(source, defaultlocale)

    # Locales we need translations for: everything except the default.
    required_locales = {l for l in source.list_locales() if l != defaultlocale}
    enabled_locales = {l["code"] for l in project_info["languages"]}

    required_locales, skipped_locales = configure_locales(
      crowdin_api, required_locales, enabled_locales, defaultlocale
    )
    remote_files, remote_directories = list_remote_files(project_info)
    local_files, local_directories = list_local_files(page_strings)

    # Avoid deleting all remote content if there was a problem listing local files
    if not local_files:
      logger.error("No existing strings found, maybe the project directory is "
                   "not set up correctly? Aborting!")
      sys.exit(1)

    # Create/upload before deleting, so a failure mid-way never leaves
    # the project emptier than necessary.
    new_files = local_files - remote_files
    new_directories = local_directories - remote_directories
    create_directories(crowdin_api, new_directories)
    upload_new_files(crowdin_api, new_files, page_strings)
    upload_translations(crowdin_api, source_dir, new_files, required_locales)

    existing_files = local_files - new_files
    update_existing_files(crowdin_api, existing_files, page_strings)

    old_files = remote_files - local_files
    old_directories = remote_directories - local_directories
    remove_old_files(crowdin_api, old_files)
    remove_old_directories(crowdin_api, old_directories)

    download_translations(crowdin_api, source_dir,
                          skipped_locales, required_locales)
    logger.info("Crowdin sync completed.")
+ |
if __name__ == "__main__":
  if len(sys.argv) < 3:
    # sys.stderr.write() instead of the Python 2-only print >> syntax;
    # the message text is unchanged.
    sys.stderr.write("Usage: python -m cms.bin.translate "
                     "www_directory crowdin_project_api_key [logging_level]\n")
    sys.exit(1)

  logging.basicConfig()
  # setLevel() also accepts level names such as "DEBUG" as strings.
  logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

  source_dir, crowdin_api_key = sys.argv[1:3]
  crowdin_sync(source_dir, crowdin_api_key)