 Issue 29317015:
  Issue 2625 - [cms] Crowdin synchronisation script  (Closed)
    
  
    Issue 29317015:
  Issue 2625 - [cms] Crowdin synchronisation script  (Closed) 
  | Index: cms/bin/translate.py | 
| diff --git a/cms/bin/translate.py b/cms/bin/translate.py | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..857af2508f018332a8bd25305084f5f32e979be5 | 
| --- /dev/null | 
| +++ b/cms/bin/translate.py | 
| @@ -0,0 +1,286 @@ | 
| +# coding: utf-8 | 
| + | 
| +# This file is part of the Adblock Plus web scripts, | 
| +# Copyright (C) 2006-2015 Eyeo GmbH | 
| +# | 
| +# Adblock Plus is free software: you can redistribute it and/or modify | 
| +# it under the terms of the GNU General Public License version 3 as | 
| +# published by the Free Software Foundation. | 
| +# | 
| +# Adblock Plus is distributed in the hope that it will be useful, | 
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| +# GNU General Public License for more details. | 
| +# | 
| +# You should have received a copy of the GNU General Public License | 
| +# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| + | 
| +import io | 
| +import itertools | 
| +import json | 
| +import logging | 
| +import os | 
| +import sys | 
| +import zipfile | 
| + | 
| +import requests | 
| 
Sebastian Noack
2015/07/08 13:03:19
I see, we use "requests", because urllib doesn't h
 
Sebastian Noack
2015/07/08 14:23:00
I just realized that simply using built-in urllib2
 
kzar
2015/07/08 15:26:41
I discussed using the requests library with Wladim
 
Sebastian Noack
2015/07/08 15:43:50
I'm certainly curious about the reason, you decide
 
Wladimir Palant
2015/07/08 23:11:05
Doing multipart encoding manually is very awkward.
 
Sebastian Noack
2015/07/09 21:26:55
Personally, I'd still prefer to go with built-in u
 
Wladimir Palant
2015/07/10 21:24:04
Nope, they are both hacky and I'd definitely prefe
 
kzar
2015/07/11 19:21:17
Done.
 | 
| + | 
| +import cms.utils | 
| +from cms.sources import FileSource | 
| + | 
| +logger = logging.getLogger("cms.bin.translate") | 
| + | 
| +class CrowdinAPI: | 
| + FILES_PER_REQUEST = 20 | 
| + | 
| + def __init__(self, api_key, project_name, defaultlocale): | 
| + self.api_key = api_key | 
| + self.project_name = project_name | 
| + self.defaultlocale = defaultlocale | 
| 
Wladimir Palant
2015/07/08 23:11:06
It doesn't look like this field is ever used - and
 
kzar
2015/07/11 19:21:16
Done.
 | 
| + | 
| + def request(self, request_method, api_endpoint, **kwargs): | 
| + url = "https://api.crowdin.com/api/project/%s/%s?key=%s&json=1" % ( | 
| + self.project_name, api_endpoint, self.api_key | 
| + ) | 
| + try: | 
| + response = requests.request(request_method, url, **kwargs) | 
| + response.raise_for_status() | 
| + except requests.exceptions.HTTPError as e: | 
| + logger.error("API call to %s failed:\n%s" % (url, response.text)) | 
| + raise | 
| + except requests.exceptions.ConnectionError: | 
| 
Wladimir Palant
2015/07/08 23:11:06
There are more exception classes - ConnectionError
 
kzar
2015/07/11 19:21:17
Done.
 | 
| + logger.error("Connection to API failed for endpoint %s" % url) | 
| + raise | 
| + | 
| + try: | 
| + return response.json() | 
| + except ValueError: | 
| + logger.error("Invalid response returned by API endpoint %s" % url) | 
| + raise | 
| + | 
| + | 
| +def grouper(iterable, n): | 
| + iterator = iter(iterable) | 
| + while True: | 
| + chunk = tuple(itertools.islice(iterator, n)) | 
| + if chunk: | 
| 
Sebastian Noack
2015/07/08 13:03:19
Nit: You could get rid of the else block:
if not
 
kzar
2015/07/11 19:21:17
Done.
 | 
| + yield chunk | 
| + else: | 
| + break | 
| + | 
| +def extract_strings(source, defaultlocale): | 
| + logger.info("Extracting page strings (please be patient)...") | 
| + page_strings = {} | 
| + | 
| + def record_string(page, name, default, comment, fixed_strings): | 
| + store = page_strings.setdefault(page, {}) | 
| 
Sebastian Noack
2015/07/08 13:03:20
Do we care to not unnecessarily change the order o
 
Wladimir Palant
2015/07/08 23:11:06
Not necessarily relevant for diffs but Crowdin wil
 
kzar
2015/07/11 19:21:16
Done.
 | 
| + store[name] = {"message": default} | 
| + | 
| + if fixed_strings: | 
| + comment = comment + "\n" if comment else "" | 
| 
Sebastian Noack
2015/07/08 13:03:20
Nit. The ternary operator just adds unneeded comple
 
kzar
2015/07/11 19:21:17
`comment` might be None, hence this logic.
 | 
| + comment += ", ".join("{%d}: %s" % (i, s) | 
| 
Sebastian Noack
2015/07/08 13:03:20
Nit: No reason to pack/unpack the sequence here:
 
kzar
2015/07/11 19:21:17
Done.
 | 
| + for i, s in enumerate(fixed_strings, 1)) | 
| + if comment: | 
| + store[name]["description"] = comment | 
| + | 
| + for page, format in source.list_pages(): | 
| + cms.utils.process_page(source, defaultlocale, page, | 
| + format=format, record_default_strings=record_string) | 
| 
Sebastian Noack
2015/07/09 21:26:55
Recording the default strings is what we do here.
 
kzar
2015/07/11 19:21:16
Done.
 | 
| + return page_strings | 
| + | 
def configure_locales(crowdin_api, required_locales, enabled_locales,
                      defaultlocale):
    """Reconcile the locales the site needs with what Crowdin offers.

    Asks the ``supported-languages`` endpoint which locale codes Crowdin
    knows, drops every required locale that is not among them and, if any
    remaining required locale is not yet enabled for the project, enables
    it through the ``edit-project`` endpoint.

    Args:
        crowdin_api: object exposing ``request(method, endpoint, **kwargs)``.
        required_locales: set of locale codes the site wants translated.
            The set passed in is left unmodified.
        enabled_locales: set of locale codes already enabled in the project.
        defaultlocale: locale code of the source language.

    Returns:
        A ``(required_locales, skipped_locales)`` tuple of sets: the
        required locales that Crowdin supports, and the locales to leave
        alone (the unsupported ones plus ``defaultlocale``).
    """
    logger.info("Checking which locales are supported by Crowdin...")
    response = crowdin_api.request("GET", "supported-languages")

    supported_locales = {l["crowdin_code"] for l in response}
    skipped_locales = required_locales - supported_locales

    if skipped_locales:
        # Sorted so the warning reads the same on every run
        logger.warning("Ignoring locales that Crowdin doesn't support: %s",
                       ", ".join(sorted(skipped_locales)))

    # Build a new set rather than using ``-=`` so that the caller's set is
    # not changed behind its back.
    required_locales = required_locales - skipped_locales

    # It's useful to have a list of all locales to skip
    skipped_locales.add(defaultlocale)

    if not required_locales.issubset(enabled_locales):
        logger.info("Enabling the required locales for the Crowdin project...")
        crowdin_api.request(
            "POST", "edit-project",
            data={"languages[]": sorted(enabled_locales | required_locales)}
        )

    return required_locales, skipped_locales
| + | 
def list_remote_files(project_info):
    """Collect the file and directory paths present in the Crowdin project.

    Args:
        project_info: mapping whose ``"files"`` entry is a list of nodes.
            Each node has a ``"node_type"`` and a ``"name"``; nodes of type
            ``"directory"`` may carry their children in a nested ``"files"``
            list.

    Returns:
        A ``(remote_files, remote_directories)`` tuple of sets holding
        ``/``-separated paths. Nodes of any other type are ignored.
    """
    remote_files = set()
    remote_directories = set()

    def parse_file_node(node, path=""):
        # ``path`` is either empty or ends with "/", so plain concatenation
        # yields the full path of the node.
        full_name = path + node["name"]
        if node["node_type"] == "file":
            remote_files.add(full_name)
        elif node["node_type"] == "directory":
            remote_directories.add(full_name)
            for child in node.get("files", []):
                parse_file_node(child, full_name + "/")

    for node in project_info["files"]:
        parse_file_node(node)
    return remote_files, remote_directories
| + | 
def list_local_files(page_strings):
    """Derive the file and directory paths that should exist on Crowdin.

    Args:
        page_strings: mapping of page name (``/``-separated, no extension)
            to the dict of strings recorded for that page.

    Returns:
        A ``(local_files, local_directories)`` tuple of sets. Every page
        with at least one string contributes ``<page>.json`` to the files
        and each of its ancestor directories to the directories; pages
        without strings contribute nothing.
    """
    local_files = set()
    local_directories = set()
    # .items() works on both Python 2 and Python 3, unlike .iteritems()
    for page, strings in page_strings.items():
        if not strings:
            continue
        local_files.add(page + ".json")
        # Walk up the path one component at a time, recording each parent
        directory = page
        while "/" in directory:
            directory = directory.rsplit("/", 1)[0]
            local_directories.add(directory)
    return local_files, local_directories
| + | 
def create_directories(crowdin_api, directories):
    """Create the given directories in the Crowdin project.

    Args:
        crowdin_api: object exposing ``request(method, endpoint, **kwargs)``.
        directories: iterable of ``/``-separated directory paths.

    One ``add-directory`` request is issued per directory. The paths are
    processed shallowest first (ties broken by name), so a parent is always
    requested before any directory nested inside it, regardless of the
    iteration order of ``directories``.
    """
    # NOTE(review): the request carries no "recursive" option, so the API
    # presumably needs the parent to exist already - confirm against the
    # Crowdin documentation.
    ordered = sorted(directories, key=lambda name: (name.count("/"), name))
    for directory in ordered:
        logger.info("Creating directory %s" % directory)
        crowdin_api.request("POST", "add-directory", data={"name": directory})
| + | 
| +def add_update_files(crowdin_api, api_endpoint, message, files, page_strings): | 
| + for group in grouper(files, CrowdinAPI.FILES_PER_REQUEST): | 
| 
Wladimir Palant
2015/07/08 23:11:08
No need to assume that crowdin_api is a CrowdinAPI
 
kzar
2015/07/11 19:21:16
Done.
 | 
| + files = {} | 
| + for file_name in group: | 
| + page = os.path.splitext(file_name)[0] | 
| + files["files[%s]" % file_name] = (file_name, json.dumps(page_strings[page])) | 
| + del page_strings[page] | 
| + logger.info(message % len(files)) | 
| + crowdin_api.request("POST", api_endpoint, files=files) | 
| + | 
def upload_new_files(crowdin_api, new_files, page_strings):
    """Upload pages that do not exist on Crowdin yet.

    Delegates to ``add_update_files`` using the ``add-file`` endpoint.
    """
    add_update_files(
        crowdin_api,
        api_endpoint="add-file",
        message="Uploading %d new pages...",
        files=new_files,
        page_strings=page_strings,
    )
| + | 
def update_existing_files(crowdin_api, existing_files, page_strings):
    """Refresh pages that are already present on Crowdin.

    Delegates to ``add_update_files`` using the ``update-file`` endpoint.
    """
    add_update_files(
        crowdin_api,
        api_endpoint="update-file",
        message="Updating %d existing pages...",
        files=existing_files,
        page_strings=page_strings,
    )
| + | 
| +def upload_translations(crowdin_api, source_dir, new_files, required_locales): | 
| + def open_locale_files(locale, files): | 
| + for file in files: | 
| + path = os.path.join(source_dir, "locales", locale, file) | 
| + if os.path.isfile(path): | 
| + yield ("files[%s]" % file, open(path, "r")) | 
| + | 
| + if new_files: | 
| + for locale in required_locales: | 
| + for files in grouper(open_locale_files(locale, new_files), | 
| 
Sebastian Noack
2015/07/08 13:03:20
You should better first get the chunk of filenames
 
kzar
2015/07/11 19:21:16
`open_locale_files` is a generator and I consume t
 | 
| + CrowdinAPI.FILES_PER_REQUEST): | 
| 
Wladimir Palant
2015/07/08 23:11:05
As above, crowdin_api.FILES_PER_REQUEST please.
 
kzar
2015/07/11 19:21:16
Done.
 | 
| + try: | 
| + logger.info("Uploading %d existing translation " | 
| + "files for locale %s..." % (len(files), locale)) | 
| + crowdin_api.request("POST", "upload-translation", files=dict(files), | 
| + data={"language": locale}) | 
| + finally: | 
| + for file_name, file in files: | 
| + file.close() | 
| + | 
def remove_old_files(crowdin_api, old_files):
    """Delete each of the given files from the Crowdin project.

    One ``delete-file`` request is sent per entry, in iteration order.
    """
    for stale_file in old_files:
        notice = "Removing old file %s" % stale_file
        logger.info(notice)
        payload = {"file": stale_file}
        crowdin_api.request("POST", "delete-file", data=payload)
| + | 
def remove_old_directories(crowdin_api, old_directories):
    """Delete each of the given directories from the Crowdin project.

    Paths are handled from longest to shortest, so a nested directory is
    always removed before the directory that contains it.
    """
    by_length = sorted(old_directories, key=len)
    # Walk the length-sorted list backwards: longest path first
    for stale_directory in by_length[::-1]:
        notice = "Removing old directory %s" % stale_directory
        logger.info(notice)
        payload = {"name": stale_directory}
        crowdin_api.request("POST", "delete-directory", data=payload)
| + | 
def download_translations(crowdin_api, source_dir,
                          skipped_locales, required_locales):
    """Replace the local translation files with a fresh Crowdin export.

    Steps, in order:
      1. request a new export via the ``export`` endpoint (a "skipped"
         status is only logged as a warning),
      2. download ``all.zip`` for the project,
      3. delete every ``*.json`` file below ``<source_dir>/locales``, except
         inside top-level directories named in ``skipped_locales``,
      4. extract the archive's ``*.json`` members whose first path component
         is in ``required_locales`` into ``<source_dir>/locales``.

    Raises whatever ``requests`` raises for a failed download, including
    the error from ``raise_for_status()``.
    """
    logger.info("Requesting generation of fresh translations archive...")
    export_result = crowdin_api.request("GET", "export")
    export_status = export_result.get("success", {}).get("status")
    if export_status == "skipped":
        logger.warning("Archive generation skipped, either "
                       "no changes or API usage excessive")

    logger.info("Downloading translations archive...")
    archive_url = (
        "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % (
            crowdin_api.project_name, crowdin_api.api_key
        )
    )
    response = requests.get(archive_url)
    response.raise_for_status()

    logger.info("Extracting translations archive...")
    with zipfile.ZipFile(io.BytesIO(response.content), "r") as archive:
        locale_path = os.path.join(source_dir, "locales")

        # First clear existing translation files
        for current_dir, subdirs, file_names in os.walk(locale_path,
                                                        topdown=True):
            if current_dir == locale_path:
                # Prune in place so os.walk never descends into the default
                # locale or locales Crowdin does not support.
                subdirs[:] = [name for name in subdirs
                              if name not in skipped_locales]
            for file_name in file_names:
                if file_name.endswith(".json"):
                    os.remove(os.path.join(current_dir, file_name))

        # Then extract the new ones in place
        for member in archive.namelist():
            directory, base_name = os.path.split(member)
            if os.path.splitext(base_name)[1] != ".json":
                continue
            locale = os.path.normpath(directory).split(os.sep)[0]
            if locale in required_locales:
                archive.extract(member, locale_path)
| 
Wladimir Palant
2015/07/08 23:11:08
Please use posixpath module here rather than os.pa
 
kzar
2015/07/11 19:21:16
Done.
 | 
| + | 
def crowdin_sync(source_dir, crowdin_api_key):
    """Synchronise the site in ``source_dir`` with its Crowdin project.

    Reads ``defaultlocale`` and ``crowdin-project-name`` from the
    ``general`` section of the site configuration, then:
    configures the project's locales, creates missing directories, uploads
    new pages together with their existing translations, updates pages
    already on Crowdin, removes remote files and directories that no longer
    exist locally, and finally downloads the current translations.

    Exits the process with status 1 if no local strings were found, to
    avoid wiping the remote project.
    """
    with FileSource(source_dir) as source:
        config = source.read_config()
        defaultlocale = config.get("general", "defaultlocale")
        project_name = config.get("general", "crowdin-project-name")
        crowdin_api = CrowdinAPI(crowdin_api_key, project_name, defaultlocale)

        logger.info("Requesting project information...")
        project_info = crowdin_api.request("GET", "info")
        page_strings = extract_strings(source, defaultlocale)

        required_locales = set(source.list_locales()) - {defaultlocale}
        enabled_locales = {
            language["code"] for language in project_info["languages"]
        }
        required_locales, skipped_locales = configure_locales(
            crowdin_api, required_locales, enabled_locales, defaultlocale
        )

        remote_files, remote_directories = list_remote_files(project_info)
        local_files, local_directories = list_local_files(page_strings)

        # Avoid deleting all remote content if there was a problem listing
        # local files
        if not local_files:
            logger.error("No existing strings found, maybe the project directory is "
                         "not set up correctly? Aborting!")
            sys.exit(1)

        # Additions: directories first, then pages, then their translations
        new_files = local_files - remote_files
        create_directories(crowdin_api, local_directories - remote_directories)
        upload_new_files(crowdin_api, new_files, page_strings)
        upload_translations(crowdin_api, source_dir, new_files,
                            required_locales)

        # Pages present on both sides only need refreshing
        update_existing_files(crowdin_api, local_files & remote_files,
                              page_strings)

        # Removals: files before the directories that held them
        remove_old_files(crowdin_api, remote_files - local_files)
        remove_old_directories(crowdin_api,
                               remote_directories - local_directories)

        download_translations(crowdin_api, source_dir,
                              skipped_locales, required_locales)
        logger.info("Crowdin sync completed.")
| + | 
if __name__ == "__main__":
    # Command-line entry point; expects the site directory and the Crowdin
    # project API key, optionally followed by a logging level.
    if len(sys.argv) < 3:
        # sys.stderr.write() behaves the same on Python 2 and Python 3,
        # unlike the Python 2 only ``print >>stream, ...`` statement.
        sys.stderr.write("Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]\n")
        sys.exit(1)

    logging.basicConfig()
    # The optional third argument is handed to setLevel() unchanged
    logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

    source_dir, crowdin_api_key = sys.argv[1:3]
    crowdin_sync(source_dir, crowdin_api_key)