| Index: cms/bin/translate.py |
| diff --git a/cms/bin/translate.py b/cms/bin/translate.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..e5bc9b61a7e51e13f7ebefc85a98f2010960f1e1 |
| --- /dev/null |
| +++ b/cms/bin/translate.py |
| @@ -0,0 +1,259 @@ |
| +# coding: utf-8 |
| + |
| +# This file is part of the Adblock Plus web scripts, |
| +# Copyright (C) 2006-2015 Eyeo GmbH |
| +# |
| +# Adblock Plus is free software: you can redistribute it and/or modify |
| +# it under the terms of the GNU General Public License version 3 as |
| +# published by the Free Software Foundation. |
| +# |
| +# Adblock Plus is distributed in the hope that it will be useful, |
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| +# GNU General Public License for more details. |
| +# |
| +# You should have received a copy of the GNU General Public License |
| +# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + |
| +from io import BytesIO |
| +from itertools import islice |
|
Wladimir Palant
2015/06/29 19:05:36
Nit: I'm not a big fan of importing individual fun
kzar
2015/07/02 12:33:11
Done.
|
| +import json |
| +import logging |
| +import os |
| +import sys |
| +import zipfile |
| + |
| +import requests |
| + |
| +import cms.utils |
| +from cms.sources import FileSource |
| + |
# Number of files grouped into a single Crowdin upload request; used as the
# chunk size by add_update_files() and upload_translations().
FILES_PER_REQUEST = 20

logger = logging.getLogger("cms.bin.translate")
# Module-level Crowdin settings read by crowdin_request() and
# download_translations(). crowdin_project_name and defaultlocale are
# assigned by crowdin_sync(); crowdin_api_key is assigned by the
# __main__ block at the bottom of this file.
crowdin_api_key = None
crowdin_project_name = None
defaultlocale = None
|
Wladimir Palant
2015/06/29 19:05:37
The three variables above shouldn't be globals - t
kzar
2015/07/02 12:33:11
Done.
|
| + |
def grouper(iterable, n):
    """Yield consecutive tuples of up to n items taken from iterable.

    Every tuple holds exactly n items except possibly the last one, which
    holds whatever is left over. Nothing is yielded for an empty iterable.
    """
    source = iter(iterable)
    batch = tuple(islice(source, n))
    while batch:
        yield batch
        batch = tuple(islice(source, n))
|
Wladimir Palant
2015/06/29 19:05:36
Please link to https://stackoverflow.com/questions
kzar
2015/07/02 12:33:10
I originally did take this function from Stackover
|
| + |
def crowdin_request(request_method, api_endpoint, **kwargs):
    """Call a Crowdin project API endpoint and return the decoded JSON reply.

    request_method -- HTTP method name handed to requests.request()
    api_endpoint   -- endpoint name appended to the project's API URL
    kwargs         -- passed through unchanged to requests.request()

    The project name and API key are read from the module-level
    crowdin_project_name and crowdin_api_key variables.

    Raises requests.exceptions.RequestException if the request cannot be
    completed or the server answers with an error status, and ValueError if
    the response body is not valid JSON. Every failure is logged before the
    exception is re-raised.
    """
    # Only this key-less form of the URL is used in log messages, so the API
    # key is not written to the log.
    endpoint_url = "https://api.crowdin.com/api/project/%s/%s" % (
        crowdin_project_name, api_endpoint
    )
    url = "%s?key=%s&json=1" % (endpoint_url, crowdin_api_key)

    try:
        response = requests.request(request_method, url, **kwargs)
    except requests.exceptions.RequestException as error:
        # The exception text may contain the full URL, so only its type is
        # logged here.
        logger.error("API call to %s failed: %s" % (
            endpoint_url, type(error).__name__
        ))
        raise

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error("API call to %s failed:\n%s" % (
            endpoint_url, response.text
        ))
        raise

    try:
        return response.json()
    except ValueError:
        logger.error("API call to %s returned invalid JSON:\n%s" % (
            endpoint_url, response.text
        ))
        raise
|
Wladimir Palant
2015/06/29 19:05:37
This call might also raise an exception - if JSON
kzar
2015/07/02 12:33:12
Done.
|
| + |
def extract_strings(source, defaultlocale):
    """Collect the translatable strings of every page in source.

    source        -- object providing list_pages(); also handed to
                     cms.utils.get_page_params()
    defaultlocale -- locale whose strings are extracted

    Returns a dict mapping each page name to a dict of
    {string_name: {"message": ..., "description": ...}}, where "description"
    is only present for strings that have a locale comment.
    """
    logger.info("Extracting page strings (please be patient)...")
    page_strings = {}
    # The page format is not needed here, only the page name.
    for page, _page_format in source.list_pages():
        params = cms.utils.get_page_params(source, defaultlocale, page)
        comments = params["localecomments"]
        # Build a fresh dict rather than rewriting params["localedata"] in
        # place, so the data returned by get_page_params() is left untouched.
        strings = {}
        for string_name, message in params["localedata"].items():
            entry = {"message": message}
            if string_name in comments:
                entry["description"] = comments[string_name]
            strings[string_name] = entry
        page_strings[page] = strings
    return page_strings
| + |
def ensure_required_locales(required_locales, enabled_locales, defaultlocale):
    """Restrict required_locales to what Crowdin supports and enable them.

    required_locales -- set of locales the project wants translated
    enabled_locales  -- set of locales already enabled for the project
    defaultlocale    -- the project's default locale

    Returns a (required_locales, skipped_locales) tuple: the set of required
    locales that Crowdin supports, and a list of the unsupported locales
    followed by the default locale.
    """
    logger.info("Checking which locales are supported by Crowdin...")
    languages = crowdin_request("GET", "supported-languages")
    supported_locales = {language["crowdin_code"] for language in languages}

    skipped_locales = list(required_locales - supported_locales)
    if skipped_locales:
        unsupported = ", ".join(skipped_locales)
        logger.warning(
            "Ignoring locales that Crowdin doesn't support: %s" % unsupported
        )
        required_locales = required_locales & supported_locales

    # The default locale is added so that the result is one list of all
    # locales to skip.
    skipped_locales.append(defaultlocale)

    already_enabled = required_locales.issubset(enabled_locales)
    if not already_enabled:
        logger.info("Enabling the required locales for the Crowdin project...")
        wanted_languages = list(enabled_locales | required_locales)
        crowdin_request(
            "POST", "edit-project", data={"languages[]": wanted_languages}
        )

    return required_locales, skipped_locales
| + |
def list_remote_files(project_info):
    """Return the file and directory paths present in the Crowdin project.

    project_info -- dict whose "files" entry is a list of nodes. Each node
                    has a "node_type" ("file" or "directory") and a "name";
                    directory nodes may carry their children under "files".

    Returns a (remote_files, remote_directories) tuple of sets holding
    "/"-separated paths relative to the project root. Nodes of any other
    type are ignored.
    """
    remote_files = set()
    remote_directories = set()

    def collect(node, prefix=""):
        # Record one node and, for directories, everything below it.
        if node["node_type"] == "file":
            remote_files.add(prefix + node["name"])
        elif node["node_type"] == "directory":
            dir_name = prefix + node["name"]
            remote_directories.add(dir_name)
            for child in node.get("files", []):
                collect(child, dir_name + "/")

    # An explicit loop is required: map() is lazy on Python 3, so using it
    # purely for its side effects would never visit any node.
    for node in project_info["files"]:
        collect(node)
    return remote_files, remote_directories
| + |
def list_local_files(page_strings):
    """Return the file and directory paths that should exist on Crowdin.

    page_strings -- dict mapping page names ("/"-separated) to their strings

    Returns a (local_files, local_directories) tuple of sets. Each page that
    has at least one string contributes "<page>.json" to the files and all
    of its parent directories to the directories. Pages without strings
    contribute nothing.
    """
    local_files = set()
    local_directories = set()
    for page, strings in page_strings.items():
        if not strings:
            continue
        local_files.add(page + ".json")
        # Strip one path component at a time to collect every ancestor
        # directory of the page ("a/b/c" gives "a/b" and then "a").
        directory = page
        while "/" in directory:
            directory = directory.rsplit("/", 1)[0]
            local_directories.add(directory)
    return local_files, local_directories
| + |
def create_directories(directories):
    """Create each of the given directories in the Crowdin project."""
    for name in directories:
        logger.info("Creating directory %s" % name)
        payload = {"name": name}
        crowdin_request("POST", "add-directory", data=payload)
| + |
def add_update_files(api_endpoint, message, files, page_strings):
    """Upload the strings of the given files to Crowdin in batches.

    api_endpoint -- Crowdin endpoint to post to ("add-file"/"update-file")
    message      -- log message template with one %d for the batch size
    files        -- iterable of remote file names ending in ".json"
    page_strings -- dict mapping page names to their strings; the entry of
                    every uploaded page is removed from it

    At most FILES_PER_REQUEST files are sent per request.
    """
    for group in grouper(files, FILES_PER_REQUEST):
        upload = {}
        for file_name in group:
            # The page name is the file name without its extension.
            page = os.path.splitext(file_name)[0]
            upload["files[%s]" % file_name] = (
                file_name, json.dumps(page_strings[page])
            )
            # The strings are not needed again once serialized; dropping
            # them presumably keeps memory usage down for large sites.
            del page_strings[page]
        logger.info(message % len(upload))
        crowdin_request("POST", api_endpoint, files=upload)
| + |
def upload_new_files(new_files, page_strings):
    """Upload pages that do not exist on Crowdin yet ("add-file")."""
    add_update_files(
        api_endpoint="add-file",
        message="Uploading %d new pages...",
        files=new_files,
        page_strings=page_strings,
    )
| + |
def update_existing_files(existing_files, page_strings):
    """Re-upload pages that already exist on Crowdin ("update-file")."""
    add_update_files(
        api_endpoint="update-file",
        message="Updating %d existing pages...",
        files=existing_files,
        page_strings=page_strings,
    )
| + |
def upload_translations(source_dir, new_files, required_locales):
    """Upload existing local translations of newly added files to Crowdin.

    source_dir       -- project directory containing the "locales" folder
    new_files        -- names of the files that were just added to Crowdin
    required_locales -- locales whose translations should be uploaded

    For each locale, the files found under locales/<locale>/ are uploaded
    in batches of at most FILES_PER_REQUEST; files that do not exist
    locally are skipped. Requests are made per locale because the language
    is sent as a request parameter.
    """
    def existing_locale_files(locale):
        # Yield (file name, local path) for the translations that exist.
        for file_name in new_files:
            path = os.path.join(source_dir, "locales", locale, file_name)
            if os.path.isfile(path):
                yield file_name, path

    if not new_files:
        return

    for locale in required_locales:
        batches = grouper(existing_locale_files(locale), FILES_PER_REQUEST)
        for batch in batches:
            handles = {}
            try:
                for file_name, path in batch:
                    handles["files[%s]" % file_name] = open(path, "r")
                logger.info("Uploading %d existing translation "
                            "files for locale %s..." % (len(handles), locale))
                crowdin_request("POST", "upload-translation", files=handles,
                                data={"language": locale})
            finally:
                # Close whatever was opened, even if opening a later file
                # or the request itself failed.
                for handle in handles.values():
                    handle.close()
|
Wladimir Palant
2015/06/29 19:05:37
These files should be closed regardless of whether
kzar
2015/07/02 12:33:10
Done.
|
| + |
def remove_old_files(old_files):
    """Delete each of the given files from the Crowdin project."""
    for name in old_files:
        logger.info("Removing old file %s" % name)
        payload = {"file": name}
        crowdin_request("POST", "delete-file", data=payload)
| + |
def remove_old_directories(old_directories):
    """Delete each of the given directories from the Crowdin project.

    Directories are processed from the longest path to the shortest. A
    nested directory's path is always longer than its parent's, so children
    are removed before the directories that contain them.
    """
    longest_first = sorted(old_directories, key=len)[::-1]
    for name in longest_first:
        logger.info("Removing old directory %s" % name)
        payload = {"name": name}
        crowdin_request("POST", "delete-directory", data=payload)
| + |
def download_translations(source_dir, skipped_locales):
    """Replace the local translation files with a fresh Crowdin export.

    source_dir      -- project directory containing the "locales" folder
    skipped_locales -- locale directory names that must be left untouched
                       (unsupported locales and the default locale)

    Requests a new export, downloads the project's all.zip archive, deletes
    the existing .json files of every non-skipped locale and then extracts
    the archive's .json files into the "locales" folder.

    Raises requests.exceptions.HTTPError if the download fails, plus
    whatever crowdin_request() raises for the export request.
    """
    logger.info("Requesting generation of fresh translations archive...")
    result = crowdin_request("GET", "export")
    if result.get("success", {}).get("status") == "skipped":
        logger.warning("Archive generation skipped, either "
                       "no changes or API usage excessive")

    logger.info("Downloading translations archive...")
    response = requests.get(
        "https://api.crowdin.com/api/project/%s/download/all.zip?key=%s" % (
            crowdin_project_name, crowdin_api_key
        )
    )
    response.raise_for_status()
    logger.info("Extracting translations archive...")
    with zipfile.ZipFile(BytesIO(response.content), "r") as archive:
        locale_path = os.path.join(source_dir, "locales")
        # First clear existing translation files
        for root, dirs, files in os.walk(locale_path, topdown=True):
            if root == locale_path:
                # Don't delete locale files for unsupported locales or the
                # default; pruning dirs in place stops os.walk() from
                # descending into them.
                dirs[:] = [d for d in dirs if d not in skipped_locales]
            for f in files:
                if f.endswith(".json"):
                    os.remove(os.path.join(root, f))
        # Then extract the new ones in place. Members are vetted one by one
        # instead of calling extractall(): only the .json files this
        # function manages are written, nothing may escape locale_path, and
        # the skipped locales preserved above are not overwritten.
        for member in archive.namelist():
            # Zip member names use "/" as separator on every platform.
            parts = member.split("/")
            if not member.endswith(".json"):
                continue
            if member.startswith("/") or ".." in parts:
                continue
            if parts[0] in skipped_locales:
                continue
            archive.extract(member, locale_path)
|
Wladimir Palant
2015/06/29 19:05:37
Running extractall is pretty dangerous IMHO. I'd s
kzar
2015/07/02 12:33:12
Done.
|
| + |
def crowdin_sync(source_dir, crowdin_api_key):
    """Synchronize the pages and translations of a project with Crowdin.

    source_dir      -- project directory (holds the settings and "locales")
    crowdin_api_key -- API key of the Crowdin project

    Steps: read the project settings, extract the page strings, enable the
    required locales, create missing directories, upload new files together
    with their existing translations, update files already on Crowdin,
    remove remote files and directories that no longer exist locally and
    finally download the current translations.

    Exits the process with status 1 if no page strings are found, so that
    an empty local listing never wipes the remote content.
    """
    global crowdin_project_name, defaultlocale
    # The parameter shadows the module-level crowdin_api_key (and a parameter
    # cannot be declared global), so publish it explicitly. Without this,
    # crowdin_request() would only see the key when the module is run as a
    # script.
    globals()["crowdin_api_key"] = crowdin_api_key

    with FileSource(source_dir) as source:
        config = source.read_config()
        defaultlocale = config.get("general", "defaultlocale")
        crowdin_project_name = config.get("general", "crowdin-project-name")

        logger.info("Requesting project information...")
        project_info = crowdin_request("GET", "info")
        page_strings = extract_strings(source, defaultlocale)

        required_locales = {
            locale for locale in source.list_locales()
            if locale != defaultlocale
        }
        enabled_locales = {
            language["code"] for language in project_info["languages"]
        }

        required_locales, skipped_locales = ensure_required_locales(
            required_locales, enabled_locales, defaultlocale
        )
        remote_files, remote_directories = list_remote_files(project_info)
        local_files, local_directories = list_local_files(page_strings)

        # Avoid deleting all remote content if there was a problem listing
        # local files
        if not local_files:
            logger.error("No page strings found. (Wrong project directory?) Aborting!")
            sys.exit(1)

        # Sorted for a deterministic order. For directories this also puts
        # every parent before its children, so a parent is created first
        # (Crowdin presumably requires the parent to exist - TODO confirm).
        new_files = sorted(local_files - remote_files)
        new_directories = sorted(local_directories - remote_directories)
        create_directories(new_directories)
        upload_new_files(new_files, page_strings)
        upload_translations(source_dir, new_files, required_locales)

        existing_files = sorted(local_files & remote_files)
        update_existing_files(existing_files, page_strings)

        old_files = remote_files - local_files
        old_directories = remote_directories - local_directories
        remove_old_files(old_files)
        remove_old_directories(old_directories)

        download_translations(source_dir, skipped_locales)
        logger.info("Crowdin sync completed.")
| + |
if __name__ == "__main__":
    # Command line: www_directory crowdin_project_api_key [logging_level]
    arguments = sys.argv[1:]
    if len(arguments) < 2:
        sys.stderr.write("Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]\n")
        sys.exit(1)

    logging.basicConfig()
    if len(arguments) > 2:
        logger.setLevel(arguments[2])
    else:
        logger.setLevel(logging.INFO)

    # Assigning crowdin_api_key here sets the module-level variable that
    # crowdin_request() and download_translations() read.
    source_dir, crowdin_api_key = arguments[:2]
    crowdin_sync(source_dir, crowdin_api_key)