Rietveld Code Review Tool

Unified Diff: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)
Patch Set: Slightly simplified request exception logic (created July 12, 2015, 5:47 a.m.)
Index: cms/bin/translate.py
diff --git a/cms/bin/translate.py b/cms/bin/translate.py
new file mode 100644
index 0000000000000000000000000000000000000000..810cb27b8d4ba1badb131a6b2ec798436323b3c5
--- /dev/null
+++ b/cms/bin/translate.py
@@ -0,0 +1,299 @@
+# coding: utf-8
+
+# This file is part of the Adblock Plus web scripts,
+# Copyright (C) 2006-2015 Eyeo GmbH
+#
+# Adblock Plus is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3 as
+# published by the Free Software Foundation.
+#
+# Adblock Plus is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+
+import collections
+import io
+import itertools
+import json
+import logging
+import os
+import posixpath
+import sys
+import zipfile
+
+import urllib3
+
+import cms.utils
+from cms.sources import FileSource
+
+logger = logging.getLogger("cms.bin.translate")
+
+class CrowdinAPI:
+ FILES_PER_REQUEST = 20
+
+ def __init__(self, api_key, project_name):
+ self.api_key = api_key
+ self.project_name = project_name
+ self.connection = urllib3.connection_from_url("https://api.crowdin.com/")
+
+ def request(self, request_method, api_endpoint, data=None, files=None):
+ url = "/api/project/%s/%s?key=%s&json=1" % (
+ self.project_name, api_endpoint, self.api_key
Sebastian Noack 2015/07/14 11:31:05 Please encode the parameters properly: url = "/ap
kzar 2015/07/14 12:54:27 Done.
+ )
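For reference, the encoding suggested above would presumably look something like this sketch (Python 2's urllib; the exact replacement is truncated in the comment, so treat this as an assumption):

    import urllib
    url = "/api/project/%s/%s?%s" % (
        urllib.quote(self.project_name), api_endpoint,
        urllib.urlencode({"key": self.api_key, "json": 1})
    )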
+
+ fields = []
+ if data:
+ for name, value in data.iteritems():
+ if isinstance(value, basestring):
+ fields.append((name, value))
+ else:
+ fields += [(name + "[]", v) for v in value]
Sebastian Noack 2015/07/14 11:31:05 Nit: fields.extend((name + "[]", v) for v in value
kzar 2015/07/14 12:54:27 Done.
+ if files:
+ fields += [("files[%s]" % f[0], f) for f in files]
Sebastian Noack 2015/07/14 11:31:04 Note that |'%s' % f| returns its object representat
Sebastian Noack 2015/07/14 11:31:05 Nit: Please use .extend() here as well.
kzar 2015/07/14 12:54:27 So f[0] is actually the file name and we need to p
kzar 2015/07/14 12:54:30 Done.
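Combining the two points above, the field construction could end up roughly as follows (sketch only; f is the (name, contents, mimetype) tuple built further down, so f[0] is the file name):

    # in the data loop: .extend() avoids building an intermediate list
    fields.extend((name + "[]", v) for v in value)
    # in the files loop: f[0] is the file name
    fields.extend(("files[%s]" % f[0], f) for f in files)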
+
+ try:
+ response = self.connection.request(
+ request_method, str(url), fields=fields,
+ timeout=urllib3.Timeout(connect=5)
Sebastian Noack 2015/07/14 11:31:07 Any particular reason you specify a custom connect
kzar 2015/07/14 12:54:30 During testing I found that by default it didn't s
Sebastian Noack 2015/07/14 14:39:27 But differently than urllib/urllib2 which we use e
kzar 2015/07/15 09:51:24 Done.
+ )
+ if response.status < 200 or response.status >= 300:
Sebastian Noack 2015/07/14 11:31:07 How about |response.status not in xrange(200, 299)
kzar 2015/07/14 12:54:28 I think I prefer it as is.
+ raise urllib3.exceptions.HTTPError(response.status)
+ except urllib3.exceptions.HTTPError as e:
Sebastian Noack 2015/07/14 11:31:06 Nit: Since we don't use the variable e you can omi
kzar 2015/07/14 12:54:28 Done.
+ logger.error("API call to %s failed:\n%s" % (url, response.data))
Sebastian Noack 2015/07/14 11:31:05 You can pass the values for the placeholders direc
kzar 2015/07/14 12:54:30 Done.
+ raise
+
+ try:
+ return json.loads(response.data)
Sebastian Noack 2015/07/14 11:31:04 How about |json.load(response)|?
Sebastian Noack 2015/07/14 11:31:06 Note that like urllib/urllib2, urllib3's response
kzar 2015/07/14 12:54:28 This doesn't work as you would expect, even though
Sebastian Noack 2015/07/14 14:39:27 I just tested it myself. And it did work. You have
Sebastian Noack 2015/07/14 14:39:28 For reference, I just realized that urllib3, magic
kzar 2015/07/15 09:51:23 Acknowledged.
kzar 2015/07/15 09:51:24 You're right, I forgot to set preload_content when
+ except ValueError:
+ logger.error("Invalid response returned by API endpoint %s" % url)
Sebastian Noack 2015/07/14 11:31:05 Same here: logger.error("Invalid response returned
kzar 2015/07/14 12:54:30 Done.
+ raise
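For reference, the file-like approach discussed above only works when the response body is not preloaded; a sketch of that variant (not what this patch set does):

    response = self.connection.request(
        request_method, str(url), fields=fields, preload_content=False
    )
    return json.load(response)  # the response exposes read() when preloading is off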
+
+
+def grouper(iterable, n):
+ iterator = iter(iterable)
+ while True:
+ chunk = tuple(itertools.islice(iterator, n))
+ if not chunk:
+ break
+ yield chunk
+
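For illustration, grouper() yields successive chunks of at most n items, with no padding on the final chunk:

    >>> list(grouper("ABCDE", 2))
    [('A', 'B'), ('C', 'D'), ('E',)]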
+def extract_strings(source, defaultlocale):
+ logger.info("Extracting page strings (please be patient)...")
+ page_strings = {}
+
+ def record_string(page, locale, name, value, comment, fixed_strings):
+ if locale != defaultlocale:
+ return
+
+ try:
+ store = page_strings[page]
+ except KeyError:
+ store = page_strings[page] = collections.OrderedDict()
+
+ store[name] = {"message": value}
+
+ if fixed_strings:
+ comment = comment + "\n" if comment else ""
+ comment += ", ".join("{%d}: %s" % i_s
+ for i_s in enumerate(fixed_strings, 1))
+ if comment:
+ store[name]["description"] = comment
+
+ for page, format in source.list_pages():
+ cms.utils.process_page(source, defaultlocale, page,
+ format=format, localized_string_callback=record_string)
+ return page_strings
+
+def configure_locales(crowdin_api, required_locales, enabled_locales,
+ defaultlocale):
+ logger.info("Checking which locales are supported by Crowdin...")
+ response = crowdin_api.request("GET", "supported-languages")
+
+ supported_locales = {l["crowdin_code"] for l in response}
+ skipped_locales = required_locales - supported_locales
+
+ if skipped_locales:
+ logger.warning("Ignoring locales that Crowdin doesn't support: %s" % (
+ ", ".join(skipped_locales)
+ ))
+ required_locales -= skipped_locales
+
+ if not required_locales.issubset(enabled_locales):
+ logger.info("Enabling the required locales for the Crowdin project...")
+ crowdin_api.request(
+ "POST", "edit-project",
+ data={"languages": list(enabled_locales | required_locales)}
Sebastian Noack 2015/07/14 11:31:05 Since .request() merely iterates over the value (i
kzar 2015/07/14 12:54:30 Done.
+ )
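As noted in the exchange above, request() iterates over non-string values itself, so the set can presumably be passed directly in a later patch set, e.g.:

    data={"languages": enabled_locales | required_locales}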
+
+ return required_locales
+
+def list_remote_files(project_info):
+ def parse_file_node(node, path=""):
+ if node["node_type"] == "file":
+ remote_files.add(path + node["name"])
+ elif node["node_type"] == "directory":
+ dir_name = path + node["name"]
+ remote_directories.add(dir_name)
+ for file in node.get("files", []):
+ parse_file_node(file, dir_name + "/")
+
+ remote_files = set()
+ remote_directories = set()
+ for node in project_info["files"]:
+ parse_file_node(node)
+ return remote_files, remote_directories
+
+def list_local_files(page_strings):
+ local_files = set()
+ local_directories = set()
+ for page, strings in page_strings.iteritems():
+ if strings:
+ local_files.add(page + ".json")
+ while "/" in page:
+ page = page.rsplit("/", 1)[0]
+ local_directories.add(page)
+ return local_files, local_directories
+
+def create_directories(crowdin_api, directories):
+ for directory in directories:
+ logger.info("Creating directory %s" % directory)
+ crowdin_api.request("POST", "add-directory", data={"name": directory})
+
+def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):
+ for group in grouper(files, crowdin_api.FILES_PER_REQUEST):
+ files = []
+ for file_name in group:
+ page = os.path.splitext(file_name)[0]
+ files.append((file_name, json.dumps(page_strings[page]), "application/json"))
+ del page_strings[page]
+ logger.info(message % len(files))
+ crowdin_api.request("POST", api_endpoint, files=files)
+
+def upload_new_files(crowdin_api, new_files, page_strings):
+ add_update_files(crowdin_api, "add-file", "Uploading %d new pages...",
+ new_files, page_strings)
+
+def update_existing_files(crowdin_api, existing_files, page_strings):
+ add_update_files(crowdin_api, "update-file", "Updating %d existing pages...",
+ existing_files, page_strings)
+
+def upload_translations(crowdin_api, source_dir, new_files, required_locales):
+ def open_locale_files(locale, files):
+ for file_name in files:
+ path = os.path.join(source_dir, "locales", locale, file_name)
+ if os.path.isfile(path):
+ with open(path, "r") as f:
+ yield (file_name, f.read(), "application/json")
+
+ if new_files:
+ for locale in required_locales:
+ for files in grouper(open_locale_files(locale, new_files),
+ crowdin_api.FILES_PER_REQUEST):
+ logger.info("Uploading %d existing translation "
+ "files for locale %s..." % (len(files), locale))
+ crowdin_api.request("POST", "upload-translation", files=files,
+ data={"language": locale})
+
+def remove_old_files(crowdin_api, old_files):
+ for file_name in old_files:
+ logger.info("Removing old file %s" % file_name)
+ crowdin_api.request("POST", "delete-file", data={"file": file_name})
+
+def remove_old_directories(crowdin_api, old_directories):
+ for directory in reversed(sorted(old_directories, key=len)):
+ logger.info("Removing old directory %s" % directory)
+ crowdin_api.request("POST", "delete-directory", data={"name": directory})
+
+def download_translations(crowdin_api, source_dir, required_locales):
+ logger.info("Requesting generation of fresh translations archive...")
+ result = crowdin_api.request("GET", "export")
+ if result.get("success", {}).get("status") == "skipped":
+ logger.warning("Archive generation skipped, either "
+ "no changes or API usage excessive")
+
+ logger.info("Downloading translations archive...")
+ response = crowdin_api.connection.request(
+ "GET",
+ "/api/project/%s/download/all.zip?key=%s" % (
Sebastian Noack 2015/07/14 11:31:07 As above, please use urllib.quote and urllib.urlen
kzar 2015/07/14 12:54:27 Done.
+ crowdin_api.project_name, crowdin_api.api_key
+ ), preload_content = False
+ )
+ if response.status < 200 or response.status >= 300:
Sebastian Noack 2015/07/14 11:31:05 How about |response.status not in xrange(200, 299)
kzar 2015/07/14 12:54:28 See above, I prefer it as is.
+ raise urllib3.exceptions.HTTPError(response.status, response.data)
+
+ logger.info("Extracting translations archive...")
+ with zipfile.ZipFile(io.BytesIO(response.data), "r") as archive:
Sebastian Noack 2015/07/14 11:31:04 The response is a file-like object by itself. So i
kzar 2015/07/14 12:54:28 I agree this _should_ work but in practice it just
Sebastian Noack 2015/07/14 14:39:28 Ah right, ZipFile() requires a file-like object th
kzar 2015/07/15 09:51:24 Glad one of us understands :p
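Context for the exchange above: zipfile.ZipFile needs a seekable file object, while the streaming urllib3 response only supports sequential read(), which is why the downloaded bytes are wrapped in an in-memory buffer:

    archive = zipfile.ZipFile(io.BytesIO(response.data), "r")  # BytesIO provides seek()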
+ locale_path = os.path.join(source_dir, "locales")
+ # First clear existing translation files
+ for root, dirs, files in os.walk(locale_path, topdown=True):
+ if root == locale_path:
+ dirs[:] = [d for d in dirs if d in required_locales]
+ for f in files:
+ if f.endswith(".json"):
+ os.remove(os.path.join(root, f))
+ # Then extract the new ones in place
+ for member in archive.namelist():
+ path, file_name = posixpath.split(member)
+ ext = posixpath.splitext(file_name)[1]
+ locale = path.split(posixpath.sep)[0]
+ if ext == ".json" and locale in required_locales:
+ archive.extract(member, locale_path)
+
+def crowdin_sync(source_dir, crowdin_api_key):
+ with FileSource(source_dir) as source:
+ config = source.read_config()
+ defaultlocale = config.get("general", "defaultlocale")
+ crowdin_project_name = config.get("general", "crowdin-project-name")
+
+ crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name)
+
+ logger.info("Requesting project information...")
+ project_info = crowdin_api.request("GET", "info")
+ page_strings = extract_strings(source, defaultlocale)
+
+ required_locales = {l for l in source.list_locales() if l != defaultlocale}
+ enabled_locales = {l["code"] for l in project_info["languages"]}
+
+ required_locales = configure_locales(crowdin_api, required_locales,
+ enabled_locales, defaultlocale)
+
+ remote_files, remote_directories = list_remote_files(project_info)
+ local_files, local_directories = list_local_files(page_strings)
+
+ # Avoid deleting all remote content if there was a problem listing local files
+ if not local_files:
+ logger.error("No existing strings found, maybe the project directory is "
+ "not set up correctly? Aborting!")
+ sys.exit(1)
+
+ new_files = local_files - remote_files
+ new_directories = local_directories - remote_directories
+ create_directories(crowdin_api, new_directories)
+ upload_new_files(crowdin_api, new_files, page_strings)
+ upload_translations(crowdin_api, source_dir, new_files, required_locales)
+
+ existing_files = local_files - new_files
+ update_existing_files(crowdin_api, existing_files, page_strings)
+
+ old_files = remote_files - local_files
+ old_directories = remote_directories - local_directories
+ remove_old_files(crowdin_api, old_files)
+ remove_old_directories(crowdin_api, old_directories)
+
+ download_translations(crowdin_api, source_dir, required_locales)
+ logger.info("Crowdin sync completed.")
+
+if __name__ == "__main__":
+ if len(sys.argv) < 3:
+ print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdin_project_api_key [logging_level]"
+ sys.exit(1)
+
+ logging.basicConfig()
+ logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)
+
+ source_dir, crowdin_api_key = sys.argv[1:3]
+ crowdin_sync(source_dir, crowdin_api_key)