cms/bin/translate.py - Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script

Side by Side Diff: cms/bin/translate.py

Issue 29317015: Issue 2625 - [cms] Crowdin synchronisation script (Closed)

Patch Set: Addressed more feedback from Sebastian Created July 15, 2015, 9:49 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # coding: utf-8

	2

	3 # This file is part of the Adblock Plus web scripts,

	4 # Copyright (C) 2006-2015 Eyeo GmbH

	5 #

	6 # Adblock Plus is free software: you can redistribute it and/or modify

	7 # it under the terms of the GNU General Public License version 3 as

	8 # published by the Free Software Foundation.

	9 #

	10 # Adblock Plus is distributed in the hope that it will be useful,

	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

	13 # GNU General Public License for more details.

	14 #

	15 # You should have received a copy of the GNU General Public License

	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

	17

	18 import collections

	19 import io

	20 import itertools

	21 import json

	22 import logging

	23 import os

	24 import posixpath

	25 import sys

	26 import urllib

	27 import zipfile

	28

	29 import urllib3

	30

	31 import cms.utils

	32 from cms.sources import FileSource

	33

	34 logger = logging.getLogger("cms.bin.translate")

	35

	36 class CrowdinAPI:

	37 FILES_PER_REQUEST = 20

	38

	39 def __init__(self, api_key, project_name):

	40 self.api_key = api_key

	41 self.project_name = project_name

	42 self.connection = urllib3.connection_from_url("https://api.crowdin.com/")

	43

	44 def raw_request(self, request_method, api_endpoint, query_params, **kwargs):

	45 url = "/api/project/%s/%s?%s" % (

	46 urllib.quote(self.project_name),

	47 urllib.quote(api_endpoint),

	48 urllib.urlencode([("key", self.api_key)] + query_params)

	49 )

	50 try:

	51 response = self.connection.request(

	52 request_method, str(url), **kwargs
	Sebastian Noack 2015/07/15 10:31:03 Converting url to an str object seems to be unnece Converting url to an str object seems to be unnecessary. I don't see any obvious code path where it isn't one. And even then urllib3 doesn't seem to bother whether it gets an str or unicode object here. kzar 2015/07/15 11:09:03 For some reason the URL was ending up being unicod Show quoted text On 2015/07/15 10:31:03, Sebastian Noack wrote: > Converting url to an str object seems to be unnecessary. I don't see any obvious > code path where it isn't one. And even then urllib3 doesn't seem to bother > whether it gets an str or unicode object here. For some reason the URL was ending up being unicode, perhaps from the project name pulled out of the config? Anyway it turns out if the URL passed to httplib is unicode then the whole request becomes unicode. Then later when httplib tries to append the body of the request an encoding exception could be thrown if there are any non-ascii characters in the body (e.g. existing Russian translations). This was the simplest way I found to make things work consistently. Sebastian Noack 2015/07/15 11:27:11 So far for urllib3. This is certainly a silly beha Show quoted text On 2015/07/15 11:09:03, kzar wrote: > On 2015/07/15 10:31:03, Sebastian Noack wrote: > > Converting url to an str object seems to be unnecessary. I don't see any > obvious > > code path where it isn't one. And even then urllib3 doesn't seem to bother > > whether it gets an str or unicode object here. > > For some reason the URL was ending up being unicode, perhaps from the project > name pulled out of the config? Anyway it turns out if the URL passed to httplib > is unicode then the whole request becomes unicode. Then later when httplib tries > to append the body of the request an encoding exception could be thrown if there > are any non-ascii characters in the body (e.g. existing Russian translations). > This was the simplest way I found to make things work consistently. So far for urllib3. This is certainly a silly behavior, if not a footgun. But yeah, nothing else/better we could do about it.
	53 )

	54

	55 if response.status < 200 or response.status >= 300:

	56 raise urllib3.exceptions.HTTPError(response.status)

	57

	58 return response
	Sebastian Noack 2015/07/15 10:31:03 Nit: The return doesn't need to be in the try..cat Nit: The return doesn't need to be in the try..catch block kzar 2015/07/15 11:09:03 Done. Show quoted text On 2015/07/15 10:31:03, Sebastian Noack wrote: > Nit: The return doesn't need to be in the try..catch block Done.
	59 except urllib3.exceptions.HTTPError:

	60 logger.error("API call to %s failed:\n%s", url, response.data)
	Sebastian Noack 2015/07/15 10:31:03 If self.connect.request() fails, "response" wouldn If self.connect.request() fails, "response" wouldn't be deifned. kzar 2015/07/15 11:09:03 Whoops, Done. Show quoted text On 2015/07/15 10:31:03, Sebastian Noack wrote: > If self.connect.request() fails, "response" wouldn't be deifned. Whoops, Done.
	61 raise

	62

	63 def request(self, request_method, api_endpoint, data=None, files=None):

	64 fields = []

	65 if data:

	66 for name, value in data.iteritems():

	67 if isinstance(value, basestring):

	68 fields.append((name, value))

	69 else:

	70 fields.extend((name + "[]", v) for v in value)

	71 if files:

	72 fields.extend(("files[%s]" % f[0], f) for f in files)

	73

	74 response = self.raw_request(

	75 request_method, api_endpoint, [("json", "1")],

	76 fields=fields, preload_content=False

	77 )

	78

	79 try:

	80 return json.load(response)

	81 except ValueError:

	82 logger.error("Invalid response returned by API endpoint %s", url)

	83 raise

	84

	85

	86 def grouper(iterable, n):

	87 iterator = iter(iterable)

	88 while True:

	89 chunk = tuple(itertools.islice(iterator, n))

	90 if not chunk:

	91 break

	92 yield chunk

	93

	94 def extract_strings(source, defaultlocale):

	95 logger.info("Extracting page strings (please be patient)...")

	96 page_strings = {}

	97

	98 def record_string(page, locale, name, value, comment, fixed_strings):

	99 if locale != defaultlocale:

	100 return

	101

	102 try:

	103 store = page_strings[page]

	104 except KeyError:

	105 store = page_strings[page] = collections.OrderedDict()

	106

	107 store[name] = {"message": value}

	108

	109 if fixed_strings:

	110 comment = comment + "\n" if comment else ""

	111 comment += ", ".join("{%d}: %s" % i_s

	112 for i_s in enumerate(fixed_strings, 1))

	113 if comment:

	114 store[name]["description"] = comment

	115

	116 for page, format in source.list_pages():

	117 cms.utils.process_page(source, defaultlocale, page,

	118 format=format, localized_string_callback=record_strin g)

	119 return page_strings

	120

	121 def configure_locales(crowdin_api, required_locales, enabled_locales,

	122 defaultlocale):

	123 logger.info("Checking which locales are supported by Crowdin...")

	124 response = crowdin_api.request("GET", "supported-languages")

	125

	126 supported_locales = {l["crowdin_code"] for l in response}

	127 skipped_locales = required_locales - supported_locales

	128

	129 if skipped_locales:

	130 logger.warning("Ignoring locales that Crowdin doesn't support: %s",

	131 ", ".join(skipped_locales))

	132 required_locales -= skipped_locales

	133

	134 if not required_locales.issubset(enabled_locales):

	135 logger.info("Enabling the required locales for the Crowdin project...")

	136 crowdin_api.request(

	137 "POST", "edit-project",

	138 data={"languages": enabled_locales \| required_locales}

	139 )

	140

	141 return required_locales

	142

	143 def list_remote_files(project_info):

	144 def parse_file_node(node, path=""):

	145 if node["node_type"] == "file":

	146 remote_files.add(path + node["name"])

	147 elif node["node_type"] == "directory":

	148 dir_name = path + node["name"]

	149 remote_directories.add(dir_name)

	150 for file in node.get("files", []):

	151 parse_file_node(file, dir_name + "/")

	152

	153 remote_files = set()

	154 remote_directories = set()

	155 for node in project_info["files"]:

	156 parse_file_node(node)

	157 return remote_files, remote_directories

	158

	159 def list_local_files(page_strings):

	160 local_files = set()

	161 local_directories = set()

	162 for page, strings in page_strings.iteritems():

	163 if strings:

	164 local_files.add(page + ".json")

	165 while "/" in page:

	166 page = page.rsplit("/", 1)[0]

	167 local_directories.add(page)

	168 return local_files, local_directories

	169

	170 def create_directories(crowdin_api, directories):

	171 for directory in directories:

	172 logger.info("Creating directory %s", directory)

	173 crowdin_api.request("POST", "add-directory", data={"name": directory})

	174

	175 def add_update_files(crowdin_api, api_endpoint, message, files, page_strings):

	176 for group in grouper(files, crowdin_api.FILES_PER_REQUEST):

	177 files = []

	178 for file_name in group:

	179 page = os.path.splitext(file_name)[0]

	180 files.append((file_name, json.dumps(page_strings[page]), "application/json "))

	181 del page_strings[page]

	182 logger.info(message, len(files))

	183 crowdin_api.request("POST", api_endpoint, files=files)

	184

	185 def upload_new_files(crowdin_api, new_files, page_strings):

	186 add_update_files(crowdin_api, "add-file", "Uploading %d new pages...",

	187 new_files, page_strings)

	188

	189 def update_existing_files(crowdin_api, existing_files, page_strings):

	190 add_update_files(crowdin_api, "update-file", "Updating %d existing pages...",

	191 existing_files, page_strings)

	192

	193 def upload_translations(crowdin_api, source_dir, new_files, required_locales):

	194 def open_locale_files(locale, files):

	195 for file_name in files:

	196 path = os.path.join(source_dir, "locales", locale, file_name)

	197 if os.path.isfile(path):

	198 with open(path, "rb") as f:

	199 yield (file_name, f.read(), "application/json")

	200

	201 if new_files:

	202 for locale in required_locales:

	203 for files in grouper(open_locale_files(locale, new_files),

	204 crowdin_api.FILES_PER_REQUEST):

	205 logger.info("Uploading %d existing translation "

	206 "files for locale %s...", len(files), locale)

	207 crowdin_api.request("POST", "upload-translation", files=files,

	208 data={"language": locale})

	209

	210 def remove_old_files(crowdin_api, old_files):

	211 for file_name in old_files:

	212 logger.info("Removing old file %s", file_name)

	213 crowdin_api.request("POST", "delete-file", data={"file": file_name})

	214

	215 def remove_old_directories(crowdin_api, old_directories):

	216 for directory in reversed(sorted(old_directories, key=len)):

	217 logger.info("Removing old directory %s", directory)

	218 crowdin_api.request("POST", "delete-directory", data={"name": directory})

	219

	220 def download_translations(crowdin_api, source_dir, required_locales):

	221 logger.info("Requesting generation of fresh translations archive...")

	222 result = crowdin_api.request("GET", "export")

	223 if result.get("success", {}).get("status") == "skipped":

	224 logger.warning("Archive generation skipped, either "

	225 "no changes or API usage excessive")

	226

	227 logger.info("Downloading translations archive...")

	228 response = crowdin_api.raw_request("GET", "download/all.zip", [])

	229

	230 logger.info("Extracting translations archive...")

	231 with zipfile.ZipFile(io.BytesIO(response.data), "r") as archive:

	232 locale_path = os.path.join(source_dir, "locales")

	233 # First clear existing translation files

	234 for root, dirs, files in os.walk(locale_path, topdown=True):

	235 if root == locale_path:

	236 dirs[:] = [d for d in dirs if d in required_locales]

	237 for f in files:

	238 if f.lower().endswith(".json"):

	239 os.remove(os.path.join(root, f))

	240 # Then extract the new ones in place

	241 for member in archive.namelist():

	242 path, file_name = posixpath.split(member)

	243 ext = posixpath.splitext(file_name)[1]

	244 locale = path.split(posixpath.sep)[0]

	245 if ext.lower() == ".json" and locale in required_locales:

	246 archive.extract(member, locale_path)

	247

	248 def crowdin_sync(source_dir, crowdin_api_key):

	249 with FileSource(source_dir) as source:

	250 config = source.read_config()

	251 defaultlocale = config.get("general", "defaultlocale")

	252 crowdin_project_name = config.get("general", "crowdin-project-name")

	253

	254 crowdin_api = CrowdinAPI(crowdin_api_key, crowdin_project_name)

	255

	256 logger.info("Requesting project information...")

	257 project_info = crowdin_api.request("GET", "info")

	258 page_strings = extract_strings(source, defaultlocale)

	259

	260 required_locales = {l for l in source.list_locales() if l != defaultlocale}

	261 enabled_locales = {l["code"] for l in project_info["languages"]}

	262

	263 required_locales = configure_locales(crowdin_api, required_locales,

	264 enabled_locales, defaultlocale)

	265

	266 remote_files, remote_directories = list_remote_files(project_info)

	267 local_files, local_directories = list_local_files(page_strings)

	268

	269 # Avoid deleting all remote content if there was a problem listing local files

	270 if not local_files:

	271 logger.error("No existing strings found, maybe the project directory is "

	272 "not set up correctly? Aborting!")

	273 sys.exit(1)

	274

	275 new_files = local_files - remote_files

	276 new_directories = local_directories - remote_directories

	277 create_directories(crowdin_api, new_directories)

	278 upload_new_files(crowdin_api, new_files, page_strings)

	279 upload_translations(crowdin_api, source_dir, new_files, required_locales)

	280

	281 existing_files = local_files - new_files

	282 update_existing_files(crowdin_api, existing_files, page_strings)

	283

	284 old_files = remote_files - local_files

	285 old_directories = remote_directories - local_directories

	286 remove_old_files(crowdin_api, old_files)

	287 remove_old_directories(crowdin_api, old_directories)

	288

	289 download_translations(crowdin_api, source_dir, required_locales)

	290 logger.info("Crowdin sync completed.")

	291

	292 if __name__ == "__main__":

	293 if len(sys.argv) < 3:

	294 print >>sys.stderr, "Usage: python -m cms.bin.translate www_directory crowdi n_project_api_key [logging_level]"

	295 sys.exit(1)

	296

	297 logging.basicConfig()

	298 logger.setLevel(sys.argv[3] if len(sys.argv) > 3 else logging.INFO)

	299

	300 source_dir, crowdin_api_key = sys.argv[1:3]

	301 crowdin_sync(source_dir, crowdin_api_key)

OLD	NEW

« no previous file with comments | « README.md ('k') | cms/converters.py » ('j') | no next file with comments »