| Index: cms/translations/xtm/utils.py |
| diff --git a/cms/translations/xtm/utils.py b/cms/translations/xtm/utils.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..2cb220cdcead84591c2582f53798f0308ff80014 |
| --- /dev/null |
| +++ b/cms/translations/xtm/utils.py |
| @@ -0,0 +1,443 @@ |
| +# This file is part of the Adblock Plus web scripts, |
| +# Copyright (C) 2006-present eyeo GmbH |
| +# |
| +# Adblock Plus is free software: you can redistribute it and/or modify |
| +# it under the terms of the GNU General Public License version 3 as |
| +# published by the Free Software Foundation. |
| +# |
| +# Adblock Plus is distributed in the hope that it will be useful, |
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| +# GNU General Public License for more details. |
| +# |
| +# You should have received a copy of the GNU General Public License |
| +# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + |
| +import collections |
| +import logging |
| +import os |
| +import time |
| +import json |
| + |
| +from cms.utils import process_page |
| +import cms.translations.xtm.constants as const |
| +from cms.translations.xtm.xtm_api import XTMCloudException |
| + |
| + |
# Names exported by a star-import of this module.
# NOTE: ``get_locales`` and ``write_to_file`` are defined in this module
# but are not part of this list.
__all__ = [
    'extract_strings',
    'resolve_locales',
    'map_locales',
    'local_to_remote',
    'remote_to_local',
    'run_and_wait',
    'sanitize_project_name',
    'read_token',
    'get_files_to_upload',
    'log_resulting_jobs',
    'clear_files',
    'input_fn',
]
| + |
| + |
def log_resulting_jobs(jobs):
    """Log the jobs created as a result of uploading files/creating projects.

    Parameters
    ----------
    jobs: iterable
        Of dicts, as returned by XTM. Every dict is expected to provide the
        keys ``jobId``, ``fileName`` and ``targetLanguage``. When the value
        is empty (or ``None``), const.InfoMessages.NO_JOBS_CREATED is logged
        instead.

    """
    # Truthiness check rather than len(): also copes with ``None``.
    if not jobs:
        logging.info(const.InfoMessages.NO_JOBS_CREATED)
        return

    for job in jobs:
        logging.info(const.InfoMessages.CREATED_JOB.format(
            job['jobId'], job['fileName'], job['targetLanguage'],
        ))
| + |
| + |
def get_files_to_upload(source):
    """Return the files to upload in the format supported by XTMCloudAPI.

    It performs the following tasks:
        1. Extract the translation strings from the website.
        2. Clean up the extracted strings (i.e. ignore all empty ones).
        3. Convert local file names to remote ones.
        4. Construct a dictionary with the format required by XTMCloudAPI.

    Parameters
    ----------
    source: cms.sources.Source

    Returns
    -------
    dict
        With the following format:
            <remote_filename>: <file_data>
        where <file_data> is the JSON-serialized strings of the page.

    Raises
    ------
    Exception
        Propagated from local_to_remote() when a remote file name would be
        too long.

    """
    # 1. + 2. Extract the strings, dropping pages without any.
    # ``items()`` (instead of ``iteritems()``) works on Python 2 and 3.
    page_strings = {
        page: strings
        for page, strings in extract_strings(source).items()
        if strings
    }

    # 3. Converting local file names.
    remote_names = local_to_remote(page_strings)

    # 4. Constructing final data structure.
    return {
        remote_name: json.dumps(page_strings[local_name])
        for local_name, remote_name in remote_names.items()
    }
| + |
| + |
def read_token():
    """Return the token stored in the pre-defined environment variable.

    The variable name comes from const.Token.ENV_VAR.

    Raises
    ------
    Exception
        If the variable is unset or empty. The message points the user to
        const.Token.CREATION_CMD.

    """
    token = os.environ.get(const.Token.ENV_VAR)
    if not token:
        raise Exception(const.ErrorMessages.NO_TOKEN_PROVIDED.format(
            const.Token.CREATION_CMD,
        ))

    return token
| + |
| + |
def sanitize_project_name(name):
    """Turn a project name into one made only of valid characters.

    Parameters
    ----------
    name: str
        The name of the project.

    Returns
    -------
    str
        The new name of the project: every character found in
        const.ProjectName.INVALID_CHARS is replaced with
        const.ProjectName.NAME_WILDCARD, and the result is cut down to
        const.ProjectName.MAX_LENGTH characters.

    """
    forbidden = const.ProjectName.INVALID_CHARS
    wildcard = const.ProjectName.NAME_WILDCARD

    cleaned = []
    for char in name:
        cleaned.append(wildcard if char in forbidden else char)

    return ''.join(cleaned)[:const.ProjectName.MAX_LENGTH]
| + |
| + |
def extract_strings(source):
    """Collect the default-locale strings of every page of a website.

    Parameters
    ----------
    source: cms.sources.Source
        The source representing the website.

    Returns
    -------
    dict
        Maps each page to an ordered mapping of
            <string_name>: {'message': <value>[, 'description': <comment>]}

    """
    logging.info(const.InfoMessages.EXTRACTING_STRINGS)
    page_strings = collections.defaultdict(collections.OrderedDict)

    config = source.read_config()
    defaultlocale = config.get(
        const.Config.MAIN_SECTION, const.Config.DEFAULT_LOCALE_OPTION,
    )

    def record_string(page, locale, name, value, comment, fixed_strings):
        # Callback invoked by process_page() for each localized string;
        # only strings of the default locale are recorded.
        if locale != defaultlocale:
            return

        entry = {'message': value}
        page_strings[page][name] = entry

        if fixed_strings:
            # Append a "{1}: first, {2}: second" legend of the fixed
            # strings on its own line after any existing comment.
            prefix = comment + '\n' if comment else ''
            legend = ', '.join(
                '{{{0}}}: {1}'.format(position, text)
                for position, text in enumerate(fixed_strings, 1)
            )
            comment = prefix + legend
        if comment:
            entry['description'] = comment

    for page, page_format in source.list_pages():
        process_page(source, defaultlocale, page, format=page_format,
                     localized_string_callback=record_string)

    return page_strings
| + |
| + |
def resolve_locales(api, source):
    """Sync a website's locales with the target languages of the API.

    Languages that exist locally but not in the remote project are added to
    the project.

    Parameters
    ----------
    api: cms.bin.xtm_translations.xtm_api.XTMCloudAPI
        Handler used to make requests to the API.
    source: cms.sources.Source
        Source representing a website.

    Raises
    ------
    Exception
        If the remote project has target languages that are not present
        locally.

    """
    logging.info(const.InfoMessages.RESOLVING_LOCALES)
    local_locales = map_locales(source)
    project_id = source.read_config().get(
        const.Config.XTM_SECTION, const.Config.PROJECT_OPTION,
    )

    # Both API calls are retried on the same exception/message pair.
    wait_args = (
        XTMCloudException,
        const.UNDER_ANALYSIS_MESSAGE,
        const.InfoMessages.WAITING_FOR_PROJECT,
    )

    remote_languages = run_and_wait(
        api.get_target_languages, *wait_args, project_id=project_id
    )
    # NOTE(review): encode() yields bytes on Python 3, which would not
    # compare equal to str locales - confirm the targeted Python version.
    enabled_locales = set(
        language.encode('utf-8') for language in remote_languages
    )

    remote_only = enabled_locales - local_locales
    if remote_only:
        raise Exception(const.ErrorMessages.LOCALES_NOT_PRESENT.format(
            remote_only, project_id,
        ))

    if local_locales == enabled_locales:
        # Nothing to add, both sides already agree.
        return

    # Add languages to the project
    langs_to_add = list(local_locales - enabled_locales)
    logging.info(const.InfoMessages.ADDING_LANGUAGES.format(
        project_id, langs_to_add,
    ))
    run_and_wait(
        api.add_target_languages,
        *wait_args,
        project_id=project_id,
        target_languages=langs_to_add
    )
| + |
| + |
def map_locales(source):
    """Map the website's locales to target languages supported by XTM.

    The default locale is excluded. A locale is kept as-is when it is in
    const.SUPPORTED_LOCALES; otherwise the ``<locale>_<LOCALE>`` form
    (e.g. ``de`` -> ``de_DE``) is tried. Locales matching neither form are
    skipped with a warning.

    Parameters
    ----------
    source: cms.sources.Source
        Source representing a website.

    Returns
    -------
    set
        Of the resulting mapped locales.

    """
    default_locale = source.read_config().get(
        const.Config.MAIN_SECTION, const.Config.DEFAULT_LOCALE_OPTION,
    )

    mapped_locales = set()

    for locale in source.list_locales() - {default_locale}:
        candidates = (locale, '{0}_{1}'.format(locale, locale.upper()))
        # First candidate spelling known to XTM wins.
        match = next(
            (name for name in candidates if name in const.SUPPORTED_LOCALES),
            None,
        )
        if match is None:
            logging.warning(
                const.WarningMessages.LOCALE_NOT_SUPPORTED.format(locale),
            )
        else:
            mapped_locales.add(match)

    return mapped_locales
| + |
| + |
def local_to_remote(local_names):
    """Convert local file names to valid remote ones.

    Path separators are replaced with const.FileNames.PATH_SEP_REP and a
    ``.json`` extension is appended.

    Parameters
    ----------
    local_names: iterable
        The local file names (without any extension).

    Returns
    -------
    dict
        With the following format:
            <local_filename>: <remote_filename>

    Raises
    ------
    Exception
        If a remote name is longer than const.FileNames.MAX_LENGTH.

    """
    files = {}

    for page in local_names:
        flattened = page.replace(os.path.sep, const.FileNames.PATH_SEP_REP)
        remote_name = '{}.json'.format(flattened)

        if len(remote_name) <= const.FileNames.MAX_LENGTH:
            files[page] = remote_name
            continue

        raise Exception(
            const.ErrorMessages.FILENAME_TOO_LONG.format(
                '{}.json'.format(page), const.FileNames.MAX_LENGTH,
            ))

    return files
| + |
| + |
def remote_to_local(filename, source_dir, locales):
    """Parse a remote filename and construct a local filesystem name.

    The remote name is split on const.FileNames.PATH_SEP_REP; its first
    non-empty element is treated as the locale directory.

    Parameters
    ----------
    filename: str
        The remote filename.
    source_dir: str
        The path to the source directory of the file.
    locales: iterator
        Of local language directories.

    Returns
    -------
    str
        The full path of the file.

    Raises
    ------
    Exception
        If the remote locale cannot be matched to any of the local ones.

    """
    parts = filename.split(const.FileNames.PATH_SEP_REP)
    if parts[0] == '':
        # Drop the empty element produced by a leading separator.
        parts = parts[1:]

    remote_locale = parts[0]
    if remote_locale not in locales:
        # Fall back to the part in front of the first underscore.
        pieces = remote_locale.split('_')
        candidate_locale = pieces[0]
        if candidate_locale not in locales:
            raise Exception(
                const.ErrorMessages.CANT_RESOLVE_REMOTE_LANG.format(
                    remote_locale,
                ),
            )

        # Keep whatever follows the first len(candidate_locale) characters
        # of the second piece as a suffix of the local name.
        parts[0] = candidate_locale + pieces[1][len(candidate_locale):]

    return os.path.join(source_dir, *parts)
| + |
| + |
def run_and_wait(func, exc, err_msg, user_msg=None, retry_delay=1,
                 retries=10, **kw):
    """Run a function and, if a specific exception occurs, try again.

    Tries to run the function, and if an exception with a specific message
    is raised, it sleeps and tries again. The delay doubles after every
    failed attempt, capped at const.MAX_WAIT_TIME.

    Parameters
    ----------
    func: function
        The function to be run.
    exc: Exception
        The exception we expect to be raised.
    err_msg: str
        The message we expect to be in the exception.
    user_msg: str
        Message to be displayed to the user once three attempts have failed.
    retry_delay: int
        The amount of time to wait before the first retry (in seconds).
    retries: int
        The number of retries left until actually raising the exception.
    kw: dict
        The keyword arguments for the function.

    Returns
    -------
    The return result of the function.

    Raises
    ------
    exc
        Re-raised unchanged when its message does not contain `err_msg`, or
        when no retries are left.

    """
    # Number of failed attempts after which the user is told we are waiting.
    notify_after = 3
    failed_attempts = 0

    while True:
        if user_msg and failed_attempts == notify_after:
            logging.info(user_msg)
        try:
            return func(**kw)
        except exc as err:
            # Out of retries, or not the failure we were told to wait on.
            if retries <= 0 or err_msg not in str(err):
                raise

        time.sleep(retry_delay)
        retry_delay = min(retry_delay * 2, const.MAX_WAIT_TIME)
        retries -= 1
        failed_attempts += 1
| + |
| + |
def clear_files(dir_path, required_locales, extension='.json'):
    """Delete translation files with a specific extension from dir_path.

    Files directly inside `dir_path` and inside the sub-trees of the
    required locale directories are removed; other top-level directories
    are not visited.

    Parameters
    ----------
    dir_path: str
        Path to the root of the subtree we want to delete the files from.
    required_locales: iterable
        Only directories from required_locales will be included.
    extension: str
        The extension of the files to delete (matched case-insensitively).

    """
    # Materialize once: a one-shot iterable would otherwise be exhausted by
    # the first membership test.
    wanted_dirs = set(required_locales)
    suffix = extension.lower()

    for root, dirs, files in os.walk(dir_path, topdown=True):
        if root == dir_path:
            # Prune in place so os.walk only descends into wanted locales.
            dirs[:] = [d for d in dirs if d in wanted_dirs]
        for f in files:
            if f.lower().endswith(suffix):
                os.remove(os.path.join(root, f))
| + |
| + |
def get_locales(path, default):
    """List the locales available in a website.

    Every sub-directory of `path` counts as a locale; the default language
    is left out of the result.

    Parameters
    ----------
    path: str
        The path to the locales directory.
    default: str
        The default language for the website.

    Returns
    -------
    list
        Of the names of the available locales.

    """
    locales = []

    for entry in os.listdir(path):
        if entry == default:
            continue
        if os.path.isdir(os.path.join(path, entry)):
            locales.append(entry)

    return locales
| + |
| + |
def write_to_file(data, file_path):
    """Write data to a given file path.

    If the directory path does not exist, then it will be created by default.

    Parameters
    ----------
    data: bytes
        The data to be written to the file.
    file_path: str
        The path of the file we want to write.

    """
    dirs_path = os.path.dirname(file_path)
    # A bare file name has no directory part; os.makedirs('') would raise.
    if dirs_path and not os.path.isdir(dirs_path):
        os.makedirs(dirs_path)

    with open(file_path, 'wb') as f:
        f.write(data)
| + |
| + |
def input_fn(text):
    """Prompt the user and return the line typed, on Python 2 and 3.

    Parameters
    ----------
    text: str
        The prompt to display.

    Returns
    -------
    str
        The user's answer.

    """
    try:
        reader = raw_input
    except NameError:
        # Python 3: raw_input() was renamed to input(). Only the missing
        # name triggers the fallback, so a failing raw_input() on Python 2
        # never ends up in the evaluating input().
        reader = input

    return reader(text)