Index: cms/bin/xtm_translations/utils.py |
diff --git a/cms/bin/xtm_translations/utils.py b/cms/bin/xtm_translations/utils.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..35a407d63cac3dd4da59b0eac8f4fee1cf478152 |
--- /dev/null |
+++ b/cms/bin/xtm_translations/utils.py |
@@ -0,0 +1,439 @@ |
+# This file is part of the Adblock Plus web scripts, |
+# Copyright (C) 2006-present eyeo GmbH |
+# |
+# Adblock Plus is free software: you can redistribute it and/or modify |
+# it under the terms of the GNU General Public License version 3 as |
+# published by the Free Software Foundation. |
+# |
+# Adblock Plus is distributed in the hope that it will be useful, |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+# GNU General Public License for more details. |
+# |
+# You should have received a copy of the GNU General Public License |
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+import collections |
+import logging |
+import os |
+import time |
+import json |
+ |
+from cms.utils import process_page |
+import cms.bin.xtm_translations.constants as const |
+from cms.bin.xtm_translations.xtm_api import XTMCloudException |
+ |
+ |
# Names exported by ``from cms.bin.xtm_translations.utils import *``.
# NOTE(review): get_locales and write_to_file are defined in this module but
# are not listed here; confirm whether that omission is intentional.
__all__ = [
    'extract_strings', 'resolve_locales', 'map_locales', 'get_local_files',
    'resolve_remote_filename', 'run_and_wait', 'resolve_naming_conflicts',
    'read_token', 'get_files_to_upload', 'log_resulting_jobs', 'clear_files',
    'input_fn',
]
+ |
+ |
def log_resulting_jobs(jobs):
    """Write one info-level log line per job that XTM created.

    When `jobs` is empty a single "no jobs created" message is logged
    instead.

    Parameters
    ----------
    jobs: iterable
        Of dicts, as returned by XTM. It must support len(), and every dict
        must provide the 'jobId', 'fileName' and 'targetLanguage' keys.

    """
    if len(jobs) == 0:
        logging.info(const.InfoMessages.NO_JOBS_CREATED)
    else:
        template = const.InfoMessages.CREATED_JOB
        for job in jobs:
            message = template.format(
                job['jobId'], job['fileName'], job['targetLanguage'],
            )
            logging.info(message)
+ |
+ |
def get_files_to_upload(files, page_strings):
    """Build the (name, payload) pairs to upload for the given files.

    Parameters
    ----------
    files: set
        Of tuples, with (<local_name>, <remote_name>) format.
    page_strings: dict
        Maps a page (the local name with its extension stripped) to the
        strings of that page.

    Returns
    -------
    list
        Of (<remote_name>, <file_data>) tuples, where <file_data> is the
        JSON-serialized page_strings entry of the corresponding page.

    """
    return [
        (pair[1], json.dumps(page_strings[os.path.splitext(pair[0])[0]]))
        for pair in files
    ]
+ |
+ |
def read_token():
    """Return the token stored in the pre-defined environment variable.

    Raises
    ------
    Exception
        If the variable named by const.Token.ENV_VAR is unset or empty.

    """
    token = os.environ.get(const.Token.ENV_VAR)
    if not token:
        # !!! QUESTION !!!: Do we want to add the option for the user to
        # enter the token from the console, pretty much the same way as the
        # password, when logging in?
        raise Exception(const.ErrorMessages.NO_TOKEN_PROVIDED.format(
            const.Token.CREATION_CMD,
        ))

    return token
+ |
+ |
def resolve_naming_conflicts(name):
    """Turn a project name into one that fits the naming constraints.

    NOTE(review): despite its name, this function only sanitizes and
    truncates the given name; it does not check it against other names.

    Parameters
    ----------
    name: str
        The name of the project.

    Returns
    -------
    str
        The new name of the project, with invalid characters replaced with
        const.ProjectName.NAME_WILDCARD and the length cut down to
        const.ProjectName.MAX_LENGTH.

    """
    # !!!QUESTION!!!: https://gitlab.com/eyeo/websites/cms/issues/4 defined
    # the naming guidelines when we were going with one project per
    # issue approach. How do they change now that we have a project per
    # website, instead of GitLab issue?
    invalid_chars = const.ProjectName.INVALID_CHARS
    wildcard = const.ProjectName.NAME_WILDCARD

    sanitized = ''.join(
        wildcard if char in invalid_chars else char for char in name
    )
    # Slicing is a no-op for names that are already short enough, so no
    # explicit length check is needed.
    return sanitized[:const.ProjectName.MAX_LENGTH]
+ |
+ |
def extract_strings(source):
    """Collect the default-locale strings of every page of a website.

    Parameters
    ----------
    source: cms.sources.Source
        The source representing the website.

    Returns
    -------
    dict
        Maps each page to an ordered dict of its strings. Every string name
        maps to a dict with a 'message' key and, when a comment and/or
        fixed strings are present, a 'description' key.

    """
    logging.info(const.InfoMessages.EXTRACTING_STRINGS)
    page_strings = collections.defaultdict(collections.OrderedDict)

    defaultlocale = source.read_config().get(
        const.Config.MAIN_SECTION, const.Config.DEFAULT_LOCALE_OPTION,
    )

    def record_string(page, locale, name, value, comment, fixed_strings):
        # Only strings of the default locale are recorded.
        if locale != defaultlocale:
            return

        entry = {'message': value}
        page_strings[page][name] = entry

        if fixed_strings:
            # Append a "{1}: ..., {2}: ..." legend of the fixed strings,
            # on its own line when a comment precedes it.
            prefix = comment + '\n' if comment else ''
            legend = ', '.join(
                '{{{0}}}: {1}'.format(index, fixed)
                for index, fixed in enumerate(fixed_strings, 1)
            )
            comment = prefix + legend
        if comment:
            entry['description'] = comment

    for page, page_format in source.list_pages():
        process_page(source, defaultlocale, page, format=page_format,
                     localized_string_callback=record_string)
    return page_strings
+ |
+ |
def resolve_locales(api, source):
    """Make the XTM project's target languages match the website locales.

    Locales that exist locally but are not yet enabled in the project are
    added to it.

    Parameters
    ----------
    api: cms.bin.xtm_translations.xtm_api.XTMCloudAPI
        Handler used to make requests to the API.
    source: cms.sources.Source
        Source representing a website.

    Raises
    ------
    Exception
        If the project has target languages that are not among the mapped
        local locales.

    """
    logging.info(const.InfoMessages.RESOLVING_LOCALES)
    local_locales = map_locales(source)
    project_id = source.read_config().get(
        const.Config.XTM_SECTION, const.Config.PROJECT_OPTION,
    )

    languages = run_and_wait(
        api.get_target_languages,
        XTMCloudException,
        const.UNDER_ANALYSIS_MESSAGE,
        const.InfoMessages.WAITING_FOR_PROJECT,
        project_id=project_id,
    )

    enabled_locales = {l.encode('utf-8') for l in languages}

    # Languages enabled remotely without a local counterpart are an error.
    remote_only = enabled_locales - local_locales
    if remote_only:
        raise Exception(const.ErrorMessages.LOCALES_NOT_PRESENT.format(
            remote_only, project_id,
        ))

    if local_locales == enabled_locales:
        return

    # Add languages to the project
    langs_to_add = list(local_locales - enabled_locales)
    logging.info(const.InfoMessages.ADDING_LANGUAGES.format(
        project_id, langs_to_add,
    ))
    run_and_wait(
        api.add_target_languages,
        XTMCloudException,
        const.UNDER_ANALYSIS_MESSAGE,
        const.InfoMessages.WAITING_FOR_PROJECT,
        project_id=project_id,
        target_languages=langs_to_add,
    )
+ |
+ |
def map_locales(source):
    """Translate the website's locales into XTM target languages.

    The default locale is left out. A locale is used as-is when it appears
    in const.SUPPORTED_LOCALES; otherwise '<locale>_<LOCALE>' is tried. A
    warning is logged for locales that match neither way, and they are
    skipped.

    Parameters
    ----------
    source: cms.sources.Source
        Source representing a website.

    Returns
    -------
    set
        Of the resulting mapped locales.

    """
    config = source.read_config()
    defaultlocale = config.get(const.Config.MAIN_SECTION,
                               const.Config.DEFAULT_LOCALE_OPTION)

    mapped_locales = set()

    for locale in source.list_locales() - {defaultlocale}:
        if locale in const.SUPPORTED_LOCALES:
            mapped_locales.add(locale)
            continue

        xtm_locale = '{0}_{1}'.format(locale, locale.upper())
        if xtm_locale in const.SUPPORTED_LOCALES:
            mapped_locales.add(xtm_locale)
            continue

        logging.warning(
            const.WarningMessages.LOCALE_NOT_SUPPORTED.format(locale),
        )

    return mapped_locales
+ |
+ |
def get_local_files(page_strings):
    """List the local files and their remote names from page_strings.

    Pages without any strings are skipped. For every remaining page the
    local name is '<page>.json' and the remote name is the local name with
    os.path.sep replaced by const.FILENAMES['path_sep_rep'].

    NOTE(review): a reviewer pointed out that this function does several
    things at once (selecting pages, naming files, validating lengths).

    Parameters
    ----------
    page_strings: dict
        The parsed strings for all the pages.

    Returns
    -------
    set
        With the local files. Each element in the set is a tuple with the
        format:
            (<local_file_name>, <remote_file_name>)

    Raises
    ------
    Exception
        If a remote file name is longer than
        const.FILENAMES['max_length'].

    """
    files = set()

    # items() rather than iteritems(): the latter only exists on Python 2.
    for page, strings in page_strings.items():
        if not strings:
            continue

        local_name = page + '.json'
        remote_name = local_name.replace(
            os.path.sep, const.FILENAMES['path_sep_rep'],
        )
        if len(remote_name) > const.FILENAMES['max_length']:
            raise Exception(const.ErrorMessages.FILENAME_TOO_LONG.format(
                local_name, const.FILENAMES['max_length'],
            ))
        files.add((local_name, remote_name))

    return files
+ |
+ |
def resolve_remote_filename(filename, source_dir, locales):
    """Convert a remote file name into a path on the local filesystem.

    The remote name is split on const.FILENAMES['path_sep_rep']; its first
    non-empty element is taken to be the language. If that language is not
    a local locale, the part before the first '_' is tried instead.

    Parameters
    ----------
    filename: str
        The remote filename.
    source_dir: str
        The path to the source directory of the file.
    locales: iterator
        Of local language directories. It is queried with `in`.

    Returns
    -------
    str
        The full path of the file.

    Raises
    ------
    Exception
        If the language of the remote file cannot be matched to any of
        the given locales.

    """
    path_elements = filename.split(const.FILENAMES['path_sep_rep'])
    # A leading separator produces an empty first element; drop it.
    if path_elements[0] == '':
        del path_elements[0]

    remote_locale = path_elements[0]
    if remote_locale not in locales:
        locale_parts = remote_locale.split('_')
        candidate_locale = locale_parts[0]
        if candidate_locale not in locales:
            raise Exception(
                const.ErrorMessages.CANT_RESOLVE_REMOTE_LANG.format(
                    remote_locale,
                ),
            )

        # Keep the candidate plus whatever the second part has beyond the
        # candidate's length.
        suffix = locale_parts[1][len(candidate_locale):]
        path_elements[0] = candidate_locale + suffix

    return os.path.join(source_dir, *path_elements)
+ |
+ |
def run_and_wait(func, exp, err_msg, user_msg=None, wait=1, max_tries=10,
                 step=1, **kw):
    """Run a function and, if a specific exception occurs, try again.

    Tries to run the function, and if an exception of type `exp` whose
    message contains `err_msg` is raised, it sleeps and tries again. The
    delay doubles after every failed try, capped at const.MAX_WAIT_TIME.
    Any other exception, or a matching one on the last allowed try, is
    re-raised to the caller.

    Parameters
    ----------
    func: function
        The function to be run. It is called with the keyword arguments
        from `kw` only.
    exp: Exception
        The exception we expect to be raised.
    err_msg: str
        The message we expect to be in the exception.
    user_msg: str
        Message to be logged for the user right before the third try.
    wait: int
        The amount of time, in seconds, to sleep before the next try.
    max_tries: int
        The maximum number of tries until giving up.
    step: int
        The try we're at.
    kw: dict
        The keyword arguments for the function.

    Returns
    -------
    The return result of the function.

    """
    while True:
        if step == 3 and user_msg:
            logging.info(user_msg)
        try:
            return func(**kw)
        except exp as err:
            # Give up once the tries are used up (step counts from 1, so
            # the comparison must be inclusive) or when the failure is
            # not the one we were told to wait for.
            if step >= max_tries or err_msg not in str(err):
                raise

        time.sleep(wait)
        wait = min(wait * 2, const.MAX_WAIT_TIME)
        step += 1
+ |
+ |
def clear_files(dir_path, required_locales, extension='.json'):
    """Delete files with an extension from the tree starting with dir_path.

    Matching files located directly in dir_path are deleted as well. Of
    the directories directly below dir_path, only those listed in
    required_locales are descended into. The extension is compared
    case-insensitively.

    NOTE(review): a reviewer remarked that this function seems to be about
    deleting translation files specifically - confirm the intended scope.

    Parameters
    ----------
    dir_path: str
        Path to the root of the subtree we want to delete the files from.
    required_locales: iterable
        Only directories from required_locales will be included.
    extension: str
        The extension of the files to delete.

    """
    suffix = extension.lower()

    for current_dir, subdirs, filenames in os.walk(dir_path, topdown=True):
        if current_dir == dir_path:
            # Prune in place so that os.walk skips the other directories.
            subdirs[:] = [
                name for name in subdirs if name in required_locales
            ]
        for filename in filenames:
            if filename.lower().endswith(suffix):
                os.remove(os.path.join(current_dir, filename))
+ |
+ |
def get_locales(path, default):
    """List the locales available in a website.

    Every directory directly inside `path` counts as a locale. The default
    language is excluded from the list.

    NOTE(review): per the review thread, the directory is read directly on
    purpose instead of using source.list_locales(); the rationale is not
    recorded here.

    Parameters
    ----------
    path: str
        The path to the locales directory.
    default: str
        The default language for the website.

    Returns
    -------
    list
        Of the available locales, in the order os.listdir() reports them.

    """
    locales = []
    for entry in os.listdir(path):
        if entry == default:
            continue
        if os.path.isdir(os.path.join(path, entry)):
            locales.append(entry)
    return locales
+ |
+ |
def write_to_file(data, file_path):
    """Write data to a given file path.

    If the directory path does not exist, then it will be created by
    default. The file is opened in binary mode and any existing content is
    overwritten.

    Parameters
    ----------
    data: bytes
        The data to be written to the file.
    file_path: str
        The path of the file we want to write.

    """
    dirs_path = os.path.dirname(file_path)
    # A bare file name has an empty directory part, and os.makedirs('')
    # raises, so only create directories when there is something to create.
    if dirs_path and not os.path.isdir(dirs_path):
        os.makedirs(dirs_path)

    with open(file_path, 'wb') as f:
        f.write(data)
+ |
+ |
def input_fn(text):
    """Prompt the user with `text` and return the line they typed.

    Uses raw_input() where it exists (Python 2) and input() otherwise.

    Parameters
    ----------
    text: str
        The prompt to display.

    Returns
    -------
    str
        The text entered by the user.

    """
    # Guard only the name lookup. On Python 2, input() evaluates what the
    # user types, so a failure while reading (e.g. EOFError) must propagate
    # rather than silently fall back to it.
    try:
        read_line = raw_input  # noqa: F821 - only defined on Python 2
    except NameError:
        read_line = input
    return read_line(text)