| Index: eyeo-depup/src/vcs.py |
| diff --git a/eyeo-depup/src/vcs.py b/eyeo-depup/src/vcs.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..d38a9a6ec5f44c33d131452534ddb575eea9239e |
| --- /dev/null |
| +++ b/eyeo-depup/src/vcs.py |
| @@ -0,0 +1,327 @@ |
| +# This file is part of Adblock Plus <https://adblockplus.org/>, |
| +# Copyright (C) 2006-present eyeo GmbH |
| +# |
| +# Adblock Plus is free software: you can redistribute it and/or modify |
| +# it under the terms of the GNU General Public License version 3 as |
| +# published by the Free Software Foundation. |
| +# |
| +# Adblock Plus is distributed in the hope that it will be useful, |
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| +# GNU General Public License for more details. |
| +# |
| +# You should have received a copy of the GNU General Public License |
| +# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + |
| +"""VCS related classes for eyeo-depup.""" |
| + |
| +from __future__ import print_function, unicode_literals |
| + |
| +import io |
| +import json |
| +import logging |
| +import os |
| +import shutil |
| +import subprocess |
| +import sys |
| +import tempfile |
| + |
| +logging.basicConfig() |
| +logger = logging.getLogger('vcs') |
| + |
| + |
| +class Vcs(object): |
| + """Baseclass for Git and Mercurial.""" |
| + |
| + JSON_DQUOTES = '__DQ__' |
| + |
| + class VcsException(Exception): |
| + """Raised when no distinct VCS for a given repository was found.""" |
| + |
| + def __init__(self, location, force_clone=False): |
| + """Construct a Vcs object for a given location. |
| + |
| + parameters: |
| + location: The repository location, may be a local folder or a remote |
| + location. |
| + |
| + When the specified location does not exist locally, Vcs will attempt |
| + to create a temporary repository, cloned from the given location. |
| + |
| + """ |
| + self._source, self._repository = os.path.split(location) |
| + if not os.path.exists(location) or force_clone: |
| + self._make_temporary(location) |
| + self._clean_up = True |
| + else: |
| + self._cwd = location |
| + self._clean_up = False |
| + |
| + def __enter__(self): |
| + """Enter the object's context.""" |
| + return self |
| + |
| + def __exit__(self, exc_type, exc_value, traceback): |
| + """Exit the object'c context and delete any temporary data.""" |
| + if self._clean_up: |
| + shutil.rmtree(self._cwd) |
| + |
| + @classmethod |
| + def is_vcs_for_repo(cls, path): |
| + """Assert if cls is a suitable VCS for the given (repository-) path.""" |
| + try: |
| + subprocess.check_output([cls.EXECUTABLE, 'status'], cwd=path, |
| + stderr=subprocess.STDOUT) |
| + return True |
| + except subprocess.CalledProcessError: |
| + return False |
| + |
| + def run_cmd(self, *args, **kwargs): |
| + """Run the vcs with the given commands.""" |
| + cmd = self.BASE_CMD + args |
| + try: |
| + return subprocess.check_output( |
| + cmd, |
| + cwd=os.path.join(self._cwd), |
| + stderr=subprocess.STDOUT, |
| + ).decode('utf-8') |
| + except subprocess.CalledProcessError as e: |
| + logger.error(e.output.decode('utf-8')) |
| + sys.exit(1) |
| + |
| + def _get_latest(self): |
| + self.run_cmd(self.UPDATE_LOCAL_HISTORY) |
| + |
| + def _escape_changes(self, changes): |
| + return changes.replace('"', '\\"').replace(self.JSON_DQUOTES, '"') |
| + |
| + def _changes_as_json(self, changes): |
| + return json.loads( |
| + '[{}]'.format(','.join( |
| + self._escape_changes(changes).strip().splitlines() |
| + ))) |
| + |
| + def merged_diff(self, rev_a, rev_b, n_unified=16): |
| + """Invoke the VCS' functionality to create a unified diff. |
| + |
| + Parameters: |
| + rev_a: The revision representing the start. |
| + rev_b: The revision representing the end. Defaults to |
| + Cls.DEFAULT_NEW_REVISION |
| + n_unified: The amount of context lines to add to the diff. |
| + |
| + """ |
| + return self.run_cmd('diff', '--unified=' + str(n_unified), |
| + *(self._rev_comb( |
| + rev_a, |
| + rev_b or self.DEFAULT_NEW_REVISION))) |
| + |
| + def change_list(self, rev_a, rev_b): |
| + """Return the repository's history from revisions a to b as JSON. |
| + |
| + Parameters: |
| + rev_a: The revision representing the start. |
| + rev_b: The revision representing the end. Defaults to |
| + Cls.DEFAULT_NEW_REVISION |
| + |
| + """ |
| + self._get_latest() |
| + |
| + log_format = self._log_format() |
| + rev_cmd = self._rev_comb(rev_a, rev_b or self.DEFAULT_NEW_REVISION) |
| + |
| + changes = self.run_cmd(*('log',) + log_format + rev_cmd) |
| + return self._changes_as_json(changes) |
| + |
| + def enhance_changes_information(self, changes, dependency_location, fake): |
| + """Enhance the change list with matching revisions from a mirror. |
| + |
| + Parameters: |
| + changes: The list to enhance, containing dictionaries |
| + with the keys "hash", "author", "date" and |
| + "message". |
| + dependency_location: The (remote or locale) location of the |
| + repository, which is supposed to be the mirror |
| + for the current repository. |
| + fake {True, False}: Causes atual processing of a mirror repository |
| + (False) or fakes values (True) |
| + |
| + """ |
| + self_ex = self.EXECUTABLE |
| + mirr_ex = self._other_cls.EXECUTABLE |
| + |
| + if not fake: |
| + with self._other_cls(dependency_location) as mirror: |
| + mirror._get_latest() |
| + |
| + mirrored_hashes = { |
| + change['hash']: mirror.matching_hash(change['author'], |
| + change['date'], |
| + change['message']) |
| + for change in changes |
| + } |
| + else: |
| + mirrored_hashes = {} |
| + |
| + for change in changes: |
| + change[self_ex + '_url'] = self.REVISION_URL.format( |
| + repository=self._repository, revision=change['hash']) |
| + change[self_ex + '_hash'] = change['hash'] |
| + |
| + mirrored_hash = mirrored_hashes.get(change['hash'], 'NO MIRROR') |
| + del change['hash'] |
| + |
| + change[mirr_ex + '_url'] = self._other_cls.REVISION_URL.format( |
| + repository=self._repository, revision=mirrored_hash) |
| + change[mirr_ex + '_hash'] = mirrored_hash |
| + |
| + @staticmethod |
| + def factory(location, force_clone=False): |
| + """Get a suiting Vcs instance for the given repository path.""" |
| + obj = None |
| + for cls in [Git, Mercurial]: |
| + if cls.is_vcs_for_repo(location): |
| + if obj is not None: |
| + raise Vcs.VcsException( |
| + "Found multiple possible VCS' for " + location) |
| + obj = cls(location, force_clone) |
| + |
| + if obj is None: |
| + raise Vcs.VcsException('No valid VCS found for ' + location) |
| + return obj |
| + |
| + |
| +class Mercurial(Vcs): |
| + """Mercurial specialization of VCS.""" |
| + |
| + EXECUTABLE = 'hg' |
| + BASE_CMD = (EXECUTABLE, '--config', 'defaults.log=', '--config', |
| + 'defaults.pull=', '--config', 'defaults.diff=') |
| + UPDATE_LOCAL_HISTORY = 'pull' |
| + LOG_TEMLATE = ('\\{"hash":"{node|short}","author":"{author|person}",' |
| + '"date":"{date|rfc822date}","message":"{desc|strip|' |
| + 'firstline}"}\n') |
| + DEFAULT_NEW_REVISION = 'master' |
| + |
| + REVISION_URL = 'https://hg.adblockplus.org/{repository}/rev/{revision}' |
| + |
| + def __init__(self, *args): |
| + """Construct a Mercurial object and specify Git as the mirror class.""" |
| + self._other_cls = Git |
| + super(Mercurial, self).__init__(*args) |
| + |
| + def _rev_comb(self, rev_a, rev_b): |
| + # Only take into account those changesets, which are actually affecting |
| + # the repository's content. See |
| + # https://www.mercurial-scm.org/repo/hg/help/revsets |
| + return ('-r', '{}::{}'.format(rev_a, rev_b)) |
| + |
| + def _log_format(self): |
| + log_format = self.LOG_TEMLATE.replace('"', self.JSON_DQUOTES) |
| + return ('--template', log_format) |
| + |
| + def change_list(self, *args): |
| + """Apply measures for hg log and call Vcs's change_list.""" |
| + # Mercurial's command for producing a log between revisions using the |
| + # revision set produced by self._rev_comb returns the changesets in a |
| + # reversed order. Additionally the current revision is returned. |
| + return list(reversed(super(Mercurial, self).change_list(*args)[1:])) |
| + |
| + def matching_hash(self, author, date, message): |
| + """Get the responsible commit for the given information. |
| + |
| + A commit must stafisy equailty for author, date and commit message, in |
| + order to be recognized as the matching commit. |
| + |
| + """ |
| + return self.run_cmd('log', '-u', author, '-d', date, '--keyword', |
| + message, '--template', '{node|short}') |
| + |
| + def _make_temporary(self, location): |
| + self._cwd = tempfile.mkdtemp() |
| + os.mkdir(os.path.join(self._cwd, '.hg')) |
| + |
| + with io.open(os.path.join(self._cwd, '.hg', 'hgrc'), 'w') as fp: |
| + fp.write('[paths]{}default = {}{}'.format(os.linesep, location, |
| + os.linesep)) |
| + |
| + def commit_changes(self, msg): |
| + """Add any local changes and commit the with <msg>.""" |
| + self.run_cmd('commit', '-m', msg) |
| + |
| + def undo_changes(self): |
| + """Undo all changes in local repsitory and leave no backup.""" |
| + self.run_cmd('revert', '--all', '--no-backup') |
| + |
| + def repo_is_clean(self): |
| + """Check whether the current repository is clean.""" |
| + buff = self.run_cmd('status') |
| + return len(buff) == 0 |
| + |
| + |
| +class Git(Vcs): |
| + """Git specialization of Vcs.""" |
| + |
| + EXECUTABLE = 'git' |
| + BASE_CMD = (EXECUTABLE,) |
| + UPDATE_LOCAL_HISTORY = 'fetch' |
| + LOG_TEMLATE = '{"hash":"%h","author":"%an","date":"%aD","message":"%s"}' |
| + DEFAULT_NEW_REVISION = 'origin/master' |
| + |
| + REVISION_URL = ('https://www.github.com/adblockplus/{repository}/commit/' |
| + '{revision}') |
| + |
| + def __init__(self, *args): |
| + """Construct a Git object and specify Mercurial as the mirror class.""" |
| + self._other_cls = Mercurial |
| + super(Git, self).__init__(*args) |
| + |
| + def _rev_comb(self, rev_a, rev_b): |
| + return ('{}..{}'.format(rev_a, rev_b),) |
| + |
| + def _log_format(self): |
| + return ('--pretty=format:{}'.format(self.LOG_TEMLATE.replace( |
| + '"', self.JSON_DQUOTES)),) |
| + |
| + def matching_hash(self, author, date, message): |
| + """Get the responsible commit for the given information. |
| + |
| + A commit must stafisy equailty for author, date and commit message, in |
| + order to be recognized as the matching commit. |
| + |
| + """ |
| + # Git does not implement exact date matching directly. Additionally, |
| + # git is only capable of filtering by COMMIT DATE instead of |
| + # AUTHOR DATE (which is what we are actually looking for), see |
| + # https://stackoverflow.com/q/37311494/ |
| + # Since naturally the COMMIT DATE allways is later then the AUTHOR |
| + # DATE, we are at least able to limit the valid range to after our |
| + # given date |
| + result = self.run_cmd('log', '--author={}'.format(author), |
| + '--grep={}'.format(message), |
| + '--after={}'.format(date), '--pretty=format:%h') |
| + if len(result.split()) > 1: |
| + raise Vcs.VcsException('FATAL: Ambiguous commit filter!') |
| + return result |
| + |
| + def _make_temporary(self, location): |
| + self._cwd = tempfile.mkdtemp() |
| + self.run_cmd('clone', '--bare', location, self._cwd) |
| + |
| + def commit_changes(self, msg): |
| + """Add any local changes and commit the with <msg>.""" |
| + self.run_cmd('add', '.') |
| + self.run_cmd('commit', '-m', msg) |
| + |
| + def undo_changes(self): |
| + """Undo all changes in local repsitory.""" |
| + self.run_cmd('checkout', '.') |
| + |
| + def repo_is_clean(self): |
| + """Check whether the current repository is clean.""" |
| + # unstaged changes |
| + no_uncommited = len(self.run_cmd('diff-index', 'HEAD', '--')) == 0 |
| + # untracked changes |
| + no_untracked = len(self.run_cmd('ls-files', '-o', '-d', |
| + '--exclude-standard')) == 0 |
| + return no_uncommited and no_untracked |