Index: eyeo-depup/src/vcs.py |
diff --git a/eyeo-depup/src/vcs.py b/eyeo-depup/src/vcs.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d38a9a6ec5f44c33d131452534ddb575eea9239e |
--- /dev/null |
+++ b/eyeo-depup/src/vcs.py |
@@ -0,0 +1,327 @@ |
+# This file is part of Adblock Plus <https://adblockplus.org/>, |
+# Copyright (C) 2006-present eyeo GmbH |
+# |
+# Adblock Plus is free software: you can redistribute it and/or modify |
+# it under the terms of the GNU General Public License version 3 as |
+# published by the Free Software Foundation. |
+# |
+# Adblock Plus is distributed in the hope that it will be useful, |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+# GNU General Public License for more details. |
+# |
+# You should have received a copy of the GNU General Public License |
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ |
+"""VCS related classes for eyeo-depup.""" |
+ |
+from __future__ import print_function, unicode_literals |
+ |
+import io |
+import json |
+import logging |
+import os |
+import shutil |
+import subprocess |
+import sys |
+import tempfile |
+ |
+# Module-wide logger, used to report the output of failed VCS commands. |
+logger = logging.getLogger('vcs') |
+# Attach the default handler to the root logger (if it has none yet), so |
+# that the records logged by this module are actually emitted. |
+logging.basicConfig() |
+ |
+ |
+class Vcs(object): |
+    """Common base class for the Git and Mercurial wrappers. |
+ |
+    Subclasses are expected to provide the class attributes EXECUTABLE, |
+    BASE_CMD, UPDATE_LOCAL_HISTORY, DEFAULT_NEW_REVISION and REVISION_URL, |
+    the instance attribute _other_cls as well as the methods _rev_comb(), |
+    _log_format(), _make_temporary() and matching_hash(). |
+ |
+    Instances may be used as context managers: A temporary repository, |
+    which was created by the constructor, is deleted when leaving the |
+    context. |
+ |
+    """ |
+ |
+    # Placeholder which the log templates use instead of double quotes, in |
+    # order to tell the quotes of the JSON structure apart from quotes |
+    # occurring inside the logged values (see _escape_changes()). |
+    JSON_DQUOTES = '__DQ__' |
+ |
+    class VcsException(Exception): |
+        """Raised when no distinct VCS for a given repository was found.""" |
+ |
+    def __init__(self, location, force_clone=False): |
+        """Construct a Vcs object for a given location. |
+ |
+        Parameters: |
+            location:    The repository location, may be a local folder or |
+                         a remote location. |
+            force_clone: Create a temporary repository from the given |
+                         location, even if it does exist locally. |
+ |
+        When the specified location does not exist locally, Vcs will attempt |
+        to create a temporary repository, cloned from the given location. |
+ |
+        """ |
+        # Ignore trailing separators when determining the repository's |
+        # name, otherwise "path/to/repo/" would result in an empty name. |
+        trimmed = location.rstrip('/' + os.sep) or location |
+        self._source, self._repository = os.path.split(trimmed) |
+ |
+        if force_clone or not os.path.exists(location): |
+            self._make_temporary(location) |
+            self._clean_up = True |
+        else: |
+            self._cwd = location |
+            self._clean_up = False |
+ |
+    def __enter__(self): |
+        """Enter the object's context.""" |
+        return self |
+ |
+    def __exit__(self, exc_type, exc_value, traceback): |
+        """Exit the object's context and delete any temporary data.""" |
+        if self._clean_up: |
+            shutil.rmtree(self._cwd) |
+ |
+    @classmethod |
+    def is_vcs_for_repo(cls, path): |
+        """Check whether cls is a suitable VCS for the given repository path. |
+ |
+        Runs "<EXECUTABLE> status" inside of path and returns True if this |
+        succeeds. Returns False if the command fails, if the executable |
+        could not be started at all (e.g. because it is not installed) or |
+        if path is not an existing local directory. |
+ |
+        """ |
+        try: |
+            subprocess.check_output([cls.EXECUTABLE, 'status'], cwd=path, |
+                                    stderr=subprocess.STDOUT) |
+        except (subprocess.CalledProcessError, OSError): |
+            # OSError covers a missing executable as well as an invalid |
+            # working directory, neither must abort the detection. |
+            return False |
+        return True |
+ |
+    def run_cmd(self, *args, **kwargs): |
+        """Run the VCS executable with the given arguments. |
+ |
+        Parameters: |
+            *args:    The command line arguments, appended to BASE_CMD. |
+            **kwargs: Accepted, but currently not evaluated. |
+ |
+        Returns the command's output (stdout and stderr combined), decoded |
+        as UTF-8. If the command exits with a non-zero status, its output |
+        is logged as an error and the process is terminated with exit |
+        code 1. |
+ |
+        """ |
+        cmd = self.BASE_CMD + args |
+        try: |
+            output = subprocess.check_output( |
+                cmd, |
+                cwd=self._cwd, |
+                stderr=subprocess.STDOUT, |
+            ) |
+        except subprocess.CalledProcessError as e: |
+            logger.error(e.output.decode('utf-8')) |
+            sys.exit(1) |
+        return output.decode('utf-8') |
+ |
+    def _get_latest(self): |
+        # Update the local history (i.e. "fetch" or "pull"). |
+        self.run_cmd(self.UPDATE_LOCAL_HISTORY) |
+ |
+    def _escape_changes(self, changes): |
+        # Turn the raw log output into valid JSON text: Escape everything |
+        # which is special inside of a JSON string, then substitute the |
+        # placeholders by the structural double quotes. Backslashes have to |
+        # be handled first, otherwise the backslashes introduced for the |
+        # quotes would be doubled as well. |
+        escaped = changes.replace('\\', '\\\\').replace('"', '\\"') |
+        return escaped.replace(self.JSON_DQUOTES, '"') |
+ |
+    def _changes_as_json(self, changes): |
+        # Every line of the log output represents one JSON object, all of |
+        # them are combined to a JSON list. |
+        entries = self._escape_changes(changes).strip().splitlines() |
+        # strict=False tolerates raw control characters (e.g. tabs), which |
+        # may occur in commit messages. |
+        return json.loads('[{}]'.format(','.join(entries)), strict=False) |
+ |
+    def merged_diff(self, rev_a, rev_b, n_unified=16): |
+        """Invoke the VCS' functionality to create a unified diff. |
+ |
+        Parameters: |
+            rev_a:     The revision representing the start. |
+            rev_b:     The revision representing the end. A false value |
+                       (e.g. None) selects cls.DEFAULT_NEW_REVISION. |
+            n_unified: The amount of context lines to add to the diff. |
+ |
+        Returns the diff as text. |
+ |
+        """ |
+        revisions = self._rev_comb(rev_a, |
+                                   rev_b or self.DEFAULT_NEW_REVISION) |
+        return self.run_cmd('diff', '--unified=' + str(n_unified), |
+                            *revisions) |
+ |
+    def change_list(self, rev_a, rev_b): |
+        """Return the repository's history from revisions a to b. |
+ |
+        Parameters: |
+            rev_a: The revision representing the start. |
+            rev_b: The revision representing the end. A false value |
+                   (e.g. None) selects cls.DEFAULT_NEW_REVISION. |
+ |
+        Returns a list of dictionaries, parsed from the JSON formatted log |
+        output. The local history is updated beforehand. |
+ |
+        """ |
+        self._get_latest() |
+ |
+        log_format = self._log_format() |
+        rev_cmd = self._rev_comb(rev_a, rev_b or self.DEFAULT_NEW_REVISION) |
+ |
+        changes = self.run_cmd('log', *(log_format + rev_cmd)) |
+        return self._changes_as_json(changes) |
+ |
+    def enhance_changes_information(self, changes, dependency_location, fake): |
+        """Enhance the change list with matching revisions from a mirror. |
+ |
+        Parameters: |
+            changes:             The list to enhance, containing dictionaries |
+                                 with the keys "hash", "author", "date" and |
+                                 "message". |
+            dependency_location: The (remote or local) location of the |
+                                 repository, which is supposed to be the |
+                                 mirror for the current repository. |
+            fake {True, False}:  Causes actual processing of a mirror |
+                                 repository (False) or fakes values (True). |
+ |
+        The dictionaries in changes are modified in place: "hash" is |
+        replaced by "<executable>_hash" and "<executable>_url" for both this |
+        VCS and the mirror's VCS. When faking, the mirror's hash is |
+        'NO MIRROR'. |
+ |
+        """ |
+        self_ex = self.EXECUTABLE |
+        mirr_ex = self._other_cls.EXECUTABLE |
+ |
+        if not fake: |
+            with self._other_cls(dependency_location) as mirror: |
+                mirror._get_latest() |
+ |
+                mirrored_hashes = { |
+                    change['hash']: mirror.matching_hash(change['author'], |
+                                                         change['date'], |
+                                                         change['message']) |
+                    for change in changes |
+                } |
+        else: |
+            mirrored_hashes = {} |
+ |
+        for change in changes: |
+            change[self_ex + '_url'] = self.REVISION_URL.format( |
+                repository=self._repository, revision=change['hash']) |
+            change[self_ex + '_hash'] = change['hash'] |
+ |
+            mirrored_hash = mirrored_hashes.get(change['hash'], 'NO MIRROR') |
+            del change['hash'] |
+ |
+            change[mirr_ex + '_url'] = self._other_cls.REVISION_URL.format( |
+                repository=self._repository, revision=mirrored_hash) |
+            change[mirr_ex + '_hash'] = mirrored_hash |
+ |
+    @staticmethod |
+    def factory(location, force_clone=False): |
+        """Get a suiting Vcs instance for the given repository path. |
+ |
+        Raises Vcs.VcsException if none or more than one of the known VCS |
+        classes recognizes the given location. |
+ |
+        """ |
+        # Determine all candidates before constructing anything, in order |
+        # to not leave a temporary repository behind if the location turns |
+        # out to be ambiguous. |
+        candidates = [cls for cls in (Git, Mercurial) |
+                      if cls.is_vcs_for_repo(location)] |
+ |
+        if not candidates: |
+            raise Vcs.VcsException('No valid VCS found for ' + location) |
+        if len(candidates) > 1: |
+            raise Vcs.VcsException( |
+                "Found multiple possible VCS' for " + location) |
+        return candidates[0](location, force_clone) |
+ |
+ |
+class Mercurial(Vcs): |
+    """Mercurial specialization of Vcs.""" |
+ |
+    EXECUTABLE = 'hg' |
+    # Override the [defaults] of the commands used below with empty values |
+    # (presumably to keep user configuration from altering their output). |
+    BASE_CMD = (EXECUTABLE, '--config', 'defaults.log=', '--config', |
+                'defaults.pull=', '--config', 'defaults.diff=') |
+    UPDATE_LOCAL_HISTORY = 'pull' |
+    # One JSON object per changeset and line. The double quotes are |
+    # substituted by Vcs.JSON_DQUOTES in _log_format(), the leading |
+    # backslash escapes the opening brace for Mercurial's template engine. |
+    LOG_TEMLATE = ('\\{"hash":"{node|short}","author":"{author|person}",' |
+                   '"date":"{date|rfc822date}","message":"{desc|strip|' |
+                   'firstline}"}\n') |
+    DEFAULT_NEW_REVISION = 'master' |
+ |
+    REVISION_URL = 'https://hg.adblockplus.org/{repository}/rev/{revision}' |
+ |
+    def __init__(self, *args, **kwargs): |
+        """Construct a Mercurial object and specify Git as the mirror class. |
+ |
+        All arguments (positional and keyword, e.g. force_clone) are passed |
+        on to Vcs.__init__(). |
+ |
+        """ |
+        self._other_cls = Git |
+        super(Mercurial, self).__init__(*args, **kwargs) |
+ |
+    def _rev_comb(self, rev_a, rev_b): |
+        # "a::b" selects the changesets which are descendants of rev_a and |
+        # ancestors of rev_b, including both of them. See |
+        # https://www.mercurial-scm.org/repo/hg/help/revsets |
+        return ('-r', '{}::{}'.format(rev_a, rev_b)) |
+ |
+    def _log_format(self): |
+        log_format = self.LOG_TEMLATE.replace('"', self.JSON_DQUOTES) |
+        return ('--template', log_format) |
+ |
+    def change_list(self, *args, **kwargs): |
+        """Apply measures for hg log and call Vcs's change_list. |
+ |
+        Accepts the same arguments as Vcs.change_list(). |
+ |
+        """ |
+        # Mercurial's command for producing a log between revisions using the |
+        # revision set produced by self._rev_comb returns the changesets in a |
+        # reversed order. Additionally the current revision is returned as |
+        # the first entry, which is dropped here. |
+        changes = super(Mercurial, self).change_list(*args, **kwargs) |
+        return list(reversed(changes[1:])) |
+ |
+    def matching_hash(self, author, date, message): |
+        """Get the responsible commit for the given information. |
+ |
+        A commit must satisfy equality for author, date and commit message, |
+        in order to be recognized as the matching commit. |
+ |
+        Returns the output of "hg log", filtered by the given values and |
+        reduced to the short node id. |
+ |
+        """ |
+        # NOTE(review): The template does not contain a separator, so the |
+        # ids of multiple matching changesets would be concatenated. Other |
+        # than in Git.matching_hash(), an ambiguous match is not detected. |
+        return self.run_cmd('log', '-u', author, '-d', date, '--keyword', |
+                            message, '--template', '{node|short}') |
+ |
+    def _make_temporary(self, location): |
+        # Instead of cloning, set up an otherwise empty repository whose |
+        # default path points to the given location. |
+        self._cwd = tempfile.mkdtemp() |
+        os.mkdir(os.path.join(self._cwd, '.hg')) |
+ |
+        # The file is opened in text mode, which already translates '\n' |
+        # to the platform's line separator. Writing os.linesep would end up |
+        # as '\r\r\n' on Windows. |
+        with io.open(os.path.join(self._cwd, '.hg', 'hgrc'), 'w') as fp: |
+            fp.write('[paths]\ndefault = {}\n'.format(location)) |
+ |
+    def commit_changes(self, msg): |
+        """Commit the outstanding local changes with <msg>.""" |
+        # NOTE(review): Other than Git.commit_changes(), no files are added |
+        # before committing - confirm that this is intended. |
+        self.run_cmd('commit', '-m', msg) |
+ |
+    def undo_changes(self): |
+        """Undo all changes in local repository and leave no backup.""" |
+        self.run_cmd('revert', '--all', '--no-backup') |
+ |
+    def repo_is_clean(self): |
+        """Check whether the current repository is clean. |
+ |
+        The repository is considered clean, if "hg status" prints nothing. |
+ |
+        """ |
+        return not self.run_cmd('status') |
+ |
+ |
+class Git(Vcs): |
+    """Git specialization of Vcs.""" |
+ |
+    EXECUTABLE = 'git' |
+    BASE_CMD = (EXECUTABLE,) |
+    UPDATE_LOCAL_HISTORY = 'fetch' |
+    # One JSON object per commit. The double quotes are substituted by |
+    # Vcs.JSON_DQUOTES in _log_format(). |
+    LOG_TEMLATE = '{"hash":"%h","author":"%an","date":"%aD","message":"%s"}' |
+    DEFAULT_NEW_REVISION = 'origin/master' |
+ |
+    REVISION_URL = ('https://www.github.com/adblockplus/{repository}/commit/' |
+                    '{revision}') |
+ |
+    def __init__(self, *args, **kwargs): |
+        """Construct a Git object and specify Mercurial as the mirror class. |
+ |
+        All arguments (positional and keyword, e.g. force_clone) are passed |
+        on to Vcs.__init__(). |
+ |
+        """ |
+        self._other_cls = Mercurial |
+        super(Git, self).__init__(*args, **kwargs) |
+ |
+    def _rev_comb(self, rev_a, rev_b): |
+        # "a..b" selects the commits reachable from rev_b, but not from |
+        # rev_a. |
+        return ('{}..{}'.format(rev_a, rev_b),) |
+ |
+    def _log_format(self): |
+        log_format = self.LOG_TEMLATE.replace('"', self.JSON_DQUOTES) |
+        return ('--pretty=format:{}'.format(log_format),) |
+ |
+    def matching_hash(self, author, date, message): |
+        """Get the responsible commit for the given information. |
+ |
+        A commit must satisfy equality for author, date and commit message, |
+        in order to be recognized as the matching commit. |
+ |
+        Returns the output of "git log", filtered by the given values and |
+        reduced to the abbreviated hash. Raises Vcs.VcsException if more |
+        than one commit matches. |
+ |
+        """ |
+        # Git does not implement exact date matching directly. Additionally, |
+        # git is only capable of filtering by COMMIT DATE instead of |
+        # AUTHOR DATE (which is what we are actually looking for), see |
+        # https://stackoverflow.com/q/37311494/ |
+        # Since naturally the COMMIT DATE always is later than the AUTHOR |
+        # DATE, we are at least able to limit the valid range to after our |
+        # given date. |
+        result = self.run_cmd('log', '--author={}'.format(author), |
+                              '--grep={}'.format(message), |
+                              '--after={}'.format(date), '--pretty=format:%h') |
+        if len(result.split()) > 1: |
+            raise Vcs.VcsException('FATAL: Ambiguous commit filter!') |
+        return result |
+ |
+    def _make_temporary(self, location): |
+        # Create a bare clone of the given location in a fresh temporary |
+        # directory. |
+        self._cwd = tempfile.mkdtemp() |
+        self.run_cmd('clone', '--bare', location, self._cwd) |
+ |
+    def commit_changes(self, msg): |
+        """Add any local changes and commit them with <msg>.""" |
+        self.run_cmd('add', '.') |
+        self.run_cmd('commit', '-m', msg) |
+ |
+    def undo_changes(self): |
+        """Undo all changes in local repository.""" |
+        self.run_cmd('checkout', '.') |
+ |
+    def repo_is_clean(self): |
+        """Check whether the current repository is clean. |
+ |
+        The repository is considered clean, if neither of the two commands |
+        below prints anything. |
+ |
+        """ |
+        # Tracked files which differ from HEAD (staged as well as unstaged). |
+        uncommitted = self.run_cmd('diff-index', 'HEAD', '--') |
+        # Untracked (-o) and deleted (-d) files, honoring the standard |
+        # ignore rules. |
+        untracked = self.run_cmd('ls-files', '-o', '-d', |
+                                 '--exclude-standard') |
+        return not uncommitted and not untracked |