Index: .hgignore
===================================================================
--- a/.hgignore
+++ b/.hgignore
@@ -1,8 +1,10 @@
syntax: glob
*.pyc
*.pyo
.tox/
__pycache__/
*.egg-info/
build/
dist/
+.cache/
+.coverage
Index: update-copyright/README.md
===================================================================
new file mode 100644
--- /dev/null
+++ b/update-copyright/README.md
@@ -0,0 +1,32 @@
+# update-copyright
+
+This script will attempt to update the eyeo copyright information in each
+file of each repo listed on a Mercurial index page.
+By default, running the script will scrape [https://hg.adblockplus.org/](https://hg.adblockplus.org/),
+and then for each repo, it will:
+* make a local copy of the repo
+* update the copyright information on each file to the current year
+* attempt to push the updates to `ssh://hg@hg.adblockplus.org/`
+
+If a user doesn't have permission to push to a repo, the script will make a
+local `repo-name.patch` file to submit later.
+
+You are free to use this for other projects, but please keep in mind that we
+make no stability guarantees whatsoever and might change functionality at any
+time.
+
+## How to use
+
+To update the copyright on all the repos indexed at [https://hg.adblockplus.org/](https://hg.adblockplus.org/),
+simply run the script, e.g.:
+
+ ./update_copyright.py
+
+To run the script elsewhere, you must specify the URL of a Mercurial index
+page to scrape and the base URL to push to, e.g.:
+
+ ./update_copyright.py -u https://hg.example.com/ -p ssh://user@hg.example.com/
+
+## Testing
+
+Tests can be run via [tox](http://tox.readthedocs.org/).
Index: update-copyright/tests/data/hg_page.html
===================================================================
new file mode 100644
--- /dev/null
+++ b/update-copyright/tests/data/hg_page.html
@@ -0,0 +1,99 @@
+
+
+
+
+
+
+
+
+
+Mercurial repositories index
+
+
+
+
+
+
+
+
+
+
Index: update-copyright/tests/data/sample_file.py
===================================================================
new file mode 100644
--- /dev/null
+++ b/update-copyright/tests/data/sample_file.py
@@ -0,0 +1,15 @@
+# This file is part of the Adblock Plus web scripts,
+# Copyright (C) 2006-2015 eyeo GmbH
+#
+# Adblock Plus is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3 as
+# published by the Free Software Foundation.
+#
+
+
+def foo():
+ # Fixture data: the notice below sits inside a string literal and spells
+ # the holder with a capital "E", unlike the header comment of this file.
+ value = 'LegalCopyright', 'Copyright (C) 2006-2016 Eyeo GmbH'
+ return value
+
+
+# Copyright (C) 2006-2014 example GmbH
Index: update-copyright/tests/test_update_copyright.py
===================================================================
new file mode 100644
--- /dev/null
+++ b/update-copyright/tests/test_update_copyright.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+import os
+import re
+import datetime
+import subprocess
+import shutil
+import urllib.parse
+
+import pytest
+
+from update_copyright import extract_urls, text_replace, hg_commit, main
+
+# Absolute path of the `data` directory that sits next to this test module;
+# the sample file and the sample index page are copied from there.
+data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
+
+
+def create_repo(path):
+    """Initialise a Mercurial repository at *path* holding the sample file.
+
+    A repo-local hgrc with a user name is written before committing
+    (presumably so the commit does not depend on the global hg config),
+    then sample_file.py is copied in and committed as 'Initial commit'.
+    """
+    subprocess.check_call(['hg', 'init', path])
+
+    hgrc_path = os.path.join(path, '.hg', 'hgrc')
+    with open(hgrc_path, 'w+') as config:
+        config.write('[ui]\nusername = Test User ')
+
+    sample = os.path.join(data_path, 'sample_file.py')
+    shutil.copy(sample, path)
+
+    commit_cmd = ['hg', 'commit', '-Am', 'Initial commit']
+    commit_cmd += ['--repository', path]
+    subprocess.check_call(commit_cmd)
+
+
+@pytest.fixture()
+def temp_dir(tmpdir):
+    """Return a fresh `temp_dir` directory created inside pytest's tmpdir."""
+    return tmpdir.mkdir('temp_dir')
+
+
+@pytest.fixture()
+def temp_repo(tmpdir):
+    """Return the path of a temporary repo containing one sample file."""
+    repo_dir = tmpdir.mkdir('tmp_dir')
+    create_repo(repo_dir.strpath)
+    return repo_dir
+
+
+@pytest.fixture()
+def base_dir(tmpdir):
+    """Return a temporary directory that contains one html page and two
+    repositories (one with push access, the other without).
+
+    The directory is returned as a plain string path.
+    """
+    base = str(tmpdir.mkdir('tmp_dir'))
+    # shutil.copy rather than spawning `cp`: works on every platform and
+    # matches how create_repo() copies its sample file.
+    shutil.copy(os.path.join(data_path, 'hg_page.html'), base)
+
+    repo_1 = os.path.join(base, 'repo_1')
+    repo_2 = os.path.join(base, 'repo_2')
+    for repo in (repo_1, repo_2):
+        os.mkdir(repo)
+        create_repo(repo)
+
+    # Make repo_2 read-only: its hgrc is replaced by one that only installs
+    # a pretxnchangegroup hook.
+    # NOTE(review): this relies on the hook command exiting non-zero so that
+    # incoming pushes are rolled back -- confirm.
+    with open(os.path.join(repo_2, '.hg', 'hgrc'), 'w') as hgrc:
+        hgrc.write('[hooks]\npretxnchangegroup = return True')
+    return base
+
+
+def test_extract_urls():
+    """extract_urls() returns the repo links of the sample index page as
+    absolute URLs located next to that page."""
+    data_url = urllib.parse.urljoin('file:///', data_path)
+    expected = [data_url + '/repo_1/',
+                data_url + '/repo_2/']
+    # Build the page URL with '/' instead of os.path.join: URLs always use
+    # forward slashes, whatever the platform's path separator is.
+    assert expected == extract_urls(data_url + '/hg_page.html')
+
+
+def test_text_replacement(temp_repo):
+    """text_replace() moves eyeo notices to the current year and leaves
+    notices of other holders alone."""
+    filename = temp_repo.join('sample_file.py').strpath
+    text_replace(temp_repo.strpath, filename)
+    with open(filename) as sample:
+        text = sample.read()
+
+    notice = re.compile(r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
+                        re.I)
+    # Count the eyeo notices whose year range now ends in the current year.
+    updated = 0
+    for match in notice.finditer(text):
+        years = re.search(r'(\d{4})-(\d{4})', match.group(0))
+        if years.group(2) == str(datetime.datetime.now().year):
+            updated += 1
+
+    # test that non-eyeo copyright information are left alone
+    assert '2014 example' in text
+    # test for copyright information in both strings and comments
+    assert updated == 2
+
+
+def test_hg_commit(temp_repo, temp_dir):
+    """hg_commit() commits a change made in a clone and pushes it to the
+    repository the clone was made from."""
+    upstream = str(temp_repo)
+    clone = str(temp_dir)
+    subprocess.check_call(['hg', 'clone', upstream, clone])
+
+    # Add an empty file so that there is something to commit.
+    with open(os.path.join(clone, 'foo'), 'w'):
+        pass
+    subprocess.check_call(['hg', 'add', '--repository', clone])
+
+    hg_commit(clone, upstream)
+
+    # Make sure the commit message shows up in the hg log of the upstream
+    # repository, i.e. the commit was created and pushed.
+    result = subprocess.run(['hg', 'log', '--repository', upstream],
+                            stdout=subprocess.PIPE)
+    assert 'Noissue - Updated copyright year' in str(result.stdout)
+
+
+def test_all(base_dir):
+    """Run main() against the sample index page: repo_1 must receive the
+    commit, repo_2 must end up as a patch file in the working directory."""
+    patch_name = 'repo_2.patch'
+    try:
+        main(urllib.parse.urljoin('file:///', os.path.join(
+            base_dir, 'hg_page.html')), None)
+
+        # assert hg log for repo_1
+        log_1 = subprocess.run(['hg', 'log', '--repository',
+                                os.path.join(base_dir, 'repo_1')],
+                               stdout=subprocess.PIPE)
+        assert 'Noissue - Updated copyright year' in str(log_1.stdout)
+
+        # assert the .patch file for repo_2
+        with open(patch_name) as patch:
+            assert 'Noissue - Updated copyright year' in patch.read()
+    finally:
+        # Clean up even when an assertion above fails, and without relying
+        # on an external `rm` binary.
+        if os.path.exists(patch_name):
+            os.remove(patch_name)
Index: update-copyright/tox.ini
===================================================================
new file mode 100644
--- /dev/null
+++ b/update-copyright/tox.ini
@@ -0,0 +1,18 @@
+[tox]
+envlist = py{35,36}
+skipsdist = True
+
+[testenv]
+setenv =
+ PYTHONPATH = {toxinidir}
+
+deps =
+ pytest
+ pytest-cov
+ flake8
+ pep8-naming
+ ../flake8-eyeo
+
+commands =
+ pytest --cov=update_copyright tests
+ flake8 update_copyright.py tests
Index: update-copyright/update_copyright.py
===================================================================
new file mode 100755
--- /dev/null
+++ b/update-copyright/update_copyright.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+import os
+import subprocess
+import re
+import datetime
+import shutil
+import urllib.parse
+import urllib.request
+import html.parser
+import argparse
+
+
+# Year written into updated notices; evaluated once, when the module loads.
+CURRENT_YEAR = datetime.datetime.now().year
+
+
+def process_repo(url, hg_upstream):
+    """Clone one repository, update its copyright notices and publish them.
+
+    url -- URL of the repository to clone; its last path component is used
+           as the name of the local working copy (created in the current
+           directory).
+    hg_upstream -- base URL to push to (the repo name gets appended), or
+                   None to push back to `url`.
+
+    The local clone is removed again in all cases, including failures.
+    Raises subprocess.CalledProcessError when cloning fails, when switching
+    to the adblockbrowser branch fails, or when hg_commit() raises it.
+    """
+    repo = url.rstrip('/').split('/')[-1]
+
+    if repo in {
+        # headers are copied from libadblockplus, no need to update separately
+        'libadblockplus-binaries',
+        # huge and only contains autogenerated builds
+        'downloads',
+    }:
+        return
+
+    try:
+        subprocess.check_call(['hg', 'clone', url, repo])
+        if repo == 'adblockbrowser':
+            # adblockbrowser is a FF fork with its own changes in a
+            # separate branch
+            subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',
+                                   '--repository', repo])
+        else:
+            # switch to 'master' bookmark if it exists (the exit status is
+            # deliberately ignored for repos without that bookmark)
+            subprocess.call(['hg', 'up', '--rev', 'master',
+                             '--repository', repo])
+
+        for dirpath, dirnames, filenames in os.walk(repo):
+            if dirpath == repo:
+                # Pruning the list in place keeps os.walk out of .hg
+                dirnames.remove('.hg')
+
+            for filename in filenames:
+                text_replace(dirpath, filename)
+
+        if hg_upstream is None:
+            push_url = url
+        else:
+            # Drop one trailing slash before appending the repo name:
+            # 'ssh://host/' + '/' + repo would give 'ssh://host//repo', and
+            # a doubled slash makes Mercurial treat the path as absolute.
+            base = hg_upstream
+            if base.endswith('/'):
+                base = base[:-1]
+            push_url = base + '/' + repo
+        hg_commit(repo, push_url)
+
+    finally:
+        shutil.rmtree(repo, ignore_errors=True)
+
+
+def text_replace(dirpath, filename):
+    """Bring the eyeo copyright notices of one file up to CURRENT_YEAR.
+
+    dirpath, filename -- joined with os.path.join to locate the file.
+
+    Every "copyright ... YYYY[-YYYY] eyeo gmbh" notice (case-insensitive,
+    within one line up to the year) is rewritten to end with
+    "YYYY-<CURRENT_YEAR> eyeo GmbH". A notice whose first year already is
+    CURRENT_YEAR keeps that single year instead of becoming "YYYY-YYYY".
+    Files that cannot be decoded as UTF-8 are skipped, and files without
+    any change are not written to.
+    """
+    def bump(match):
+        # group(1) is everything up to the first year, group(2) that year
+        first_year = match.group(2)
+        if first_year == str(CURRENT_YEAR):
+            years = first_year
+        else:
+            years = '{}-{}'.format(first_year, CURRENT_YEAR)
+        return '{}{} eyeo GmbH'.format(match.group(1), years)
+
+    # newline='' disables newline translation, so the file's original line
+    # endings survive the round trip.
+    with open(os.path.join(dirpath, filename), 'r+',
+              encoding='utf-8', newline='') as file:
+        try:
+            text = file.read()
+        except UnicodeDecodeError:
+            # not UTF-8 text (e.g. binary data): leave the file untouched
+            return
+
+        updated = re.sub(
+            r'(copyright.*?)(\d{4})(?:-\d{4})?\s+eyeo gmbh',
+            bump, text, 0, re.I
+        )
+        if updated == text:
+            # nothing to update, avoid rewriting the file
+            return
+
+        file.seek(0)
+        file.write(updated)
+        file.truncate()
+
+
+def hg_commit(repo, hg_upstream):
+    """Commit the pending changes of *repo* and push them to *hg_upstream*.
+
+    Returns silently when `hg commit` exits with status 1 (no changes); any
+    other commit failure is re-raised as subprocess.CalledProcessError.
+    When the push fails, the commit is exported to `<repo>.patch` instead.
+    """
+    commit_cmd = ['hg', 'commit', '-m', 'Noissue - Updated copyright year',
+                  '--repository', repo]
+    try:
+        subprocess.check_call(commit_cmd)
+    except subprocess.CalledProcessError as error:
+        if error.returncode != 1:
+            raise
+        return  # no changes
+
+    push_status = subprocess.call(['hg', 'push', '--repository', repo,
+                                   hg_upstream])
+    if push_status == 0:
+        return
+
+    # Push failed (e.g. access denied): save a patch instead
+    with open(repo + '.patch', 'wb') as patch:
+        print('couldnt push, making patch instead')
+        subprocess.check_call(['hg', 'export', '--repository', repo],
+                              stdout=patch)
+
+
+class Parser(html.parser.HTMLParser):
+    """Collect repository links from a Mercurial index page.
+
+    For each table row, the href of the last `<a class="list">` seen is
+    remembered; when the text of the row's second cell arrives, the link is
+    appended to `result` unless that text marks the repo as deprecated.
+
+    NOTE: handle_data() only fires for cells that contain text, so a row
+    whose second cell is empty is never recorded.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Per-instance state. As class attributes, `result` was one list
+        # shared (and extended in place) by every Parser instance.
+        self.result = []        # hrefs accepted so far
+        self.recordtr = False   # inside a row that was not handled yet
+        self.cell = 0           # number of <td> seen in the current row
+        self.current_url = ''   # href of the last <a class="list">
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'tr':
+            self.recordtr = True
+        elif tag == 'td':
+            self.cell += 1
+        elif tag == 'a':
+            attrs = dict(attrs)
+            # A valueless attribute is reported as None, hence the `or ''`.
+            if 'list' in (attrs.get('class') or '').split():
+                # A link without href yields '' and is filtered out by the
+                # length check in handle_data().
+                self.current_url = attrs.get('href') or ''
+
+    def handle_endtag(self, tag):
+        if tag == 'tr':
+            self.recordtr = False
+            self.cell = 0
+
+    def handle_data(self, data):
+        if self.cell == 2 and self.recordtr is True:
+            # Only the first text chunk of the second cell is examined.
+            self.recordtr = False
+            self.cell = 0
+            # Only process the URL if the description is not Deprecated
+            if ('*DEPRECATED*' not in data and
+                    '(Deprecated)' not in data and
+                    len(self.current_url) > 2):
+                self.result.append(self.current_url)
+        return self.result
+
+
+def extract_urls(hg_page):
+    """Return the absolute URLs of the repositories listed on *hg_page*.
+
+    hg_page -- URL of a Mercurial index page; it is fetched with
+               urllib.request.urlopen and decoded as UTF-8.
+
+    Returns a list of URLs, one per link collected by Parser, each resolved
+    relative to the location of the index page.
+    """
+    parser = Parser()
+    with urllib.request.urlopen(hg_page) as response:
+        parser.feed(response.read().decode('utf-8'))
+    parser.close()
+
+    # Resolve against the page URL itself. urljoin already drops the last
+    # path segment of its base, whereas os.path.dirname on a URL breaks for
+    # e.g. 'https://host' (no trailing slash) or URLs with a query string.
+    return [urllib.parse.urljoin(hg_page, url) for url in parser.result]
+
+
+def main(hg_page, hg_upstream):
+    """Run process_repo() on every repository URL found on *hg_page*,
+    passing *hg_upstream* through unchanged."""
+    repo_urls = extract_urls(hg_page)
+    for repo_url in repo_urls:
+        process_repo(repo_url, hg_upstream)
+
+
+if __name__ == '__main__':
+    # Command-line entry point: both options are optional, but a custom
+    # index URL is only accepted together with a custom push URL.
+    cli = argparse.ArgumentParser()
+    cli.add_argument('-u', '--hg-url',
+                     help='specify which Mercurial URL site to scrape')
+    cli.add_argument('-p', '--push-url',
+                     help='specify where to push the repository')
+    options = cli.parse_args()
+
+    hg_url, push_url = options.hg_url, options.push_url
+    if hg_url and not push_url:
+        cli.error('If -u is provided, -p is mandatory')
+
+    main(hg_url or 'https://hg.adblockplus.org/',
+         push_url or 'ssh://hg@hg.adblockplus.org/')