| Index: update-copyright/update_copyright.py |
| =================================================================== |
| new file mode 100755 |
| --- /dev/null |
| +++ b/update-copyright/update_copyright.py |
| @@ -0,0 +1,151 @@ |
| +#!/usr/bin/env python3 |
| + |
| +import os |
| +import subprocess |
| +import re |
| +import datetime |
| +import shutil |
| +import urllib.parse |
| +import urllib.request |
| +import html.parser |
| +import argparse |
| + |
| + |
# End year written into every refreshed copyright range.  Evaluated once,
# when the module is loaded, from the local date.
CURRENT_YEAR = datetime.date.today().year
| + |
| + |
def process_repo(url, hg_upstream):
    """Clone one repository, refresh its copyright notices and publish them.

    The repository name is the last path component of ``url``.  The clone
    is created in a directory of that name (relative to the current working
    directory) and is removed again when processing ends, whether it
    succeeded or not.

    Args:
        url: URL the repository is cloned from.
        hg_upstream: Base URL to push to; ``'/' + <repository name>`` is
            appended to it.  When ``None``, changes are pushed to ``url``.

    Raises:
        subprocess.CalledProcessError: If cloning fails or the
            'adblockbrowser' revision cannot be checked out.  Errors from
            rewriting files or committing propagate as well.
    """
    repo = url.rstrip('/').rsplit('/', 1)[-1]

    skipped_repos = {
        # headers are copied from libadblockplus, no need to update separately
        'libadblockplus-binaries',
        # huge and only contains autogenerated builds
        'downloads',
    }
    if repo in skipped_repos:
        return

    if repo == 'adblockbrowser':
        # adblockbrowser is a FF fork with its own changes in a separate
        # branch, so failing to switch to it is an error.
        run_update, revision = subprocess.check_call, 'adblockbrowser'
    else:
        # Switch to the 'master' bookmark if it exists; a failure here is
        # deliberately ignored.
        run_update, revision = subprocess.call, 'master'

    try:
        subprocess.check_call(['hg', 'clone', url, repo])
        run_update(['hg', 'up', '--rev', revision, '--repository', repo])

        for dirpath, dirnames, filenames in os.walk(repo):
            if dirpath == repo:
                # Pruning in place keeps os.walk out of Mercurial's metadata.
                dirnames.remove('.hg')

            for filename in filenames:
                text_replace(dirpath, filename)

        push_target = url if hg_upstream is None else hg_upstream + '/' + repo
        hg_commit(repo, push_target)

    finally:
        shutil.rmtree(repo, ignore_errors=True)
| + |
| + |
# Matches "copyright ... <start year>[-<end year>] eyeo gmbh", ignoring case.
# Group 1 is everything from "copyright" up to the start year, group 2 is
# the start year itself.
_COPYRIGHT_RE = re.compile(
    r'(copyright.*?)(\d{4})(?:-\d{4})?\s+eyeo gmbh', re.I
)


def text_replace(dirpath, filename, year=None):
    """Bring the eyeo copyright notices of one file up to ``year``.

    Every notice is rewritten to ``<start year>-<year> eyeo GmbH``; a notice
    whose start year already equals ``year`` keeps a single year instead of
    becoming ``YYYY-YYYY``.  Files that cannot be decoded as UTF-8 are left
    untouched, and a file is only written back when its text changed.
    Line endings are preserved (the file is opened with ``newline=''``).

    Args:
        dirpath: Directory containing the file.
        filename: Name of the file inside ``dirpath``.
        year: End year to write; defaults to ``CURRENT_YEAR``.

    Raises:
        OSError: If the file cannot be opened for reading and writing.
    """
    if year is None:
        year = CURRENT_YEAR
    year = str(year)

    def bump(match):
        prefix, first_year = match.group(1, 2)
        if first_year == year:
            years = first_year
        else:
            years = first_year + '-' + year
        return '{}{} eyeo GmbH'.format(prefix, years)

    with open(os.path.join(dirpath, filename), 'r+',
              encoding='utf-8', newline='') as file:
        try:
            text = file.read()
        except UnicodeDecodeError:
            # Not UTF-8 text (most likely binary); nothing to update.
            return

        updated = _COPYRIGHT_RE.sub(bump, text)
        if updated == text:
            # Avoid rewriting files that contain no outdated notice.
            return

        file.seek(0)
        file.write(updated)
        file.truncate()
| + |
| + |
def hg_commit(repo, hg_upstream):
    """Commit the changes in ``repo`` and push them to ``hg_upstream``.

    Nothing further happens when ``hg commit`` exits with status 1, which
    this script treats as "no changes".  When the push fails, the commit is
    exported to ``<repo>.patch`` instead.

    Args:
        repo: Path of the local repository.
        hg_upstream: URL to push the new commit to.

    Raises:
        subprocess.CalledProcessError: If ``hg commit`` fails with a status
            other than 1, or if ``hg export`` fails.
    """
    commit_command = [
        'hg', 'commit',
        '-m', 'Noissue - Updated copyright year',
        '--repository', repo,
    ]
    try:
        subprocess.check_call(commit_command)
    except subprocess.CalledProcessError as error:
        if error.returncode != 1:
            raise
        return  # no changes

    push_status = subprocess.call(
        ['hg', 'push', '--repository', repo, hg_upstream]
    )
    if push_status == 0:
        return

    # Push was refused (e.g. access denied): save a patch instead.
    with open(repo + '.patch', 'wb') as patch_file:
        print('couldnt push, making patch instead')
        subprocess.check_call(['hg', 'export', '--repository', repo],
                              stdout=patch_file)
| + |
| + |
class Parser(html.parser.HTMLParser):
    """Collect repository links from the rows of an HTML table.

    Within each ``<tr>`` the ``href`` of an ``<a>`` whose ``class`` list
    contains ``list`` is remembered.  The first text seen after the second
    ``<td>`` of the row has been opened is treated as the description; the
    remembered link is appended to ``result`` unless that text marks the
    entry as deprecated or no usable link was found in the row.

    Attributes:
        result: Links collected so far, in document order.
        recordtr: True while the current row is still awaiting its
            description text.
        cell: Number of ``<td>`` tags opened since the counter was reset.
        current_url: Link remembered for the current row ('' if none).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance state: a class-level list would be shared (and keep
        # growing) across every Parser instance.
        self.result = []
        self.recordtr = False
        self.cell = 0
        self.current_url = ''

    def handle_starttag(self, tag, attrs):
        if tag == 'tr':
            self.recordtr = True
            # Forget the previous row's link so that a row without one
            # cannot re-add it.
            self.current_url = ''
        elif tag == 'td':
            self.cell += 1
        elif tag == 'a':
            attrs = dict(attrs)
            # Attributes without a value are reported as None.
            if 'list' in (attrs.get('class') or '').split():
                self.current_url = attrs.get('href') or ''

    def handle_endtag(self, tag):
        if tag == 'tr':
            self.recordtr = False
            self.cell = 0

    def handle_data(self, data):
        if self.cell == 2 and self.recordtr:
            # Only the first text chunk of the second cell is examined.
            self.recordtr = False
            self.cell = 0
            # Only process the URL if the description is not Deprecated
            if ('*DEPRECATED*' not in data and '(Deprecated)' not in data and
                    len(self.current_url) > 2):
                self.result.append(self.current_url)
        # HTMLParser ignores this value; it is returned only to stay
        # compatible with direct callers of this method.
        return self.result
| + |
| + |
def extract_urls(hg_page):
    """Return the absolute URLs of the repositories listed on ``hg_page``.

    The page is downloaded, decoded as UTF-8 and fed to ``Parser``; every
    link it collected is resolved against the page URL.

    Args:
        hg_page: URL of the HTML page listing the repositories.

    Returns:
        A list of absolute repository URLs, in page order.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the page is not valid UTF-8.
    """
    parser = Parser()
    with urllib.request.urlopen(hg_page) as response:
        parser.feed(response.read().decode('utf-8'))
    parser.close()

    # urljoin already resolves links relative to the page URL the way a
    # browser does, so no filesystem path helper is needed to derive a base
    # (os.path is platform-dependent and mishandles URLs that lack a
    # trailing slash).
    return [urllib.parse.urljoin(hg_page, href) for href in parser.result]
| + |
| + |
def main(hg_page, hg_upstream):
    """Update the copyright year in every repository listed on ``hg_page``.

    Args:
        hg_page: URL of the page listing the repositories.
        hg_upstream: Base URL the updated repositories are pushed to.
    """
    repo_urls = extract_urls(hg_page)
    for repo_url in repo_urls:
        process_repo(repo_url, hg_upstream)
| + |
| + |
if __name__ == '__main__':
    # The two defaults belong together: scraping a different site while
    # still pushing to the default server is rejected below.
    default_hg_url = 'https://hg.adblockplus.org/'
    default_push_url = 'ssh://hg@hg.adblockplus.org/'

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-u', '--hg-url',
        default=default_hg_url,
        help='specify which Mercurial URL site to scrape',
    )
    arg_parser.add_argument(
        '-p', '--push-url',
        default=default_push_url,
        help='specify where to push the repository',
    )
    args = arg_parser.parse_args()

    custom_source = args.hg_url != default_hg_url
    default_target = args.push_url == default_push_url
    if custom_source and default_target:
        arg_parser.error('If -u is provided, -p is mandatory')

    main(args.hg_url, args.push_url)