 Issue 29459580:
  Issue 5250 - Add copyright update script  (Closed) 
  Base URL: https://hg.adblockplus.org/codingtools
    
  
    Issue 29459580:
  Issue 5250 - Add copyright update script  (Closed) 
  Base URL: https://hg.adblockplus.org/codingtools
| Index: update-copyright/update_copyright.py | 
| =================================================================== | 
| new file mode 100755 | 
| --- /dev/null | 
| +++ b/update-copyright/update_copyright.py | 
| @@ -0,0 +1,151 @@ | 
| +#!/usr/bin/env python3 | 
| + | 
| +import os | 
| +import subprocess | 
| +import re | 
| +import datetime | 
| +import shutil | 
| +import urllib.parse | 
| +import urllib.request | 
| +import html.parser | 
| +import argparse | 
| + | 
| + | 
| +CURRENT_YEAR = datetime.datetime.now().year | 
| + | 
| + | 
def process_repo(url, hg_upstream):
    """Clone one repository, refresh its copyright notices and commit.

    The repository name is the last path component of *url*; the clone is
    created in a directory of that name below the current working
    directory and is always removed again afterwards, even on failure.

    url -- URL of the Mercurial repository to clone.
    hg_upstream -- base URL to push to (the repository name is appended),
        or None to push back to *url*.

    Raises subprocess.CalledProcessError if cloning fails or, for
    'adblockbrowser', if its branch cannot be checked out.
    """
    repo = url.rstrip('/').rsplit('/', 1)[-1]

    ignored_repos = (
        # headers are copied from libadblockplus, no need to update
        # separately
        'libadblockplus-binaries',
        # huge and only contains autogenerated builds
        'downloads',
    )
    if repo in ignored_repos:
        return

    try:
        subprocess.check_call(['hg', 'clone', url, repo])

        if repo != 'adblockbrowser':
            # Switch to the 'master' bookmark if it exists; a failure here
            # is deliberately ignored.
            subprocess.call(['hg', 'up', '--rev', 'master',
                             '--repository', repo])
        else:
            # adblockbrowser is a FF fork with its own changes in a
            # separate branch
            subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',
                                   '--repository', repo])

        for root, subdirs, names in os.walk(repo):
            if root == repo:
                # Prune Mercurial's own metadata from the walk.
                subdirs.remove('.hg')
            for name in names:
                text_replace(root, name)

        if hg_upstream is None:
            push_target = url
        else:
            push_target = hg_upstream + '/' + repo
        hg_commit(repo, push_target)
    finally:
        shutil.rmtree(repo, ignore_errors=True)
| + | 
| + | 
def text_replace(dirpath, filename, year=None):
    """Bring the eyeo GmbH copyright notice in one file up to *year*.

    Every occurrence of "copyright ... YYYY[-YYYY] eyeo gmbh" (matched
    case-insensitively, within a single line) is rewritten so that the
    range ends in *year* and the company name reads "eyeo GmbH".  When the
    first year of the notice already is *year*, a single year is written
    instead of a redundant "YYYY-YYYY" range.

    dirpath -- directory containing the file.
    filename -- name of the file inside *dirpath*.
    year -- final year to write; defaults to the module-level CURRENT_YEAR.

    Files that are not valid UTF-8 are left untouched, as are symbolic
    links and files in which nothing needs to change.
    """
    if year is None:
        year = CURRENT_YEAR

    path = os.path.join(dirpath, filename)
    # Never go through symlinks: a dangling one would make open() fail,
    # and a valid one may point at a file outside of the checkout.
    if os.path.islink(path):
        return

    def bump(match):
        notice = match.group(1)  # "copyright ... <first year>"
        if notice.endswith(str(year)):
            return '{} eyeo GmbH'.format(notice)
        return '{}-{} eyeo GmbH'.format(notice, year)

    # newline='' disables newline translation so that the original line
    # endings survive the round trip.
    with open(path, 'r+', encoding='utf-8', newline='') as file:
        try:
            text = file.read()
        except UnicodeDecodeError:
            # Not a UTF-8 text file (most likely binary), skip it.
            return

        updated = re.sub(
            r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
            bump, text, flags=re.I
        )
        if updated == text:
            # Nothing to do; avoid rewriting the file needlessly.
            return

        file.seek(0)
        file.write(updated)
        file.truncate()
| + | 
| + | 
def hg_commit(repo, hg_upstream):
    """Commit the pending changes in *repo* and push them upstream.

    repo -- path of the local Mercurial repository.
    hg_upstream -- destination passed to "hg push".

    If "hg commit" exits with status 1 (no changes) nothing else happens.
    If pushing fails, the commit is exported to "<repo>.patch" in the
    current working directory instead.

    Raises subprocess.CalledProcessError if committing fails for any other
    reason or if exporting the patch fails.
    """
    commit_command = ['hg', 'commit', '-m',
                      'Noissue - Updated copyright year',
                      '--repository', repo]
    try:
        subprocess.check_call(commit_command)
    except subprocess.CalledProcessError as error:
        if error.returncode != 1:
            raise
        # Exit status 1: no changes, so there is nothing to push.
        return

    push_status = subprocess.call(['hg', 'push', '--repository', repo,
                                   hg_upstream])
    if push_status == 0:
        return

    # Pushing failed (e.g. access denied), save a patch instead.
    with open(repo + '.patch', 'wb') as patch_file:
        print('couldnt push, making patch instead')
        subprocess.check_call(['hg', 'export', '--repository', repo],
                              stdout=patch_file)
| + | 
| + | 
class Parser(html.parser.HTMLParser):
    """Collect repository links from the table rows of an HTML page.

    For every table row, the href of an <a> tag whose class list contains
    'list' is remembered, and the text of the second <td> cell is treated
    as the repository description.  The link is appended to ``result``
    unless that description marks the repository as deprecated.

    All state is kept per instance, so several parsers can be used
    independently of each other.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # hrefs of all accepted rows, in document order
        self.result = []
        # True while inside a row whose description hasn't been seen yet
        self.recordtr = False
        # number of <td> tags seen so far in the current row
        self.cell = 0
        # href of the most recent <a class="list"> tag
        self.current_url = ''

    def handle_starttag(self, tag, attrs):
        if tag == 'tr':
            self.recordtr = True
        if tag == 'td':
            self.cell += 1
        if tag == 'a':
            attrs = dict(attrs)
            # A valueless attribute is reported as None, hence the "or".
            if 'list' in (attrs.get('class') or '').split():
                self.current_url = attrs['href']

    def handle_endtag(self, tag):
        if tag == 'tr':
            self.recordtr = False
            self.cell = 0

    def handle_data(self, data):
        if self.cell == 2 and self.recordtr:
            # Only look at the first chunk of text of the second cell.
            self.recordtr = False
            self.cell = 0
            # Only process the URL if the description is not Deprecated
            if ('*DEPRECATED*' not in data and '(Deprecated)' not in data and
                    len(self.current_url) > 2):
                self.result.append(self.current_url)
        # HTMLParser ignores this value; it is only returned for backward
        # compatibility with direct callers.
        return self.result
| + | 
| + | 
def extract_urls(hg_page):
    """Return the absolute URLs of the repositories listed on *hg_page*.

    hg_page -- URL of the HTML page to download; it is decoded as UTF-8
        and fed to Parser.

    Returns a list of URLs in the order the links appear on the page.
    Raises whatever urllib.request.urlopen() raises if the page cannot be
    retrieved.
    """
    parser = Parser()
    with urllib.request.urlopen(hg_page) as response:
        parser.feed(response.read().decode('utf-8'))
    parser.close()

    # Resolve the links against the page URL itself.  urljoin() already
    # ignores the last path segment of its base, and unlike
    # os.path.dirname() it doesn't mangle URLs that lack a trailing slash
    # (os.path.dirname('https://host') is 'https:') or whose query string
    # contains a slash.
    return [urllib.parse.urljoin(hg_page, url) for url in parser.result]
| + | 
| + | 
def main(hg_page, hg_upstream):
    """Update the copyright notices of every repository found on *hg_page*.

    hg_page -- URL of the page listing the repositories.
    hg_upstream -- push destination handed on to process_repo() unchanged.
    """
    repo_urls = extract_urls(hg_page)
    for repo_url in repo_urls:
        process_repo(repo_url, hg_upstream)
| + | 
| + | 
| +if __name__ == '__main__': | 
| + arg_parser = argparse.ArgumentParser() | 
| + arg_parser.add_argument('-u', '--hg-url', | 
| + help='specify which Mercurial URL site to scrape', | 
| + default='https://hg.adblockplus.org/') | 
| + arg_parser.add_argument('-p', '--push-url', | 
| + default='ssh://hg@hg.adblockplus.org/', | 
| + help='specify where to push the repository') | 
| + args = arg_parser.parse_args() | 
| + if (args.hg_url != 'https://hg.adblockplus.org/' and args.push_url == | 
| + 'ssh://hg@hg.adblockplus.org/'): | 
| 
Sebastian Noack
2017/07/07 16:58:31
Perhaps it would be worth to handle the defaults m
 
Sebastian Noack
2017/07/17 10:19:54
What about this comment?
 
rosie
2017/07/17 13:23:17
Done.
 
rosie
2017/07/17 13:23:17
Yeah, makes sense. I like this way better.
 | 
| + arg_parser.error('If -u is provided, -p is mandatory') | 
| + main(args.hg_url, args.push_url) |