update-copyright/update_copyright.py - Issue 29459580: Issue 5250 - Add copyright update script

Unified Diff: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools

Patch Set: Addressed more comments Created July 3, 2017, 3:31 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: update-copyright/update_copyright.py

===================================================================

new file mode 100644

--- /dev/null

+++ b/update-copyright/update_copyright.py

@@ -0,0 +1,157 @@

+#!/usr/bin/env python3

+import os

+import sys

+import subprocess

+import re

+import datetime

+import shutil

+import urllib.parse

+import urllib.request

+import html.parser

+import argparse

+from posixpath import dirname

+CURRENT_YEAR = datetime.datetime.now().year

# Repositories that are never cloned or modified.
_SKIPPED_REPOS = frozenset({
    # headers are copied from libadblockplus, no need to update separately
    'libadblockplus-binaries',
    # huge and only contains autogenerated builds
    'downloads',
})


def process_repo(url, hg_upstream):
    """Clone one repository, update its copyright years, then commit/push.

    Args:
        url: URL of the Mercurial repository to clone. Its last path
            component is used as the name of the local clone directory.
        hg_upstream: Where to push the result. ``None`` means push back to
            ``url``.

    Raises:
        subprocess.CalledProcessError: If cloning, or switching to the
            ``adblockbrowser`` branch, fails.

    The local clone is always removed afterwards, even on failure.
    """
    repo = url.rstrip('/').split('/')[-1]
    if repo in _SKIPPED_REPOS:
        return

    try:
        subprocess.check_call(['hg', 'clone', url, repo])
        if repo == 'adblockbrowser':
            # adblockbrowser is a FF fork with its own changes in a
            # separate branch
            subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',
                                   '--repository', repo])
        else:
            # switch to 'master' bookmark if it exists; failure is
            # deliberately ignored because not every repo has one
            subprocess.call(['hg', 'up', '--rev', 'master',
                             '--repository', repo])

        for dirpath, dirnames, filenames in os.walk(repo):
            # Prune Mercurial's own metadata so it is never rewritten.
            if dirpath == repo and '.hg' in dirnames:
                dirnames.remove('.hg')
            for filename in filenames:
                # os.walk reports symlinks as files. A dangling link would
                # make open() fail and a link leaving the clone would get
                # its target rewritten, so links are skipped; in-repo
                # targets are still visited under their real name.
                if os.path.islink(os.path.join(dirpath, filename)):
                    continue
                text_replace(dirpath, filename)

        if hg_upstream is None:
            hg_upstream = url
        hg_commit(repo, hg_upstream)
    finally:
        shutil.rmtree(repo, ignore_errors=True)

def text_replace(dirpath, filename):
    """Bring the eyeo copyright year range in one file up to CURRENT_YEAR.

    Every ``copyright ... YYYY[-YYYY] eyeo gmbh`` occurrence (matched
    case-insensitively) is rewritten to ``... YYYY-<CURRENT_YEAR> eyeo GmbH``,
    keeping the first year that was matched.

    Args:
        dirpath: Directory containing the file.
        filename: Name of the file inside ``dirpath``.

    Files that are not valid UTF-8 are reported on stdout and left
    untouched. Files without a matching notice are not rewritten.
    """
    path = os.path.join(dirpath, filename)
    # newline='' disables newline translation so existing line endings
    # survive the round trip.
    with open(path, 'r+', encoding='utf-8', newline='') as file:
        try:
            text = file.read()
        except UnicodeDecodeError:
            # Report the real path; joining dirpath and filename without a
            # separator produced a misleading name.
            print("Error: Couldn't read {}".format(path))
            return

        updated = re.sub(
            r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
            r'\1-{} eyeo GmbH'.format(CURRENT_YEAR),
            text,
            flags=re.I,
        )
        if updated == text:
            # Nothing matched: do not touch the file at all.
            return

        file.seek(0)
        file.write(updated)
        # The new text can be shorter than the old one.
        file.truncate()

def hg_commit(repo, hg_upstream):
    """Commit the pending changes in ``repo`` and push them upstream.

    Args:
        repo: Path of the local clone (also its repository name).
        hg_upstream: URL to push to.

    Raises:
        subprocess.CalledProcessError: If ``hg commit`` fails with a status
            other than 1, or if ``hg export`` fails.

    When the push fails, the commit is exported to ``<repo>.patch`` in the
    current directory instead.
    """
    try:
        subprocess.check_call(['hg', 'commit', '-m',
                               'Noissue - Updated copyright year',
                               '--repository', repo])
    except subprocess.CalledProcessError as e:
        if e.returncode == 1:  # no changes
            return
        raise

    # Push changes, or save patch if access denied
    # NOTE(review): Vasily Kuznetsov (2017/07/03) flagged this branch:
    # "Here we still have hardcoded logic related to hg.a..." (comment is
    # truncated in the review page); rosie replied "Done." (2017/07/04).
    # The host check is kept here as in this patch set.
    if 'ssh://hg@hg.adblockplus.org/' in hg_upstream:
        # Join with exactly one '/' so a base URL without a trailing slash
        # does not get fused with the repository name.
        hg_upstream = hg_upstream.rstrip('/') + '/' + repo

    push_status = subprocess.call(
        ['hg', 'push', '--repository', repo, hg_upstream])
    if push_status != 0:
        with open(repo + '.patch', 'wb') as file:
            print('couldnt push, making patch instead')
            subprocess.check_call(['hg', 'export', '--repository', repo],
                                  stdout=file)

class Parser(html.parser.HTMLParser):
    """Collect repository links from a Mercurial index page.

    For every table row, the ``href`` of an ``<a>`` whose ``class`` contains
    ``list`` is remembered. The first text chunk seen inside the second
    ``<td>`` of that row is treated as the description. The link is appended
    to ``result`` unless the description marks the repository as deprecated.

    Attributes:
        result: List of collected ``href`` values, in page order.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance state. A class-level list would be shared by every
        # parser instance and accumulate results across pages.
        self.result = []
        self.recordtr = False   # True while inside a row not yet handled
        self.cell = 0           # number of <td> seen in the current row
        self.current_url = ''   # href of the current row's "list" link

    def handle_starttag(self, tag, attrs):
        if tag == 'tr':
            self.recordtr = True
            # Forget the previous row's link so a row without one cannot
            # re-report it.
            self.current_url = ''
        if tag == 'td':
            self.cell += 1
        if tag == 'a':
            attrs = dict(attrs)
            # Attributes without a value are reported as None.
            css_classes = (attrs.get('class') or '').split()
            if 'list' in css_classes:
                self.current_url = attrs.get('href') or ''

    def handle_endtag(self, tag):
        if tag == 'tr':
            self.recordtr = False
            self.cell = 0

    def handle_data(self, data):
        if self.cell == 2 and self.recordtr:
            # Only the first text chunk of the second cell is examined.
            self.recordtr = False
            self.cell = 0
            # Only process the URL if the description is not Deprecated
            deprecated = '*DEPRECATED*' in data or 'Deprecated' in data
            if not deprecated and len(self.current_url) > 2:
                self.result.append(self.current_url)
            # HTMLParser ignores this value; kept for callers that invoke
            # handle_data() directly.
            return self.result

def extract_urls(hg_page):
    """Return the absolute URLs of the repositories listed on ``hg_page``.

    Args:
        hg_page: URL of the Mercurial index page to download and scrape.

    Returns:
        A list of absolute repository URLs, in page order, built from the
        links that ``Parser`` collected.

    The page body is decoded as UTF-8.
    """
    # NOTE(review): Vasily Kuznetsov (2017/07/03) commented on the previous
    # posixpath.dirname()-based base URL: "This will probably still break on
    # some non-POSIX s..." (truncated in the review page); rosie:
    # "Acknowledged." Resolving against the page URL itself avoids path
    # helpers entirely and also handles a URL with no path component
    # (e.g. "https://host"), which dirname() reduced to "https:/".
    parser = Parser()
    with urllib.request.urlopen(hg_page) as response:
        parser.feed(response.read().decode('utf-8'))
    parser.close()

    return [urllib.parse.urljoin(hg_page, href) for href in parser.result]

def main(hg_page, hg_upstream):
    """Process every repository listed on the Mercurial page ``hg_page``.

    Args:
        hg_page: URL of the index page that lists the repositories.
        hg_upstream: Push target handed to ``process_repo`` for each
            repository (may be ``None``).
    """
    repo_urls = extract_urls(hg_page)
    for repo_url in repo_urls:
        process_repo(repo_url, hg_upstream)

if __name__ == '__main__':
    # Command-line entry point: scrape the given Mercurial index page and
    # update every repository listed on it.
    arg_parser = argparse.ArgumentParser()
    # NOTE(review): Vasily Kuznetsov (2017/07/03) on the former manual
    # "is None" check: "Actually this is not necessary. If you make this
    # o..." (truncated in the review page); rosie: "Done." argparse now
    # enforces the option itself; a missing -u still exits with status 2.
    arg_parser.add_argument('-u', '--hg-url',
                            required=True,
                            help='specify which Mercurial URL site to scrape')
    arg_parser.add_argument('-p', '--push-url',
                            default=None,
                            help='specify where to push the repository')
    args = arg_parser.parse_args()
    # NOTE(review): Vasily Kuznetsov (2017/07/03): "Do you think these
    # intermediate variables add valu..." (truncated); rosie: "Done."
    # The parsed values are passed straight through.
    main(args.hg_url, args.push_url)

« update-copyright/tests/test_update_copyright.py ('K') | « update-copyright/tox.ini ('k') | no next file » | no next file with comments »