update-copyright/update_copyright.py - Issue 29459580: Issue 5250 - Add copyright update script

Side by Side Diff: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools

Patch Set: Addressed more comments Created July 3, 2017, 3:31 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/env python3

	2

	3 import os

	4 import sys

	5 import subprocess

	6 import re

	7 import datetime

	8 import shutil

	9 import urllib.parse

	10 import urllib.request

	11 import html.parser

	12 import argparse

	13 from posixpath import dirname

	14

	15

	16 CURRENT_YEAR = datetime.datetime.now().year

	17

	18

	19 def process_repo(url, hg_upstream):

	20 repo = url.rstrip('/').split('/')[-1]

	21

	22 if repo in {

	23 # headers are copied from libadblockplus, no need to update seperately

	24 'libadblockplus-binaries',

	25 # huge and only contains autogenerated builds

	26 'downloads',

	27 }:

	28 return

	29

	30 try:

	31 subprocess.check_call(['hg', 'clone', url, repo])

	32 if repo == 'adblockbrowser':

	33 # adblockbrowser is a FF fork with its own changes in a

	34 # seperate branch

	35 subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',

	36 '--repository', repo])

	37 else:

	38 # switch to 'master' bookmark if it exists

	39 subprocess.call(['hg', 'up', '--rev', 'master',

	40 '--repository', repo])

	41 for dirpath, dirnames, filenames in os.walk(repo):

	42 if dirpath == repo:

	43 dirnames.remove('.hg')

	44

	45 for filename in filenames:

	46 text_replace(dirpath, filename)

	47 if hg_upstream is None:

	48 hg_upstream = url

	49 hg_commit(repo, hg_upstream)

	50

	51 finally:

	52 shutil.rmtree(repo, ignore_errors=True)

	53

	54

	55 def text_replace(dirpath, filename):

	56 with open(os.path.join(dirpath, filename), 'r+',

	57 encoding='utf-8', newline='') as file:

	58 try:

	59 text = file.read()

	60 except UnicodeDecodeError:

	61 print("Error: Couldn't read {}{}".format(dirpath, filename))

	62 return

	63

	64 text = re.sub(

	65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',

	66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I

	67 )

	68 file.seek(0)

	69 file.write(text)

	70 file.truncate()

	71

	72

	73 def hg_commit(repo, hg_upstream):

	74 try:

	75 subprocess.check_call(['hg', 'commit', '-m',

	76 'Noissue - Updated copyright year',

	77 '--repository', repo])

	78 except subprocess.CalledProcessError as e:

	79 if e.returncode == 1: # no changes

	80 return

	81 raise

	82

	83 # Push changes, or save patch if access denied

	84 if 'ssh://hg@hg.adblockplus.org/' in hg_upstream:
	Vasily Kuznetsov 2017/07/03 19:25:47 Here we still have hardcoded logic related to hg.a Here we still have hardcoded logic related to hg.adblockplus.org. I think a better way could be to remove this `if` here and add an `else` branch to the `if` in lines 47-48 so that if `hg_upstream` is given by an option, we consider it a base url and append the repository name to it. Something like: if hg_upstream is None: hg_upstream = url else: hg_upstream += '/' + repo Seems like it would cover our use case and also work correctly with other possible values of the -p option. rosie 2017/07/04 13:38:25 Done. Show quoted text On 2017/07/03 19:25:47, Vasily Kuznetsov wrote: > Here we still have hardcoded logic related to http://hg.adblockplus.org. I think a > better way could be to remove this `if` here and add an `else` branch to the > `if` in lines 47-48 so that if `hg_upstream` is given by an option, we consider > it a base url and append the repository name to it. Something like: > > if hg_upstream is None: > hg_upstream = url > else: > hg_upstream += '/' + repo > > Seems like it would cover our use case and also work correctly with other > possible values of the -p option. Done.
	85 hg_upstream += repo

	86 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0:

	87 with open(repo + '.patch', 'wb') as file:

	88 print('couldnt push, making patch instead')

	89 subprocess.check_call(['hg', 'export', '--repository', repo],

	90 stdout=file)

	91

	92

	93 class Parser(html.parser.HTMLParser):

	94 result = []

	95 recordtr = False

	96 cell = 0

	97 current_url = ''

	98

	99 def handle_starttag(self, tag, attrs):

	100 if tag == 'tr':

	101 self.recordtr = True

	102 if tag == 'td':

	103 self.cell += 1

	104 if tag == 'a':

	105 attrs = dict(attrs)

	106 if 'list' in attrs.get('class', '').split():

	107 self.current_url = attrs['href']

	108

	109 def handle_endtag(self, tag):

	110 if tag == 'tr':

	111 self.recordtr = False

	112 self.cell = 0

	113

	114 def handle_data(self, data):

	115 if self.cell == 2 and self.recordtr is True:

	116 self.recordtr = False

	117 self.cell = 0

	118 # Only process the URL if the description is not Deprecated

	119 deprecated = (re.search(r'\DEPRECATED\', data) or

	120 re.search(r'(Deprecated)', data))

	121 if not deprecated and len(self.current_url) > 2:

	122 self.result += [self.current_url]

	123 return self.result

	124

	125

	126 def extract_urls(hg_page):

	127 base_url = dirname(hg_page) + '/'
	Vasily Kuznetsov 2017/07/03 19:25:47 This will probably still break on some non-POSIX s This will probably still break on some non-POSIX systems (like Windows), but I guess it's ok, many other things wouldn't work either and we can probably live with this script being POSIX-only. rosie 2017/07/04 13:38:26 Acknowledged. Show quoted text On 2017/07/03 19:25:47, Vasily Kuznetsov wrote: > This will probably still break on some non-POSIX systems (like Windows), but I > guess it's ok, many other things wouldn't work either and we can probably live > with this script being POSIX-only. Acknowledged.
	128 parser = Parser()

	129 with urllib.request.urlopen(hg_page) as response:

	130 parser.feed(response.read().decode('utf-8'))

	131 parser.close()

	132 repo_urls = []

	133 for url in parser.result:

	134 repo_urls.append(urllib.parse.urljoin(base_url, url))

	135 return repo_urls

	136

	137

	138 def main(hg_page, hg_upstream):

	139 for repo in extract_urls(hg_page):

	140 process_repo(repo, hg_upstream)

	141

	142

	143 if __name__ == '__main__':

	144 arg_parser = argparse.ArgumentParser()

	145 arg_parser.add_argument('-u', '--hg-url',

	146 default=None,

	147 help='specify which Mercurial URL site to scrape')

	148 arg_parser.add_argument('-p', '--push-url',

	149 default=None,

	150 help='specify where to push the repository')

	151 args = arg_parser.parse_args()

	152 if args.hg_url is None:
	Vasily Kuznetsov 2017/07/03 19:25:47 Actually this is not necessary. If you make this o Actually this is not necessary. If you make this option mandatory (check argparse documentation), argparse will signal errors for you. rosie 2017/07/04 13:38:25 Done. Show quoted text On 2017/07/03 19:25:47, Vasily Kuznetsov wrote: > Actually this is not necessary. If you make this option mandatory (check > argparse documentation), argparse will signal errors for you. Done.
	153 arg_parser.error('-u HG_URL was not specified')

	154 sys.exit(2)

	155 hg_page = args.hg_url
	Vasily Kuznetsov 2017/07/03 19:25:47 Do you think these intermediate variables add valu Do you think these intermediate variables add value? I would probably remove them (and call `main` with `args.hg_url` and `args.push_url` directly), but I don't feel very strongly about it so whatever seems better to you is cool. rosie 2017/07/04 13:38:25 Done. Show quoted text On 2017/07/03 19:25:47, Vasily Kuznetsov wrote: > Do you think these intermediate variables add value? I would probably remove > them (and call `main` with `args.hg_url` and `args.push_url` directly), but I > don't feel very strongly about it so whatever seems better to you is cool. Done.
	156 hg_upstream = args.push_url

	157 main(hg_page, hg_upstream)

OLD	NEW

« update-copyright/tests/test_update_copyright.py ('K') | « update-copyright/tox.ini ('k') | no next file » | no next file with comments »