update-copyright/update_copyright.py - Issue 29459580: Issue 5250 - Add copyright update script

Side by Side Diff: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools

Patch Set: Remove the wrapper Created July 7, 2017, 4:43 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/env python3

	2

	3 import os

	4 import subprocess

	5 import re

	6 import datetime

	7 import shutil

	8 import urllib.parse

	9 import urllib.request

	10 import html.parser

	11 import argparse

	12

	13

	14 CURRENT_YEAR = datetime.datetime.now().year

	15

	16

	17 def process_repo(url, hg_upstream):

	18 repo = url.rstrip('/').split('/')[-1]

	19

	20 if repo in {

	21 # headers are copied from libadblockplus, no need to update seperately

	22 'libadblockplus-binaries',

	23 # huge and only contains autogenerated builds

	24 'downloads',

	25 }:

	26 return

	27

	28 try:

	29 subprocess.check_call(['hg', 'clone', url, repo])

	30 if repo == 'adblockbrowser':

	31 # adblockbrowser is a FF fork with its own changes in a

	32 # seperate branch

	33 subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',

	34 '--repository', repo])

	35 else:

	36 # switch to 'master' bookmark if it exists

	37 subprocess.call(['hg', 'up', '--rev', 'master',

	38 '--repository', repo])

	39 for dirpath, dirnames, filenames in os.walk(repo):

	40 if dirpath == repo:

	41 dirnames.remove('.hg')

	42

	43 for filename in filenames:

	44 text_replace(dirpath, filename)

	45 if hg_upstream is None:

	46 hg_upstream = url

	47 else:

	48 hg_upstream += '/' + repo

	49 hg_commit(repo, hg_upstream)

	50

	51 finally:

	52 shutil.rmtree(repo, ignore_errors=True)

	53

	54

	55 def text_replace(dirpath, filename):

	56 with open(os.path.join(dirpath, filename), 'r+',

	57 encoding='utf-8', newline='') as file:

	58 try:

	59 text = file.read()

	60 except UnicodeDecodeError:

	61 return

	62

	63 text = re.sub(

	64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',

	65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I

	66 )

	67 file.seek(0)

	68 file.write(text)

	69 file.truncate()

	70

	71

	72 def hg_commit(repo, hg_upstream):

	73 try:

	74 subprocess.check_call(['hg', 'commit', '-m',

	75 'Noissue - Updated copyright year',

	76 '--repository', repo])

	77 except subprocess.CalledProcessError as e:

	78 if e.returncode == 1: # no changes

	79 return

	80 raise

	81

	82 # Push changes, or save patch if access denied

	83 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0:

	84 with open(repo + '.patch', 'wb') as file:

	85 print('couldnt push, making patch instead')

	86 subprocess.check_call(['hg', 'export', '--repository', repo],

	87 stdout=file)

	88

	89

	90 class Parser(html.parser.HTMLParser):

	91 result = []

	92 recordtr = False

	93 cell = 0

	94 current_url = ''

	95

	96 def handle_starttag(self, tag, attrs):

	97 if tag == 'tr':

	98 self.recordtr = True

	99 if tag == 'td':

	100 self.cell += 1

	101 if tag == 'a':

	102 attrs = dict(attrs)

	103 if 'list' in attrs.get('class', '').split():

	104 self.current_url = attrs['href']

	105

	106 def handle_endtag(self, tag):

	107 if tag == 'tr':

	108 self.recordtr = False

	109 self.cell = 0

	110

	111 def handle_data(self, data):

	112 if self.cell == 2 and self.recordtr is True:

	113 self.recordtr = False

	114 self.cell = 0

	115 # Only process the URL if the description is not Deprecated

	116 if ('DEPRECATED' not in data and '(Deprecated)' not in data and

	117 len(self.current_url) > 2):

	118 self.result += [self.current_url]

	119 return self.result

	120

	121

	122 def extract_urls(hg_page):

	123 base_url = os.path.dirname(hg_page) + '/'

	124 parser = Parser()

	125 with urllib.request.urlopen(hg_page) as response:

	126 parser.feed(response.read().decode('utf-8'))

	127 parser.close()

	128 repo_urls = []

	129 for url in parser.result:

	130 repo_urls.append(urllib.parse.urljoin(base_url, url))

	131 return repo_urls

	132

	133

	134 def main(hg_page, hg_upstream):

	135 for repo in extract_urls(hg_page):

	136 process_repo(repo, hg_upstream)

	137

	138

	139 if __name__ == '__main__':

	140 arg_parser = argparse.ArgumentParser()

	141 arg_parser.add_argument('-u', '--hg-url',

	142 help='specify which Mercurial URL site to scrape',

	143 default='https://hg.adblockplus.org/')

	144 arg_parser.add_argument('-p', '--push-url',

	145 default='ssh://hg@hg.adblockplus.org/',

	146 help='specify where to push the repository')

	147 args = arg_parser.parse_args()

	148 if (args.hg_url != 'https://hg.adblockplus.org/' and args.push_url ==

	149 'ssh://hg@hg.adblockplus.org/'):
	Sebastian Noack 2017/07/07 16:58:31 Perhaps it would be worth to handle the defaults m Perhaps it would be worth to handle the defaults manually, in order to avoid duplication. It also seems to make the logic easier to follow (for me at least). arg_parser.add_argument('-u', '--hg-url', help='specify which Mercurial URL site to scrape') arg_parser.add_argument('-p', '--push-url', help='specify where to push the repository') args = arg_parser.parse_args() if args.hg_url and not args.push_url: arg_parser.error('If -u is provided, -p is mandatory') main(args.hg_url or 'https://hg.adblockplus.org/', args.push_url or 'ssh://hg@hg.adblockplus.org/') What do you think? Sebastian Noack 2017/07/17 10:19:54 What is about this comment? Show quoted text On 2017/07/07 16:58:31, Sebastian Noack wrote: > Perhaps it would be worth to handle the defaults manually, in order to avoid > duplication. It also seems to make the logic easier to follow (for me at least). > > arg_parser.add_argument('-u', '--hg-url', > help='specify which Mercurial URL site to scrape') > arg_parser.add_argument('-p', '--push-url', > help='specify where to push the repository') > args = arg_parser.parse_args() > if args.hg_url and not args.push_url: > arg_parser.error('If -u is provided, -p is mandatory') > main(args.hg_url or 'https://hg.adblockplus.org/', > args.push_url or 'ssh://hg@hg.adblockplus.org/') > > What do you think? What is about this comment? rosie 2017/07/17 13:23:17 Done. Show quoted text On 2017/07/07 16:58:31, Sebastian Noack wrote: > Perhaps it would be worth to handle the defaults manually, in order to avoid > duplication. It also seems to make the logic easier to follow (for me at least). > > arg_parser.add_argument('-u', '--hg-url', > help='specify which Mercurial URL site to scrape') > arg_parser.add_argument('-p', '--push-url', > help='specify where to push the repository') > args = arg_parser.parse_args() > if args.hg_url and not args.push_url: > arg_parser.error('If -u is provided, -p is mandatory') > main(args.hg_url or 'https://hg.adblockplus.org/', > args.push_url or 'ssh://hg@hg.adblockplus.org/') > > What do you think? Done. rosie 2017/07/17 13:23:17 Yeah, makes sense. I like this way better. Show quoted text On 2017/07/17 10:19:54, Sebastian Noack wrote: > On 2017/07/07 16:58:31, Sebastian Noack wrote: > > Perhaps it would be worth to handle the defaults manually, in order to avoid > > duplication. It also seems to make the logic easier to follow (for me at > least). > > > > arg_parser.add_argument('-u', '--hg-url', > > help='specify which Mercurial URL site to scrape') > > arg_parser.add_argument('-p', '--push-url', > > help='specify where to push the repository') > > args = arg_parser.parse_args() > > if args.hg_url and not args.push_url: > > arg_parser.error('If -u is provided, -p is mandatory') > > main(args.hg_url or 'https://hg.adblockplus.org/', > > args.push_url or 'ssh://hg@hg.adblockplus.org/') > > > > What do you think? > > What is about this comment? Yeah, makes sense. I like this way better.
	150 arg_parser.error('If -u is provided, -p is mandatory')

	151 main(args.hg_url, args.push_url)

OLD	NEW

« no previous file with comments | « update-copyright/tox.ini ('k') | no next file » | no next file with comments »