Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 | 2 |
3 import os | 3 import os |
4 import sys | |
5 import subprocess | 4 import subprocess |
6 import re | 5 import re |
7 import datetime | 6 import datetime |
8 import shutil | 7 import shutil |
9 import urllib.parse | 8 import urllib.parse |
10 import urllib.request | 9 import urllib.request |
11 import html.parser | 10 import html.parser |
12 import argparse | 11 import argparse |
13 from posixpath import dirname | |
14 | 12 |
15 | 13 |
16 CURRENT_YEAR = datetime.datetime.now().year | 14 CURRENT_YEAR = datetime.datetime.now().year |
17 | 15 |
18 | 16 |
19 def process_repo(url, hg_upstream): | 17 def process_repo(url, hg_upstream): |
20 repo = url.rstrip('/').split('/')[-1] | 18 repo = url.rstrip('/').split('/')[-1] |
21 | 19 |
22 if repo in { | 20 if repo in { |
23 # headers are copied from libadblockplus, no need to update separately | 21 # headers are copied from libadblockplus, no need to update separately |
(...skipping 15 matching lines...) Expand all Loading... | |
39 subprocess.call(['hg', 'up', '--rev', 'master', | 37 subprocess.call(['hg', 'up', '--rev', 'master', |
40 '--repository', repo]) | 38 '--repository', repo]) |
41 for dirpath, dirnames, filenames in os.walk(repo): | 39 for dirpath, dirnames, filenames in os.walk(repo): |
42 if dirpath == repo: | 40 if dirpath == repo: |
43 dirnames.remove('.hg') | 41 dirnames.remove('.hg') |
44 | 42 |
45 for filename in filenames: | 43 for filename in filenames: |
46 text_replace(dirpath, filename) | 44 text_replace(dirpath, filename) |
47 if hg_upstream is None: | 45 if hg_upstream is None: |
48 hg_upstream = url | 46 hg_upstream = url |
47 else: | |
48 hg_upstream += '/' + repo | |
49 hg_commit(repo, hg_upstream) | 49 hg_commit(repo, hg_upstream) |
50 | 50 |
51 finally: | 51 finally: |
52 shutil.rmtree(repo, ignore_errors=True) | 52 shutil.rmtree(repo, ignore_errors=True) |
53 | 53 |
54 | 54 |
55 def text_replace(dirpath, filename): | 55 def text_replace(dirpath, filename): |
56 with open(os.path.join(dirpath, filename), 'r+', | 56 with open(os.path.join(dirpath, filename), 'r+', |
57 encoding='utf-8', newline='') as file: | 57 encoding='utf-8', newline='') as file: |
58 try: | 58 try: |
59 text = file.read() | 59 text = file.read() |
60 except UnicodeDecodeError: | 60 except UnicodeDecodeError: |
61 print("Error: Couldn't read {}{}".format(dirpath, filename)) | |
62 return | 61 return |
63 | 62 |
64 text = re.sub( | 63 text = re.sub( |
65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', | 64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', |
66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I | 65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I |
67 ) | 66 ) |
68 file.seek(0) | 67 file.seek(0) |
69 file.write(text) | 68 file.write(text) |
70 file.truncate() | 69 file.truncate() |
71 | 70 |
72 | 71 |
73 def hg_commit(repo, hg_upstream): | 72 def hg_commit(repo, hg_upstream): |
74 try: | 73 try: |
75 subprocess.check_call(['hg', 'commit', '-m', | 74 subprocess.check_call(['hg', 'commit', '-m', |
76 'Noissue - Updated copyright year', | 75 'Noissue - Updated copyright year', |
77 '--repository', repo]) | 76 '--repository', repo]) |
78 except subprocess.CalledProcessError as e: | 77 except subprocess.CalledProcessError as e: |
79 if e.returncode == 1: # no changes | 78 if e.returncode == 1: # no changes |
80 return | 79 return |
81 raise | 80 raise |
82 | 81 |
83 # Push changes, or save patch if access denied | 82 # Push changes, or save patch if access denied |
84 if 'ssh://hg@hg.adblockplus.org/' in hg_upstream: | |
Vasily Kuznetsov
2017/07/03 19:25:47
Here we still have hardcoded logic related to hg.a
rosie
2017/07/04 13:38:25
Done.
| |
85 hg_upstream += repo | |
86 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0: | 83 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0: |
87 with open(repo + '.patch', 'wb') as file: | 84 with open(repo + '.patch', 'wb') as file: |
88 print('couldnt push, making patch instead') | 85 print('couldnt push, making patch instead') |
89 subprocess.check_call(['hg', 'export', '--repository', repo], | 86 subprocess.check_call(['hg', 'export', '--repository', repo], |
90 stdout=file) | 87 stdout=file) |
91 | 88 |
92 | 89 |
93 class Parser(html.parser.HTMLParser): | 90 class Parser(html.parser.HTMLParser): |
94 result = [] | 91 result = [] |
95 recordtr = False | 92 recordtr = False |
(...skipping 13 matching lines...) Expand all Loading... | |
109 def handle_endtag(self, tag): | 106 def handle_endtag(self, tag): |
110 if tag == 'tr': | 107 if tag == 'tr': |
111 self.recordtr = False | 108 self.recordtr = False |
112 self.cell = 0 | 109 self.cell = 0 |
113 | 110 |
114 def handle_data(self, data): | 111 def handle_data(self, data): |
115 if self.cell == 2 and self.recordtr is True: | 112 if self.cell == 2 and self.recordtr is True: |
116 self.recordtr = False | 113 self.recordtr = False |
117 self.cell = 0 | 114 self.cell = 0 |
118 # Only process the URL if the description is not Deprecated | 115 # Only process the URL if the description is not Deprecated |
119 deprecated = (re.search(r'\*DEPRECATED\*', data) or | 116 if ('*DEPRECATED*' not in data and |
120 re.search(r'(Deprecated)', data)) | 117 '(Deprecated)' not in data and |
121 if not deprecated and len(self.current_url) > 2: | 118 len(self.current_url) > 2): |
122 self.result += [self.current_url] | 119 self.result += [self.current_url] |
123 return self.result | 120 return self.result |
124 | 121 |
125 | 122 |
126 def extract_urls(hg_page): | 123 def extract_urls(hg_page): |
127 base_url = dirname(hg_page) + '/' | 124 base_url = os.path.dirname(hg_page) + '/' |
Vasily Kuznetsov
2017/07/03 19:25:47
This will probably still break on some non-POSIX s
rosie
2017/07/04 13:38:26
Acknowledged.
| |
128 parser = Parser() | 125 parser = Parser() |
129 with urllib.request.urlopen(hg_page) as response: | 126 with urllib.request.urlopen(hg_page) as response: |
130 parser.feed(response.read().decode('utf-8')) | 127 parser.feed(response.read().decode('utf-8')) |
131 parser.close() | 128 parser.close() |
132 repo_urls = [] | 129 repo_urls = [] |
133 for url in parser.result: | 130 for url in parser.result: |
134 repo_urls.append(urllib.parse.urljoin(base_url, url)) | 131 repo_urls.append(urllib.parse.urljoin(base_url, url)) |
135 return repo_urls | 132 return repo_urls |
136 | 133 |
137 | 134 |
138 def main(hg_page, hg_upstream): | 135 def main(hg_page, hg_upstream): |
139 for repo in extract_urls(hg_page): | 136 for repo in extract_urls(hg_page): |
140 process_repo(repo, hg_upstream) | 137 process_repo(repo, hg_upstream) |
141 | 138 |
142 | 139 |
143 if __name__ == '__main__': | 140 if __name__ == '__main__': |
144 arg_parser = argparse.ArgumentParser() | 141 arg_parser = argparse.ArgumentParser() |
145 arg_parser.add_argument('-u', '--hg-url', | 142 arg_parser.add_argument('-u', '--hg-url', |
146 default=None, | |
147 help='specify which Mercurial URL site to scrape') | 143 help='specify which Mercurial URL site to scrape') |
148 arg_parser.add_argument('-p', '--push-url', | 144 arg_parser.add_argument('-p', '--push-url', |
149 default=None, | |
150 help='specify where to push the repository') | 145 help='specify where to push the repository') |
151 args = arg_parser.parse_args() | 146 args = arg_parser.parse_args() |
152 if args.hg_url is None: | 147 if args.hg_url and not args.push_url: |
Vasily Kuznetsov
2017/07/03 19:25:47
Actually this is not necessary. If you make this o
rosie
2017/07/04 13:38:25
Done.
| |
153 arg_parser.error('-u HG_URL was not specified') | 148 arg_parser.error('If -u is provided, -p is mandatory') |
154 sys.exit(2) | 149 main(args.hg_url or 'https://hg.adblockplus.org/', |
155 hg_page = args.hg_url | 150 args.push_url or 'ssh://hg@hg.adblockplus.org/') |
Vasily Kuznetsov
2017/07/03 19:25:47
Do you think these intermediate variables add valu
rosie
2017/07/04 13:38:25
Done.
| |
156 hg_upstream = args.push_url | |
157 main(hg_page, hg_upstream) | |
LEFT | RIGHT |