Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools
Left Patch Set: Addressed comments Created June 23, 2017, 2:36 p.m.
Right Patch Set: Fix indentation and default args Created July 17, 2017, 1:22 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « update-copyright/tox.ini ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 2
3 import os 3 import os
4 import subprocess 4 import subprocess
5 import re 5 import re
6 import datetime 6 import datetime
7 import shutil 7 import shutil
8 import urllib.parse 8 import urllib.parse
9 import urllib.request 9 import urllib.request
10 import html.parser 10 import html.parser
11 import argparse 11 import argparse
12 from posixpath import dirname
13 12
14 13
15 CURRENT_YEAR = datetime.datetime.now().year 14 CURRENT_YEAR = datetime.datetime.now().year
16 15
17 16
18 def process_repo(url, hg_upstream): 17 def process_repo(url, hg_upstream):
Vasily Kuznetsov 2017/06/27 16:18:54 Is `hg_upstream` unused in this function? It shoul
rosie 2017/07/03 15:33:46 Done.
19 repo = os.path.basename(os.path.normpath(url)) 18 repo = url.rstrip('/').split('/')[-1]
Vasily Kuznetsov 2017/06/27 16:18:51 Why are we applying `os.path.normpath` to a url he
rosie 2017/07/03 15:33:45 Done.
19
20 if repo in { 20 if repo in {
21 # headers are copied from libadblockplus, no need to update seperately 21 # headers are copied from libadblockplus, no need to update seperately
22 'libadblockplus-binaries', 22 'libadblockplus-binaries',
23 # huge and only contains autogenerated builds 23 # huge and only contains autogenerated builds
24 'downloads', 24 'downloads',
25 }: 25 }:
26 return 26 return
27 27
28 try: 28 try:
29 subprocess.check_call(['hg', 'clone', url, repo]) 29 subprocess.check_call(['hg', 'clone', url, repo])
30 if repo == 'adblockbrowser': 30 if repo == 'adblockbrowser':
31 # adblockbrowser is a FF fork with its own changes in a 31 # adblockbrowser is a FF fork with its own changes in a
32 # seperate branch 32 # seperate branch
33 subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser', 33 subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',
34 '--repository', repo]) 34 '--repository', repo])
35 else: 35 else:
36 # switch to 'master' bookmark if it exists 36 # switch to 'master' bookmark if it exists
37 subprocess.call(['hg', 'up', '--rev', 'master', 37 subprocess.call(['hg', 'up', '--rev', 'master',
Vasily Kuznetsov 2017/06/27 16:18:54 Any idea why we're not doing `check_call` here jus
rosie 2017/07/03 15:33:45 This line attempts to switch to the master branch.
Vasily Kuznetsov 2017/07/03 19:25:45 I see. Makes sense.
38 '--repository', repo]) 38 '--repository', repo])
39 for dirpath, dirnames, filenames in os.walk(repo): 39 for dirpath, dirnames, filenames in os.walk(repo):
40 if dirpath == repo: 40 if dirpath == repo:
41 dirnames.remove('.hg') 41 dirnames.remove('.hg')
42 42
43 for filename in filenames: 43 for filename in filenames:
44 text_replace(dirpath, filename) 44 text_replace(dirpath, filename)
45 hg_commit(repo, url) 45 if hg_upstream is None:
46 hg_upstream = url
47 else:
48 hg_upstream += '/' + repo
49 hg_commit(repo, hg_upstream)
46 50
47 finally: 51 finally:
48 shutil.rmtree(repo, ignore_errors=True) 52 shutil.rmtree(repo, ignore_errors=True)
49 53
50 54
51 def text_replace(dirpath, filename): 55 def text_replace(dirpath, filename):
52 with open(os.path.join(dirpath, filename), 'r+', 56 with open(os.path.join(dirpath, filename), 'r+',
53 encoding='utf-8', newline='') as file: 57 encoding='utf-8', newline='') as file:
54 try: 58 try:
55 text = file.read() 59 text = file.read()
56 except UnicodeDecodeError: 60 except UnicodeDecodeError:
57 print("Error: Couldn't read {}{}".format(dirpath, filename))
58 return 61 return
59 62
60 text = re.sub( 63 text = re.sub(
61 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', 64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
62 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I 65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I
63 ) 66 )
64 file.seek(0) 67 file.seek(0)
65 file.write(text) 68 file.write(text)
66 file.truncate() 69 file.truncate()
67 70
68 71
69 def hg_commit(repo, hg_upstream): 72 def hg_commit(repo, hg_upstream):
70 try: 73 try:
71 subprocess.check_call(['hg', 'commit', '-m', 74 subprocess.check_call(['hg', 'commit', '-m',
72 'Noissue - Updated copyright year', 75 'Noissue - Updated copyright year',
73 '--repository', repo]) 76 '--repository', repo])
74 except subprocess.CalledProcessError as e: 77 except subprocess.CalledProcessError as e:
75 if e.returncode == 1: # no changes 78 if e.returncode == 1: # no changes
76 return 79 return
77 raise 80 raise
78 81
79 # Push changes, or save patch if access denied 82 # Push changes, or save patch if access denied
80 if 'ssh://hg@hg.adblockplus.org/' in hg_upstream:
Vasily Kuznetsov 2017/06/27 16:18:52 `hg_commit` is called with the original url of the
rosie 2017/07/03 15:33:45 If we're pushing to ssh://hg@hg.adblockplus.org/ s
rosie 2017/07/04 13:38:24 If we're pushing to ssh://hg@hg.adblockplus.org/ s
81 hg_upstream += repo
82 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0: 83 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0:
83 with open(repo + '.patch', 'wb') as file: 84 with open(repo + '.patch', 'wb') as file:
84 print('couldnt push, making patch instead') 85 print('couldnt push, making patch instead')
85 subprocess.check_call(['hg', 'export', '--repository', repo], 86 subprocess.check_call(['hg', 'export', '--repository', repo],
86 stdout=file) 87 stdout=file)
87 88
88 89
89 class Parser(html.parser.HTMLParser): 90 class Parser(html.parser.HTMLParser):
90 result = [] 91 result = []
91 recordtr = False 92 recordtr = False
92 cell = 0 93 cell = 0
93 current_url = '' 94 current_url = ''
94 dir_path = 'https://hg.adblockplus.org/'
Vasily Kuznetsov 2017/06/27 16:18:52 Is this used anywhere?
rosie 2017/07/03 15:33:44 Done.
95 95
96 def handle_starttag(self, tag, attrs): 96 def handle_starttag(self, tag, attrs):
97 if tag == 'tr': 97 if tag == 'tr':
98 self.recordtr = True 98 self.recordtr = True
99 if tag == 'td': 99 if tag == 'td':
100 self.cell += 1 100 self.cell += 1
101 if tag == 'a': 101 if tag == 'a':
102 attrs = dict(attrs) 102 attrs = dict(attrs)
103 if 'list' in attrs.get('class', '').split(): 103 if 'list' in attrs.get('class', '').split():
104 self.current_url = attrs['href'] 104 self.current_url = attrs['href']
105 105
106 def handle_endtag(self, tag): 106 def handle_endtag(self, tag):
107 if tag == 'tr': 107 if tag == 'tr':
108 self.recordtr = False 108 self.recordtr = False
109 self.cell = 0 109 self.cell = 0
110 110
111 def handle_data(self, data): 111 def handle_data(self, data):
112 if (self.cell == 2) and self.recordtr is True: 112 if self.cell == 2 and self.recordtr is True:
Vasily Kuznetsov 2017/06/27 16:18:51 The parentheses around the first operand of `and`
rosie 2017/07/03 15:33:45 Done.
113 self.recordtr = False 113 self.recordtr = False
114 self.cell = 0 114 self.cell = 0
115 # Only process the URL if the description is not Deprecated 115 # Only process the URL if the description is not Deprecated
116 depr1 = re.search(r'\*DEPRECATED\*', data) 116 if ('*DEPRECATED*' not in data and
Vasily Kuznetsov 2017/06/27 16:18:52 I think it would make the code more readable and e
rosie 2017/07/03 15:33:44 Done.
117 depr2 = re.search(r'(Deprecated)', data) 117 '(Deprecated)' not in data and
118 if not depr1 and not depr2 and len(self.current_url) > 2: 118 len(self.current_url) > 2):
119 self.result += [self.current_url] 119 self.result += [self.current_url]
120 return self.result 120 return self.result
121 121
122 122
123 def extract_urls(hg_page): 123 def extract_urls(hg_page):
124 dir_path = dirname(hg_page) + '/' 124 base_url = os.path.dirname(hg_page) + '/'
Vasily Kuznetsov 2017/06/27 16:18:52 Two points here: - `hg_page` is an url, could we u
rosie 2017/07/03 15:33:45 Done.
125 parser = Parser() 125 parser = Parser()
126 with urllib.request.urlopen(hg_page) as response: 126 with urllib.request.urlopen(hg_page) as response:
127 parser.feed(response.read().decode('utf-8')) 127 parser.feed(response.read().decode('utf-8'))
128 parser.close() 128 parser.close()
129 my_result = [] 129 repo_urls = []
Vasily Kuznetsov 2017/06/27 16:18:52 Maybe better call this `repo_urls`?
rosie 2017/07/03 15:33:46 Done.
130 for i in range(len(parser.result)): 130 for url in parser.result:
Vasily Kuznetsov 2017/06/27 16:18:53 Why not `for url in parser.result:`?
rosie 2017/07/03 15:33:45 Done.
131 my_result.append(urllib.parse.urljoin(dir_path, parser.result[i])) 131 repo_urls.append(urllib.parse.urljoin(base_url, url))
132 return my_result 132 return repo_urls
133 133
134 134
135 def main(hg_page, hg_upstream): 135 def main(hg_page, hg_upstream):
136 repo_list = extract_urls(hg_page) 136 for repo in extract_urls(hg_page):
137 for repo in repo_list:
Vasily Kuznetsov 2017/06/27 16:18:52 Could probably just be `for repo in extract_urls(h
rosie 2017/07/03 15:33:45 Done.
138 process_repo(repo, hg_upstream) 137 process_repo(repo, hg_upstream)
139 138
140 139
141 if __name__ == '__main__': 140 if __name__ == '__main__':
142 arg_parser = argparse.ArgumentParser() 141 arg_parser = argparse.ArgumentParser()
143 arg_parser.add_argument('-u', '--url', 142 arg_parser.add_argument('-u', '--hg-url',
144 default='https://hg.adblockplus.org/',
145 help='specify which Mercurial URL site to scrape') 143 help='specify which Mercurial URL site to scrape')
146 arg_parser.add_argument('-p', '--push', 144 arg_parser.add_argument('-p', '--push-url',
147 default='ssh://hg@hg.adblockplus.org/',
Vasily Kuznetsov 2017/06/27 16:18:52 This should probably default to `None` and then if
rosie 2017/07/03 15:33:46 Done.
148 help='specify where to push the repository') 145 help='specify where to push the repository')
149 args = arg_parser.parse_args() 146 args = arg_parser.parse_args()
150 hg_page = args.url 147 if args.hg_url and not args.push_url:
151 hg_upstream = args.push 148 arg_parser.error('If -u is provided, -p is mandatory')
152 main(hg_page, hg_upstream) 149 main(args.hg_url or 'https://hg.adblockplus.org/',
150 args.push_url or 'ssh://hg@hg.adblockplus.org/')
LEFTRIGHT

Powered by Google App Engine
This is Rietveld