Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools
Left Patch Set: Addressed more comments Created July 3, 2017, 3:31 p.m.
Right Patch Set: Fix indentation and default args Created July 17, 2017, 1:22 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « update-copyright/tox.ini ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments | Hide Comments ('s')
LEFT | RIGHT
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 2
3 import os 3 import os
4 import sys
5 import subprocess 4 import subprocess
6 import re 5 import re
7 import datetime 6 import datetime
8 import shutil 7 import shutil
9 import urllib.parse 8 import urllib.parse
10 import urllib.request 9 import urllib.request
11 import html.parser 10 import html.parser
12 import argparse 11 import argparse
13 from posixpath import dirname
14 12
15 13
16 CURRENT_YEAR = datetime.datetime.now().year 14 CURRENT_YEAR = datetime.datetime.now().year
17 15
18 16
19 def process_repo(url, hg_upstream): 17 def process_repo(url, hg_upstream):
20 repo = url.rstrip('/').split('/')[-1] 18 repo = url.rstrip('/').split('/')[-1]
21 19
22 if repo in { 20 if repo in {
23 # headers are copied from libadblockplus, no need to update separately 21 # headers are copied from libadblockplus, no need to update separately
(...skipping 15 matching lines...) Expand all
39 subprocess.call(['hg', 'up', '--rev', 'master', 37 subprocess.call(['hg', 'up', '--rev', 'master',
40 '--repository', repo]) 38 '--repository', repo])
41 for dirpath, dirnames, filenames in os.walk(repo): 39 for dirpath, dirnames, filenames in os.walk(repo):
42 if dirpath == repo: 40 if dirpath == repo:
43 dirnames.remove('.hg') 41 dirnames.remove('.hg')
44 42
45 for filename in filenames: 43 for filename in filenames:
46 text_replace(dirpath, filename) 44 text_replace(dirpath, filename)
47 if hg_upstream is None: 45 if hg_upstream is None:
48 hg_upstream = url 46 hg_upstream = url
47 else:
48 hg_upstream += '/' + repo
49 hg_commit(repo, hg_upstream) 49 hg_commit(repo, hg_upstream)
50 50
51 finally: 51 finally:
52 shutil.rmtree(repo, ignore_errors=True) 52 shutil.rmtree(repo, ignore_errors=True)
53 53
54 54
55 def text_replace(dirpath, filename): 55 def text_replace(dirpath, filename):
56 with open(os.path.join(dirpath, filename), 'r+', 56 with open(os.path.join(dirpath, filename), 'r+',
57 encoding='utf-8', newline='') as file: 57 encoding='utf-8', newline='') as file:
58 try: 58 try:
59 text = file.read() 59 text = file.read()
60 except UnicodeDecodeError: 60 except UnicodeDecodeError:
61 print("Error: Couldn't read {}{}".format(dirpath, filename))
62 return 61 return
63 62
64 text = re.sub( 63 text = re.sub(
65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', 64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I 65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I
67 ) 66 )
68 file.seek(0) 67 file.seek(0)
69 file.write(text) 68 file.write(text)
70 file.truncate() 69 file.truncate()
71 70
72 71
73 def hg_commit(repo, hg_upstream): 72 def hg_commit(repo, hg_upstream):
74 try: 73 try:
75 subprocess.check_call(['hg', 'commit', '-m', 74 subprocess.check_call(['hg', 'commit', '-m',
76 'Noissue - Updated copyright year', 75 'Noissue - Updated copyright year',
77 '--repository', repo]) 76 '--repository', repo])
78 except subprocess.CalledProcessError as e: 77 except subprocess.CalledProcessError as e:
79 if e.returncode == 1: # no changes 78 if e.returncode == 1: # no changes
80 return 79 return
81 raise 80 raise
82 81
83 # Push changes, or save patch if access denied 82 # Push changes, or save patch if access denied
84 if 'ssh://hg@hg.adblockplus.org/' in hg_upstream:
Vasily Kuznetsov 2017/07/03 19:25:47 Here we still have hardcoded logic related to hg.a
rosie 2017/07/04 13:38:25 Done.
85 hg_upstream += repo
86 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0: 83 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0:
87 with open(repo + '.patch', 'wb') as file: 84 with open(repo + '.patch', 'wb') as file:
88 print('couldnt push, making patch instead') 85 print('couldnt push, making patch instead')
89 subprocess.check_call(['hg', 'export', '--repository', repo], 86 subprocess.check_call(['hg', 'export', '--repository', repo],
90 stdout=file) 87 stdout=file)
91 88
92 89
93 class Parser(html.parser.HTMLParser): 90 class Parser(html.parser.HTMLParser):
94 result = [] 91 result = []
95 recordtr = False 92 recordtr = False
(...skipping 13 matching lines...) Expand all
109 def handle_endtag(self, tag): 106 def handle_endtag(self, tag):
110 if tag == 'tr': 107 if tag == 'tr':
111 self.recordtr = False 108 self.recordtr = False
112 self.cell = 0 109 self.cell = 0
113 110
114 def handle_data(self, data): 111 def handle_data(self, data):
115 if self.cell == 2 and self.recordtr is True: 112 if self.cell == 2 and self.recordtr is True:
116 self.recordtr = False 113 self.recordtr = False
117 self.cell = 0 114 self.cell = 0
118 # Only process the URL if the description is not Deprecated 115 # Only process the URL if the description is not Deprecated
119 deprecated = (re.search(r'\*DEPRECATED\*', data) or 116 if ('*DEPRECATED*' not in data and
120 re.search(r'(Deprecated)', data)) 117 '(Deprecated)' not in data and
121 if not deprecated and len(self.current_url) > 2: 118 len(self.current_url) > 2):
122 self.result += [self.current_url] 119 self.result += [self.current_url]
123 return self.result 120 return self.result
124 121
125 122
126 def extract_urls(hg_page): 123 def extract_urls(hg_page):
127 base_url = dirname(hg_page) + '/' 124 base_url = os.path.dirname(hg_page) + '/'
Vasily Kuznetsov 2017/07/03 19:25:47 This will probably still break on some non-POSIX s
rosie 2017/07/04 13:38:26 Acknowledged.
128 parser = Parser() 125 parser = Parser()
129 with urllib.request.urlopen(hg_page) as response: 126 with urllib.request.urlopen(hg_page) as response:
130 parser.feed(response.read().decode('utf-8')) 127 parser.feed(response.read().decode('utf-8'))
131 parser.close() 128 parser.close()
132 repo_urls = [] 129 repo_urls = []
133 for url in parser.result: 130 for url in parser.result:
134 repo_urls.append(urllib.parse.urljoin(base_url, url)) 131 repo_urls.append(urllib.parse.urljoin(base_url, url))
135 return repo_urls 132 return repo_urls
136 133
137 134
138 def main(hg_page, hg_upstream): 135 def main(hg_page, hg_upstream):
139 for repo in extract_urls(hg_page): 136 for repo in extract_urls(hg_page):
140 process_repo(repo, hg_upstream) 137 process_repo(repo, hg_upstream)
141 138
142 139
143 if __name__ == '__main__': 140 if __name__ == '__main__':
144 arg_parser = argparse.ArgumentParser() 141 arg_parser = argparse.ArgumentParser()
145 arg_parser.add_argument('-u', '--hg-url', 142 arg_parser.add_argument('-u', '--hg-url',
146 default=None,
147 help='specify which Mercurial URL site to scrape') 143 help='specify which Mercurial URL site to scrape')
148 arg_parser.add_argument('-p', '--push-url', 144 arg_parser.add_argument('-p', '--push-url',
149 default=None,
150 help='specify where to push the repository') 145 help='specify where to push the repository')
151 args = arg_parser.parse_args() 146 args = arg_parser.parse_args()
152 if args.hg_url is None: 147 if args.hg_url and not args.push_url:
Vasily Kuznetsov 2017/07/03 19:25:47 Actually this is not necessary. If you make this o
rosie 2017/07/04 13:38:25 Done.
153 arg_parser.error('-u HG_URL was not specified') 148 arg_parser.error('If -u is provided, -p is mandatory')
154 sys.exit(2) 149 main(args.hg_url or 'https://hg.adblockplus.org/',
155 hg_page = args.hg_url 150 args.push_url or 'ssh://hg@hg.adblockplus.org/')
Vasily Kuznetsov 2017/07/03 19:25:47 Do you think these intermediate variables add valu
rosie 2017/07/04 13:38:25 Done.
156 hg_upstream = args.push_url
157 main(hg_page, hg_upstream)
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld