Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools
Left Patch Set: Minor formatting fixes Created July 4, 2017, 3:13 p.m.
Right Patch Set: Fix indentation and default args Created July 17, 2017, 1:22 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « update-copyright/tox.ini ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
Sebastian Noack 2017/07/05 15:46:53 I suppose we should make this script executable (i
rosie 2017/07/07 15:55:48 Done.
2 2
3 import os 3 import os
4 import subprocess 4 import subprocess
5 import re 5 import re
6 import datetime 6 import datetime
7 import shutil 7 import shutil
8 import urllib.parse 8 import urllib.parse
9 import urllib.request 9 import urllib.request
10 import html.parser 10 import html.parser
11 import argparse 11 import argparse
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 finally: 51 finally:
52 shutil.rmtree(repo, ignore_errors=True) 52 shutil.rmtree(repo, ignore_errors=True)
53 53
54 54
55 def text_replace(dirpath, filename): 55 def text_replace(dirpath, filename):
56 with open(os.path.join(dirpath, filename), 'r+', 56 with open(os.path.join(dirpath, filename), 'r+',
57 encoding='utf-8', newline='') as file: 57 encoding='utf-8', newline='') as file:
58 try: 58 try:
59 text = file.read() 59 text = file.read()
60 except UnicodeDecodeError: 60 except UnicodeDecodeError:
61 print("Error: Couldn't read {}{}".format(dirpath, filename))
Sebastian Noack 2017/07/05 15:46:53 Failing silently (in the original code) was intend
rosie 2017/07/07 15:55:49 Done.
62 return 61 return
63 62
64 text = re.sub( 63 text = re.sub(
65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', 64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I 65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I
67 ) 66 )
68 file.seek(0) 67 file.seek(0)
69 file.write(text) 68 file.write(text)
70 file.truncate() 69 file.truncate()
71 70
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
107 def handle_endtag(self, tag): 106 def handle_endtag(self, tag):
108 if tag == 'tr': 107 if tag == 'tr':
109 self.recordtr = False 108 self.recordtr = False
110 self.cell = 0 109 self.cell = 0
111 110
112 def handle_data(self, data): 111 def handle_data(self, data):
113 if self.cell == 2 and self.recordtr is True: 112 if self.cell == 2 and self.recordtr is True:
114 self.recordtr = False 113 self.recordtr = False
115 self.cell = 0 114 self.cell = 0
116 # Only process the URL if the description is not Deprecated 115 # Only process the URL if the description is not Deprecated
117 deprecated = (re.search(r'\*DEPRECATED\*', data) or 116 if ('*DEPRECATED*' not in data and
118 re.search(r'(Deprecated)', data)) 117 '(Deprecated)' not in data and
Sebastian Noack 2017/07/05 15:46:53 This regular expression seems incorrect. The paren
rosie 2017/07/07 15:55:48 Done.
119 if not deprecated and len(self.current_url) > 2: 118 len(self.current_url) > 2):
120 self.result += [self.current_url] 119 self.result += [self.current_url]
121 return self.result 120 return self.result
122 121
123 122
124 def extract_urls(hg_page): 123 def extract_urls(hg_page):
125 base_url = os.path.dirname(hg_page) + '/' 124 base_url = os.path.dirname(hg_page) + '/'
126 parser = Parser() 125 parser = Parser()
127 with urllib.request.urlopen(hg_page) as response: 126 with urllib.request.urlopen(hg_page) as response:
128 parser.feed(response.read().decode('utf-8')) 127 parser.feed(response.read().decode('utf-8'))
129 parser.close() 128 parser.close()
130 repo_urls = [] 129 repo_urls = []
131 for url in parser.result: 130 for url in parser.result:
132 repo_urls.append(urllib.parse.urljoin(base_url, url)) 131 repo_urls.append(urllib.parse.urljoin(base_url, url))
133 return repo_urls 132 return repo_urls
134 133
135 134
136 def main(hg_page, hg_upstream): 135 def main(hg_page, hg_upstream):
137 for repo in extract_urls(hg_page): 136 for repo in extract_urls(hg_page):
138 process_repo(repo, hg_upstream) 137 process_repo(repo, hg_upstream)
139 138
140 139
141 if __name__ == '__main__': 140 if __name__ == '__main__':
142 arg_parser = argparse.ArgumentParser() 141 arg_parser = argparse.ArgumentParser()
143 arg_parser.add_argument('-u', '--hg-url', 142 arg_parser.add_argument('-u', '--hg-url',
144 help='specify which Mercurial URL site to scrape', 143 help='specify which Mercurial URL site to scrape')
145 required=True)
146 arg_parser.add_argument('-p', '--push-url', 144 arg_parser.add_argument('-p', '--push-url',
147 default=None,
148 help='specify where to push the repository') 145 help='specify where to push the repository')
149 args = arg_parser.parse_args() 146 args = arg_parser.parse_args()
150 main(args.hg_url, args.push_url) 147 if args.hg_url and not args.push_url:
148 arg_parser.error('If -u is provided, -p is mandatory')
149 main(args.hg_url or 'https://hg.adblockplus.org/',
150 args.push_url or 'ssh://hg@hg.adblockplus.org/')
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld