update-copyright/update_copyright.py - Issue 29459580: Issue 5250 - Add copyright update script

Delta Between Two Patch Sets: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools

Left Patch Set: Clean up comments, ReadMe, formatting Created July 4, 2017, 1:38 p.m.

Right Patch Set: Fix indentation and default args Created July 17, 2017, 1:22 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 #!/usr/bin/env python3	1 #!/usr/bin/env python3

2	2

3 import os	3 import os

4 import subprocess	4 import subprocess

5 import re	5 import re

6 import datetime	6 import datetime

7 import shutil	7 import shutil

8 import urllib.parse	8 import urllib.parse

9 import urllib.request	9 import urllib.request

10 import html.parser	10 import html.parser

(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
51 finally:	51 finally:

52 shutil.rmtree(repo, ignore_errors=True)	52 shutil.rmtree(repo, ignore_errors=True)

53	53

54	54

55 def text_replace(dirpath, filename):	55 def text_replace(dirpath, filename):

56 with open(os.path.join(dirpath, filename), 'r+',	56 with open(os.path.join(dirpath, filename), 'r+',

57 encoding='utf-8', newline='') as file:	57 encoding='utf-8', newline='') as file:

58 try:	58 try:

59 text = file.read()	59 text = file.read()

60 except UnicodeDecodeError:	60 except UnicodeDecodeError:

61 print("Error: Couldn't read {}{}".format(dirpath, filename))

62 return	61 return

63	62

64 text = re.sub(	63 text = re.sub(

65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',	64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',

66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I	65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I

67 )	66 )

68 file.seek(0)	67 file.seek(0)

69 file.write(text)	68 file.write(text)

70 file.truncate()	69 file.truncate()

71	70

(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
107 def handle_endtag(self, tag):	106 def handle_endtag(self, tag):

108 if tag == 'tr':	107 if tag == 'tr':

109 self.recordtr = False	108 self.recordtr = False

110 self.cell = 0	109 self.cell = 0

111	110

112 def handle_data(self, data):	111 def handle_data(self, data):

113 if self.cell == 2 and self.recordtr is True:	112 if self.cell == 2 and self.recordtr is True:

114 self.recordtr = False	113 self.recordtr = False

115 self.cell = 0	114 self.cell = 0

116 # Only process the URL if the description is not Deprecated	115 # Only process the URL if the description is not Deprecated

117 deprecated = (re.search(r'\DEPRECATED\', data) or	116 if ('DEPRECATED' not in data and

118 re.search(r'(Deprecated)', data))	117 '(Deprecated)' not in data and

119 if not deprecated and len(self.current_url) > 2:	118 len(self.current_url) > 2):

120 self.result += [self.current_url]	119 self.result += [self.current_url]

121 return self.result	120 return self.result

122	121

123	122

124 def extract_urls(hg_page):	123 def extract_urls(hg_page):

125 base_url = os.path.dirname(hg_page) + '/'	124 base_url = os.path.dirname(hg_page) + '/'

126 parser = Parser()	125 parser = Parser()

127 with urllib.request.urlopen(hg_page) as response:	126 with urllib.request.urlopen(hg_page) as response:

128 parser.feed(response.read().decode('utf-8'))	127 parser.feed(response.read().decode('utf-8'))

129 parser.close()	128 parser.close()

130 repo_urls = []	129 repo_urls = []

131 for url in parser.result:	130 for url in parser.result:

132 repo_urls.append(urllib.parse.urljoin(base_url, url))	131 repo_urls.append(urllib.parse.urljoin(base_url, url))

133 return repo_urls	132 return repo_urls

134	133

135	134

136 def main(hg_page, hg_upstream):	135 def main(hg_page, hg_upstream):

137 for repo in extract_urls(hg_page):	136 for repo in extract_urls(hg_page):

138 process_repo(repo, hg_upstream)	137 process_repo(repo, hg_upstream)

139	138

140	139

141 if __name__ == '__main__':	140 if __name__ == '__main__':

142 arg_parser = argparse.ArgumentParser()	141 arg_parser = argparse.ArgumentParser()

143 arg_parser.add_argument('-u', '--hg-url',	142 arg_parser.add_argument('-u', '--hg-url',

144 default=None,	143 help='specify which Mercurial URL site to scrape')
Vasily Kuznetsov 2017/07/04 15:02:40: I think you don't need a default here since `-u` is a required argument.
rosie 2017/07/04 15:14:02: On 2017/07/04 15:02:40, Vasily Kuznetsov wrote: > I think you don't need a default here since `-u` is a required argument. Done.
145 help='specify which Mercurial URL site to scrape',

146 required=True)

147 arg_parser.add_argument('-p', '--push-url',	144 arg_parser.add_argument('-p', '--push-url',

148 default=None,

149 help='specify where to push the repository')	145 help='specify where to push the repository')

150 args = arg_parser.parse_args()	146 args = arg_parser.parse_args()

151 main(args.hg_url, args.push_url)	147 if args.hg_url and not args.push_url:

	148 arg_parser.error('If -u is provided, -p is mandatory')

	149 main(args.hg_url or 'https://hg.adblockplus.org/',

	150 args.push_url or 'ssh://hg@hg.adblockplus.org/')

LEFT	RIGHT