Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: update-copyright/update_copyright.py

Issue 29459580: Issue 5250 - Add copyright update script (Closed) Base URL: https://hg.adblockplus.org/codingtools
Patch Set: Minor formatting fixes Created July 4, 2017, 3:13 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« update-copyright/tox.ini ('K') | « update-copyright/tox.ini ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python3
Sebastian Noack 2017/07/05 15:46:53 I suppose we should make this script executable (i
rosie 2017/07/07 15:55:48 Done.
2
3 import os
4 import subprocess
5 import re
6 import datetime
7 import shutil
8 import urllib.parse
9 import urllib.request
10 import html.parser
11 import argparse
12
13
# Year used as the end of the copyright range written by text_replace().
# Evaluated once at import time, so every file handled in a single run gets
# the same value.
CURRENT_YEAR = datetime.datetime.now().year
15
16
def process_repo(url, hg_upstream):
    """Clone one repository, refresh its copyright headers and publish them.

    The repository is cloned into a directory named after the last path
    component of ``url``, relative to the current working directory.  That
    directory is removed again when processing ends, whether or not it
    succeeded, so the script should be run from a scratch directory.

    Args:
        url: URL of the Mercurial repository to clone.
        hg_upstream: Base URL to push to; the repository name is appended
            to it.  ``None`` means the changes are pushed back to ``url``.

    Raises:
        subprocess.CalledProcessError: If a required ``hg`` command (clone,
            switching to the ``adblockbrowser`` branch, commit, export)
            exits with an error.
    """
    repo = url.rstrip('/').split('/')[-1]

    if repo in {
        # headers are copied from libadblockplus, no need to update separately
        'libadblockplus-binaries',
        # huge and only contains autogenerated builds
        'downloads',
    }:
        return

    try:
        subprocess.check_call(['hg', 'clone', url, repo])
        if repo == 'adblockbrowser':
            # adblockbrowser is a FF fork with its own changes in a
            # separate branch
            subprocess.check_call(['hg', 'up', '--rev', 'adblockbrowser',
                                   '--repository', repo])
        else:
            # switch to 'master' bookmark if it exists; a failure here is
            # deliberately ignored
            subprocess.call(['hg', 'up', '--rev', 'master',
                             '--repository', repo])

        for dirpath, dirnames, filenames in os.walk(repo):
            # Prune Mercurial's own metadata so os.walk never descends
            # into it.
            if dirpath == repo and '.hg' in dirnames:
                dirnames.remove('.hg')

            for filename in filenames:
                text_replace(dirpath, filename)

        if hg_upstream is None:
            push_target = url
        else:
            # Tolerate a trailing slash on the base URL instead of
            # producing 'base//repo'.
            push_target = hg_upstream.rstrip('/') + '/' + repo
        hg_commit(repo, push_target)
    finally:
        shutil.rmtree(repo, ignore_errors=True)
53
54
def text_replace(dirpath, filename, year=None):
    """Bring the eyeo GmbH copyright notice in one file up to date.

    Every match of ``copyright ... YYYY[-YYYY] eyeo gmbh`` (case-insensitive)
    is rewritten so that the range ends in ``year`` and the company name is
    spelled ``eyeo GmbH``.  Line endings are preserved, and the file is only
    written when its content actually changes.

    Files that are not regular files (e.g. broken symlinks) or that cannot
    be decoded as UTF-8 (binary files) are skipped silently; both are
    expected inside a repository checkout and are not errors.

    Args:
        dirpath: Directory containing the file.
        filename: Name of the file inside ``dirpath``.
        year: Last year of the copyright range.  Defaults to the
            module-level ``CURRENT_YEAR``.
    """
    if year is None:
        year = CURRENT_YEAR
    year = str(year)

    path = os.path.join(dirpath, filename)
    if not os.path.isfile(path):
        return

    def bump(match):
        # Group 1 ends with the first year of the notice.  If that year is
        # already the target year, write a single year rather than a
        # degenerate range such as '2017-2017'.
        prefix = match.group(1)
        if prefix.endswith(year):
            return '{} eyeo GmbH'.format(prefix)
        return '{}-{} eyeo GmbH'.format(prefix, year)

    # newline='' keeps the file's original line endings untouched.
    with open(path, 'r+', encoding='utf-8', newline='') as file:
        try:
            text = file.read()
        except UnicodeDecodeError:
            # Not UTF-8 text, most likely a binary file: nothing to update.
            return

        updated = re.sub(
            r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh',
            bump, text, flags=re.I,
        )
        if updated == text:
            return

        file.seek(0)
        file.write(updated)
        file.truncate()
71
72
def hg_commit(repo, hg_upstream):
    """Commit the updated headers in ``repo`` and push them upstream.

    If the commit command exits with status 1 (no changes), nothing else
    is done.  If the push fails, the commit is exported to
    ``<repo>.patch`` in the current working directory instead.

    Args:
        repo: Path of the local clone.
        hg_upstream: URL the commit is pushed to.

    Raises:
        subprocess.CalledProcessError: If ``hg commit`` fails with a status
            other than 1, or if ``hg export`` fails.
    """
    commit_cmd = [
        'hg', 'commit',
        '-m', 'Noissue - Updated copyright year',
        '--repository', repo,
    ]
    try:
        subprocess.check_call(commit_cmd)
    except subprocess.CalledProcessError as error:
        if error.returncode != 1:
            raise
        # Status 1: no changes, so there is nothing to push.
        return

    push_status = subprocess.call(
        ['hg', 'push', '--repository', repo, hg_upstream])
    if push_status == 0:
        return

    # Push failed (e.g. access denied): keep the change as a patch file.
    with open(repo + '.patch', 'wb') as patch_file:
        print('couldnt push, making patch instead')
        subprocess.check_call(['hg', 'export', '--repository', repo],
                              stdout=patch_file)
89
90
class Parser(html.parser.HTMLParser):
    """Collect repository links from an HTML table of repositories.

    For each table row, the ``href`` of an ``<a>`` whose ``class`` contains
    ``list`` is remembered, and the first text found in the row's second
    ``<td>`` is treated as the description.  The link is appended to
    ``result`` unless the description contains ``*DEPRECATED*`` or
    ``(Deprecated)``.

    Attributes:
        result: Collected ``href`` values, in document order.
    """

    # Literal markers in a description that flag a deprecated repository.
    DEPRECATED_MARKERS = ('*DEPRECATED*', '(Deprecated)')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance state: a class-level list would be shared by (and
        # accumulate results across) every Parser instance.
        self.result = []
        self.recordtr = False   # inside a row whose description is pending
        self.cell = 0           # number of <td> seen in the current row
        self.current_url = ''   # link found in the current row, if any

    def handle_starttag(self, tag, attrs):
        if tag == 'tr':
            self.recordtr = True
            # Forget the previous row's link so that a row without one
            # cannot re-report it.
            self.current_url = ''
        if tag == 'td':
            self.cell += 1
        if tag == 'a':
            attrs = dict(attrs)
            # Attribute values are None for valueless attributes.
            if 'list' in (attrs.get('class') or '').split():
                self.current_url = attrs.get('href') or ''

    def handle_endtag(self, tag):
        if tag == 'tr':
            self.recordtr = False
            self.cell = 0

    def handle_data(self, data):
        if self.cell == 2 and self.recordtr:
            # Only the first text chunk of the second cell is examined.
            self.recordtr = False
            self.cell = 0
            # Only process the URL if the description is not Deprecated
            deprecated = any(marker in data
                             for marker in self.DEPRECATED_MARKERS)
            if not deprecated and len(self.current_url) > 2:
                self.result.append(self.current_url)
122
123
def extract_urls(hg_page):
    """Return absolute URLs of the repositories listed on ``hg_page``.

    The page is downloaded, decoded as UTF-8 and fed to ``Parser``; each
    link it collects is resolved against the page URL itself.

    Args:
        hg_page: URL of the HTML page listing the repositories.

    Returns:
        A list of absolute repository URLs, in page order.
    """
    with urllib.request.urlopen(hg_page) as response:
        page = response.read().decode('utf-8')

    parser = Parser()
    parser.feed(page)
    parser.close()

    # Resolve against the page URL directly: urljoin already implements
    # relative-reference resolution, whereas os.path.dirname() is a
    # filesystem helper and mangles URLs without a path (e.g.
    # 'https://host' became 'https:/').
    return [urllib.parse.urljoin(hg_page, href) for href in parser.result]
134
135
def main(hg_page, hg_upstream):
    """Process every repository listed on ``hg_page``.

    Args:
        hg_page: URL of the page listing the repositories.
        hg_upstream: Passed through to ``process_repo`` for each
            repository; may be ``None``.
    """
    repo_urls = extract_urls(hg_page)
    for repo_url in repo_urls:
        process_repo(repo_url, hg_upstream)
139
140
if __name__ == '__main__':
    # Command-line entry point: -u/--hg-url is mandatory, -p/--push-url is
    # optional and defaults to None.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-u', '--hg-url',
        required=True,
        help='specify which Mercurial URL site to scrape',
    )
    cli.add_argument(
        '-p', '--push-url',
        default=None,
        help='specify where to push the repository',
    )
    options = cli.parse_args()
    main(options.hg_url, options.push_url)
OLDNEW
« update-copyright/tox.ini ('K') | « update-copyright/tox.ini ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld