Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 | 2 |
3 import os | 3 import os |
4 import sys | |
5 import subprocess | 4 import subprocess |
6 import re | 5 import re |
7 import datetime | 6 import datetime |
8 import shutil | 7 import shutil |
9 import urllib.parse | 8 import urllib.parse |
10 import urllib.request | 9 import urllib.request |
11 import html.parser | 10 import html.parser |
12 import argparse | 11 import argparse |
13 from posixpath import dirname | |
14 | 12 |
15 | 13 |
16 CURRENT_YEAR = datetime.datetime.now().year | 14 CURRENT_YEAR = datetime.datetime.now().year |
17 | 15 |
18 | 16 |
19 def process_repo(url, hg_upstream): | 17 def process_repo(url, hg_upstream): |
20 repo = url.rstrip('/').split('/')[-1] | 18 repo = url.rstrip('/').split('/')[-1] |
21 | 19 |
22 if repo in { | 20 if repo in { |
23 # headers are copied from libadblockplus, no need to update separately | 21 # headers are copied from libadblockplus, no need to update separately |
(...skipping 15 matching lines...) Expand all Loading... | |
39 subprocess.call(['hg', 'up', '--rev', 'master', | 37 subprocess.call(['hg', 'up', '--rev', 'master', |
40 '--repository', repo]) | 38 '--repository', repo]) |
41 for dirpath, dirnames, filenames in os.walk(repo): | 39 for dirpath, dirnames, filenames in os.walk(repo): |
42 if dirpath == repo: | 40 if dirpath == repo: |
43 dirnames.remove('.hg') | 41 dirnames.remove('.hg') |
44 | 42 |
45 for filename in filenames: | 43 for filename in filenames: |
46 text_replace(dirpath, filename) | 44 text_replace(dirpath, filename) |
47 if hg_upstream is None: | 45 if hg_upstream is None: |
48 hg_upstream = url | 46 hg_upstream = url |
47 else: | |
48 hg_upstream += '/' + repo | |
49 hg_commit(repo, hg_upstream) | 49 hg_commit(repo, hg_upstream) |
50 | 50 |
51 finally: | 51 finally: |
52 shutil.rmtree(repo, ignore_errors=True) | 52 shutil.rmtree(repo, ignore_errors=True) |
53 | 53 |
54 | 54 |
55 def text_replace(dirpath, filename): | 55 def text_replace(dirpath, filename): |
56 with open(os.path.join(dirpath, filename), 'r+', | 56 with open(os.path.join(dirpath, filename), 'r+', |
57 encoding='utf-8', newline='') as file: | 57 encoding='utf-8', newline='') as file: |
58 try: | 58 try: |
59 text = file.read() | 59 text = file.read() |
60 except UnicodeDecodeError: | 60 except UnicodeDecodeError: |
61 print("Error: Couldn't read {}{}".format(dirpath, filename)) | |
62 return | 61 return |
63 | 62 |
64 text = re.sub( | 63 text = re.sub( |
65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', | 64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', |
66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I | 65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I |
67 ) | 66 ) |
68 file.seek(0) | 67 file.seek(0) |
69 file.write(text) | 68 file.write(text) |
70 file.truncate() | 69 file.truncate() |
71 | 70 |
72 | 71 |
73 def hg_commit(repo, hg_upstream): | 72 def hg_commit(repo, hg_upstream): |
74 try: | 73 try: |
75 subprocess.check_call(['hg', 'commit', '-m', | 74 subprocess.check_call(['hg', 'commit', '-m', |
76 'Noissue - Updated copyright year', | 75 'Noissue - Updated copyright year', |
77 '--repository', repo]) | 76 '--repository', repo]) |
78 except subprocess.CalledProcessError as e: | 77 except subprocess.CalledProcessError as e: |
79 if e.returncode == 1: # no changes | 78 if e.returncode == 1: # no changes |
80 return | 79 return |
81 raise | 80 raise |
82 | 81 |
83 # Push changes, or save patch if access denied | 82 # Push changes, or save patch if access denied |
84 if 'ssh://hg@hg.adblockplus.org/' in hg_upstream: | |
Vasily Kuznetsov
2017/07/03 19:25:47
Here we still have hardcoded logic related to hg.a
rosie
2017/07/04 13:38:25
Done.
| |
85 hg_upstream += repo | |
86 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0: | 83 if subprocess.call(['hg', 'push', '--repository', repo, hg_upstream]) != 0: |
87 with open(repo + '.patch', 'wb') as file: | 84 with open(repo + '.patch', 'wb') as file: |
88 print('couldnt push, making patch instead') | 85 print('couldnt push, making patch instead') |
89 subprocess.check_call(['hg', 'export', '--repository', repo], | 86 subprocess.check_call(['hg', 'export', '--repository', repo], |
90 stdout=file) | 87 stdout=file) |
91 | 88 |
92 | 89 |
93 class Parser(html.parser.HTMLParser): | 90 class Parser(html.parser.HTMLParser): |
94 result = [] | 91 result = [] |
95 recordtr = False | 92 recordtr = False |
(...skipping 13 matching lines...) Expand all Loading... | |
109 def handle_endtag(self, tag): | 106 def handle_endtag(self, tag): |
110 if tag == 'tr': | 107 if tag == 'tr': |
111 self.recordtr = False | 108 self.recordtr = False |
112 self.cell = 0 | 109 self.cell = 0 |
113 | 110 |
114 def handle_data(self, data): | 111 def handle_data(self, data): |
115 if self.cell == 2 and self.recordtr is True: | 112 if self.cell == 2 and self.recordtr is True: |
116 self.recordtr = False | 113 self.recordtr = False |
117 self.cell = 0 | 114 self.cell = 0 |
118 # Only process the URL if the description is not Deprecated | 115 # Only process the URL if the description is not Deprecated |
119 deprecated = (re.search(r'\*DEPRECATED\*', data) or | 116 if ('*DEPRECATED*' not in data and |
120 re.search(r'(Deprecated)', data)) | 117 '(Deprecated)' not in data and |
121 if not deprecated and len(self.current_url) > 2: | 118 len(self.current_url) > 2): |
122 self.result += [self.current_url] | 119 self.result += [self.current_url] |
123 return self.result | 120 return self.result |
124 | 121 |
125 | 122 |
126 def extract_urls(hg_page): | 123 def extract_urls(hg_page): |
127 base_url = dirname(hg_page) + '/' | 124 base_url = os.path.dirname(hg_page) + '/' |
Vasily Kuznetsov
2017/07/03 19:25:47
This will probably still break on some non-POSIX s
rosie
2017/07/04 13:38:26
Acknowledged.
| |
128 parser = Parser() | 125 parser = Parser() |
129 with urllib.request.urlopen(hg_page) as response: | 126 with urllib.request.urlopen(hg_page) as response: |
130 parser.feed(response.read().decode('utf-8')) | 127 parser.feed(response.read().decode('utf-8')) |
131 parser.close() | 128 parser.close() |
132 repo_urls = [] | 129 repo_urls = [] |
133 for url in parser.result: | 130 for url in parser.result: |
134 repo_urls.append(urllib.parse.urljoin(base_url, url)) | 131 repo_urls.append(urllib.parse.urljoin(base_url, url)) |
135 return repo_urls | 132 return repo_urls |
136 | 133 |
137 | 134 |
138 def main(hg_page, hg_upstream): | 135 def main(hg_page, hg_upstream): |
139 for repo in extract_urls(hg_page): | 136 for repo in extract_urls(hg_page): |
140 process_repo(repo, hg_upstream) | 137 process_repo(repo, hg_upstream) |
141 | 138 |
142 | 139 |
143 if __name__ == '__main__': | 140 if __name__ == '__main__': |
144 arg_parser = argparse.ArgumentParser() | 141 arg_parser = argparse.ArgumentParser() |
145 arg_parser.add_argument('-u', '--hg-url', | 142 arg_parser.add_argument('-u', '--hg-url', |
146 default=None, | |
147 help='specify which Mercurial URL site to scrape') | 143 help='specify which Mercurial URL site to scrape') |
148 arg_parser.add_argument('-p', '--push-url', | 144 arg_parser.add_argument('-p', '--push-url', |
149 default=None, | |
150 help='specify where to push the repository') | 145 help='specify where to push the repository') |
151 args = arg_parser.parse_args() | 146 args = arg_parser.parse_args() |
152 if args.hg_url is None: | 147 if args.hg_url and not args.push_url: |
Vasily Kuznetsov
2017/07/03 19:25:47
Actually this is not necessary. If you make this o
rosie
2017/07/04 13:38:25
Done.
| |
153 arg_parser.error('-u HG_URL was not specified') | 148 arg_parser.error('If -u is provided, -p is mandatory') |
154 sys.exit(2) | 149 main(args.hg_url or 'https://hg.adblockplus.org/', |
155 hg_page = args.hg_url | 150 args.push_url or 'ssh://hg@hg.adblockplus.org/') |
Vasily Kuznetsov
2017/07/03 19:25:47
Do you think these intermediate variables add valu
rosie
2017/07/04 13:38:25
Done.
| |
156 hg_upstream = args.push_url | |
157 main(hg_page, hg_upstream) | |
LEFT | RIGHT |