| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
|
Sebastian Noack
2017/07/05 15:46:53
I suppose we should make this script executable (i
rosie
2017/07/07 15:55:48
Done.
| |
| 2 | 2 |
| 3 import os | 3 import os |
| 4 import subprocess | 4 import subprocess |
| 5 import re | 5 import re |
| 6 import datetime | 6 import datetime |
| 7 import shutil | 7 import shutil |
| 8 import urllib.parse | 8 import urllib.parse |
| 9 import urllib.request | 9 import urllib.request |
| 10 import html.parser | 10 import html.parser |
| 11 import argparse | 11 import argparse |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 51 finally: | 51 finally: |
| 52 shutil.rmtree(repo, ignore_errors=True) | 52 shutil.rmtree(repo, ignore_errors=True) |
| 53 | 53 |
| 54 | 54 |
| 55 def text_replace(dirpath, filename): | 55 def text_replace(dirpath, filename): |
| 56 with open(os.path.join(dirpath, filename), 'r+', | 56 with open(os.path.join(dirpath, filename), 'r+', |
| 57 encoding='utf-8', newline='') as file: | 57 encoding='utf-8', newline='') as file: |
| 58 try: | 58 try: |
| 59 text = file.read() | 59 text = file.read() |
| 60 except UnicodeDecodeError: | 60 except UnicodeDecodeError: |
| 61 print("Error: Couldn't read {}{}".format(dirpath, filename)) | |
|
Sebastian Noack
2017/07/05 15:46:53
Failing silently (in the original code) was intend
rosie
2017/07/07 15:55:49
Done.
| |
| 62 return | 61 return |
| 63 | 62 |
| 64 text = re.sub( | 63 text = re.sub( |
| 65 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', | 64 r'(copyright.*?\d{4})(?:-\d{4})?\s+eyeo gmbh', |
| 66 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I | 65 r'\1-{} eyeo GmbH'.format(CURRENT_YEAR), text, 0, re.I |
| 67 ) | 66 ) |
| 68 file.seek(0) | 67 file.seek(0) |
| 69 file.write(text) | 68 file.write(text) |
| 70 file.truncate() | 69 file.truncate() |
| 71 | 70 |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 107 def handle_endtag(self, tag): | 106 def handle_endtag(self, tag): |
| 108 if tag == 'tr': | 107 if tag == 'tr': |
| 109 self.recordtr = False | 108 self.recordtr = False |
| 110 self.cell = 0 | 109 self.cell = 0 |
| 111 | 110 |
| 112 def handle_data(self, data): | 111 def handle_data(self, data): |
| 113 if self.cell == 2 and self.recordtr is True: | 112 if self.cell == 2 and self.recordtr is True: |
| 114 self.recordtr = False | 113 self.recordtr = False |
| 115 self.cell = 0 | 114 self.cell = 0 |
| 116 # Only process the URL if the description is not Deprecated | 115 # Only process the URL if the description is not Deprecated |
| 117 deprecated = (re.search(r'\*DEPRECATED\*', data) or | 116 if ('*DEPRECATED*' not in data and |
| 118 re.search(r'(Deprecated)', data)) | 117 '(Deprecated)' not in data and |
|
Sebastian Noack
2017/07/05 15:46:53
This regular expression seems incorrect. The paren
rosie
2017/07/07 15:55:48
Done.
| |
| 119 if not deprecated and len(self.current_url) > 2: | 118 len(self.current_url) > 2): |
| 120 self.result += [self.current_url] | 119 self.result += [self.current_url] |
| 121 return self.result | 120 return self.result |
| 122 | 121 |
| 123 | 122 |
| 124 def extract_urls(hg_page): | 123 def extract_urls(hg_page): |
| 125 base_url = os.path.dirname(hg_page) + '/' | 124 base_url = os.path.dirname(hg_page) + '/' |
| 126 parser = Parser() | 125 parser = Parser() |
| 127 with urllib.request.urlopen(hg_page) as response: | 126 with urllib.request.urlopen(hg_page) as response: |
| 128 parser.feed(response.read().decode('utf-8')) | 127 parser.feed(response.read().decode('utf-8')) |
| 129 parser.close() | 128 parser.close() |
| 130 repo_urls = [] | 129 repo_urls = [] |
| 131 for url in parser.result: | 130 for url in parser.result: |
| 132 repo_urls.append(urllib.parse.urljoin(base_url, url)) | 131 repo_urls.append(urllib.parse.urljoin(base_url, url)) |
| 133 return repo_urls | 132 return repo_urls |
| 134 | 133 |
| 135 | 134 |
| 136 def main(hg_page, hg_upstream): | 135 def main(hg_page, hg_upstream): |
| 137 for repo in extract_urls(hg_page): | 136 for repo in extract_urls(hg_page): |
| 138 process_repo(repo, hg_upstream) | 137 process_repo(repo, hg_upstream) |
| 139 | 138 |
| 140 | 139 |
| 141 if __name__ == '__main__': | 140 if __name__ == '__main__': |
| 142 arg_parser = argparse.ArgumentParser() | 141 arg_parser = argparse.ArgumentParser() |
| 143 arg_parser.add_argument('-u', '--hg-url', | 142 arg_parser.add_argument('-u', '--hg-url', |
| 144 help='specify which Mercurial URL site to scrape', | 143 help='specify which Mercurial URL site to scrape') |
| 145 required=True) | |
| 146 arg_parser.add_argument('-p', '--push-url', | 144 arg_parser.add_argument('-p', '--push-url', |
| 147 default=None, | |
| 148 help='specify where to push the repository') | 145 help='specify where to push the repository') |
| 149 args = arg_parser.parse_args() | 146 args = arg_parser.parse_args() |
| 150 main(args.hg_url, args.push_url) | 147 if args.hg_url and not args.push_url: |
| 148 arg_parser.error('If -u is provided, -p is mandatory') | |
| 149 main(args.hg_url or 'https://hg.adblockplus.org/', | |
| 150 args.push_url or 'ssh://hg@hg.adblockplus.org/') | |
| LEFT | RIGHT |