| LEFT | RIGHT | 
|---|---|
| 1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 | 
| 2 | 2 | 
| 3 import os | 3 import os | 
| 4 import subprocess | 4 import subprocess | 
| 5 import re | 5 import re | 
| 6 import datetime | 6 import datetime | 
| 7 import shutil | 7 import shutil | 
| 8 import urllib.parse | 8 import urllib.parse | 
| 9 import urllib.request | 9 import urllib.request | 
| 10 import html.parser | 10 import html.parser | 
| (...skipping 95 matching lines...) | (...skipping 95 matching lines...) | 
| 106     def handle_endtag(self, tag): | 106     def handle_endtag(self, tag): | 
| 107         if tag == 'tr': | 107         if tag == 'tr': | 
| 108             self.recordtr = False | 108             self.recordtr = False | 
| 109             self.cell = 0 | 109             self.cell = 0 | 
| 110 | 110 | 
| 111     def handle_data(self, data): | 111     def handle_data(self, data): | 
| 112         if self.cell == 2 and self.recordtr is True: | 112         if self.cell == 2 and self.recordtr is True: | 
| 113             self.recordtr = False | 113             self.recordtr = False | 
| 114             self.cell = 0 | 114             self.cell = 0 | 
| 115             # Only process the URL if the description is not Deprecated | 115             # Only process the URL if the description is not Deprecated | 
| 116             if ('*DEPRECATED*' not in data and '(Deprecated)' not in data and | 116             if ('*DEPRECATED*' not in data and | 
| 117                len(self.current_url) > 2): | 117                     '(Deprecated)' not in data and | 
|  | 118                     len(self.current_url) > 2): | 
| 118                 self.result += [self.current_url] | 119                 self.result += [self.current_url] | 
| 119                 return self.result | 120                 return self.result | 
| 120 | 121 | 
| 121 | 122 | 
| 122 def extract_urls(hg_page): | 123 def extract_urls(hg_page): | 
| 123     base_url = os.path.dirname(hg_page) + '/' | 124     base_url = os.path.dirname(hg_page) + '/' | 
| 124     parser = Parser() | 125     parser = Parser() | 
| 125     with urllib.request.urlopen(hg_page) as response: | 126     with urllib.request.urlopen(hg_page) as response: | 
| 126         parser.feed(response.read().decode('utf-8')) | 127         parser.feed(response.read().decode('utf-8')) | 
| 127     parser.close() | 128     parser.close() | 
| 128     repo_urls = [] | 129     repo_urls = [] | 
| 129     for url in parser.result: | 130     for url in parser.result: | 
| 130         repo_urls.append(urllib.parse.urljoin(base_url, url)) | 131         repo_urls.append(urllib.parse.urljoin(base_url, url)) | 
| 131     return repo_urls | 132     return repo_urls | 
| 132 | 133 | 
| 133 | 134 | 
| 134 def main(hg_page, hg_upstream): | 135 def main(hg_page, hg_upstream): | 
| 135     for repo in extract_urls(hg_page): | 136     for repo in extract_urls(hg_page): | 
| 136         process_repo(repo, hg_upstream) | 137         process_repo(repo, hg_upstream) | 
| 137 | 138 | 
| 138 | 139 | 
| 139 if __name__ == '__main__': | 140 if __name__ == '__main__': | 
| 140     arg_parser = argparse.ArgumentParser() | 141     arg_parser = argparse.ArgumentParser() | 
| 141     arg_parser.add_argument('-u', '--hg-url', | 142     arg_parser.add_argument('-u', '--hg-url', | 
| 142                             help='specify which Mercurial URL site to scrape', | 143                             help='specify which Mercurial URL site to scrape') | 
| 143                             default='https://hg.adblockplus.org/') |  | 
| 144     arg_parser.add_argument('-p', '--push-url', | 144     arg_parser.add_argument('-p', '--push-url', | 
| 145                             default='ssh://hg@hg.adblockplus.org/', |  | 
| 146                             help='specify where to push the repository') | 145                             help='specify where to push the repository') | 
| 147     args = arg_parser.parse_args() | 146     args = arg_parser.parse_args() | 
| 148     if (args.hg_url != 'https://hg.adblockplus.org/' and args.push_url == | 147     if args.hg_url and not args.push_url: | 
| 149        'ssh://hg@hg.adblockplus.org/'): |  | 
| 150         arg_parser.error('If -u is provided, -p is mandatory') | 148         arg_parser.error('If -u is provided, -p is mandatory') | 
| 151     main(args.hg_url, args.push_url) | 149     main(args.hg_url or 'https://hg.adblockplus.org/', | 
|  | 150          args.push_url or 'ssh://hg@hg.adblockplus.org/') | 
| LEFT | RIGHT | 
|---|---|