| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding: utf-8 | 2 # coding: utf-8 |
| 3 | 3 |
| 4 import argparse | 4 import argparse |
| 5 import datetime | 5 import datetime |
| 6 import errno | 6 import errno |
| 7 import hashlib | 7 import hashlib |
| 8 import io | 8 import io |
| 9 import json | 9 import json |
| 10 import os | 10 import os |
| (...skipping 66 matching lines...) | |
| 77 | 77 |
| 78 with io.open(datapath, 'w', encoding='utf-8') as handle: | 78 with io.open(datapath, 'w', encoding='utf-8') as handle: |
| 79         handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)) + u'\n') | 79         handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)) + u'\n') |
| 80 start_response('204 No Content', []) | 80 start_response('204 No Content', []) |
| 81 return '' | 81 return '' |
| 82 | 82 |
| 83 start_response('404 Not Found', []) | 83 start_response('404 Not Found', []) |
| 84 return '' | 84 return '' |
| 85 | 85 |
| 86 | 86 |
| 87 def read_as_json(file_path): | |
| 88 with open(file_path, mode='r') as json_file: | |
| 89 return json.load(json_file, encoding='UTF-8') | |
| 90 | |
| 91 | |
| 92 class Parameters: | |
| 93 """This class loads config file and parses command line parameters. | |
| 94     Values are stored in attributes of this class instance. | |
| 95 """ | |
| 96 def __init__(self): | |
| 97 cli_parameters = vars(Parameters._parse_command_line()) | |
| 98 config_parameters = Parameters._load_config(cli_parameters["config"]) | |
| 99 for field in cli_parameters.keys(): | |
| 100       config_value = config_parameters[field] if field in config_parameters else None | |
| 101       value = cli_parameters[field] if cli_parameters[field] is not None else config_value | |

tschuster
2016/11/21 18:07:55
Can't you just write:
value = cli_parameters[field
sergei
2016/11/22 09:23:31
Changed. Now lines are not so long and you are right.
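Both comments above are truncated by the review tool, so the exact follow-up change is not visible here. A minimal sketch of one plausible reading of the suggestion, dropping the `config_value` temporary in favour of `dict.get()` while keeping the `is not None` check (so that falsy values such as `0` passed on the command line still take effect):

```python
# Sketch only; the actual revised patch is not shown in this diff.
def merge_parameters(cli_parameters, config_parameters):
    merged = {}
    for field, cli_value in cli_parameters.items():
        # The command line wins only when the option was explicitly given.
        merged[field] = (cli_value if cli_value is not None
                         else config_parameters.get(field))
    return merged

# merge_parameters({'timeout': None, 'maxtabs': 20}, {'timeout': 300})
# -> {'timeout': 300, 'maxtabs': 20}
```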
| 102 setattr(self, field, value) | |
| 103 | |
| 104 @staticmethod | |
| 105 def _parse_command_line(): | |
| 106 parser = argparse.ArgumentParser(description='Run crawler') | |
| 107 parser.add_argument( | |
| 108 '-c', '--config', type=str, | |
| 109 help='path to config file, example is config.json.example' | |
| 110 ) | |
| 111 parser.add_argument( | |
| 112 '-b', '--binary', type=str, | |
| 113 help='path to the Firefox binary' | |
| 114 ) | |
| 115 parser.add_argument( | |
| 116 '-a', '--abpdir', type=str, | |
| 117 help='path to the Adblock Plus repository' | |
| 118 ) | |
| 119 parser.add_argument( | |
| 120 '-f', '--filters', metavar='url', type=str, nargs='+', | |
| 121         help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.' | |
| 122 ) | |
| 123 parser.add_argument( | |
| 124 '-t', '--timeout', type=int, | |
| 125 help='Load timeout (seconds)' | |
| 126 ) | |
| 127 parser.add_argument( | |
| 128 '-x', '--maxtabs', type=int, | |
| 129 help='Maximal number of tabs to open in parallel' | |
| 130 ) | |
| 131 parser.add_argument( | |
| 132 '-l', '--list', type=str, | |
| 133 help='URL list to process', | |
| 134 ) | |
| 135 parser.add_argument( | |
| 136 '-o', '--outdir', type=str, | |
| 137 help='directory to write data into', | |
| 138 ) | |
| 139 parameters = parser.parse_args() | |
| 140 return parameters | |
| 141 | |
| 142 @staticmethod | |
| 143 def _load_config(config_file_path): | |
| 144 config = { | |
| 145 "filters": [ | |
| 146 "https://easylist-downloads.adblockplus.org/easylist.txt", | |
| 147 "https://easylist-downloads.adblockplus.org/exceptionrules.txt" | |
| 148 ], | |
| 149 "timeout": 300, | |
| 150 "maxtabs": 15 | |
| 151 } | |
| 152 if config_file_path is not None: | |
| 153 config.update(read_as_json(config_file_path)) | |
| 154 return config | |
| 155 | |
| 156 | |
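For reference, the precedence implemented by the new `Parameters` class is: hard-coded defaults, overridden by the optional config file, overridden by any command-line option that was actually passed. A standalone illustration with hypothetical values (not taken from the patch):

```python
# Precedence: built-in defaults < config file < explicitly given CLI options.
defaults = {'timeout': 300, 'maxtabs': 15}
from_config_file = {'timeout': 60}                      # hypothetical config.json content
from_cli = {'timeout': None, 'maxtabs': 20, 'list': 'urls.txt'}

config = dict(defaults)
config.update(from_config_file)                         # as in _load_config()
merged = {field: value if value is not None else config.get(field)
          for field, value in from_cli.items()}         # as in __init__()

print(merged)  # {'timeout': 60, 'maxtabs': 20, 'list': 'urls.txt'}
```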
| 87 def run(): | 157 def run(): |
| 88 parser = argparse.ArgumentParser(description='Run crawler') | 158 parameters = Parameters() |
| 89 parser.add_argument( | |
| 90 '-b', '--binary', type=str, | |
| 91 help='path to the Firefox binary' | |
| 92 ) | |
| 93 parser.add_argument( | |
| 94 '-a', '--abpdir', type=str, | |
| 95 help='path to the Adblock Plus repository' | |
| 96 ) | |
| 97 parser.add_argument( | |
| 98 '-f', '--filters', metavar='url', type=str, nargs='+', | |
| 99       default=["https://easylist-downloads.adblockplus.org/easylist.txt", "https://easylist-downloads.adblockplus.org/exceptionrules.txt"], | |
| 100       help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.' | |
| 101 ) | |
| 102 parser.add_argument( | |
| 103 '-t', '--timeout', type=int, default=300, | |
| 104 help='Load timeout (seconds)' | |
| 105 ) | |
| 106 parser.add_argument( | |
| 107 '-x', '--maxtabs', type=int, default=15, | |
| 108 help='Maximal number of tabs to open in parallel' | |
| 109 ) | |
| 110 parser.add_argument( | |
| 111 'list', type=str, | |
| 112 help='URL list to process' | |
| 113 ) | |
| 114 parser.add_argument( | |
| 115 'outdir', type=str, | |
| 116 help='directory to write data into' | |
| 117 ) | |
| 118 parameters = parser.parse_args() | |
| 119 | |
| 120 import buildtools.packagerGecko as packager | 159 import buildtools.packagerGecko as packager |
| 121 cleanup = [] | 160 cleanup = [] |
| 122 try: | 161 try: |
| 123 base_dir = os.path.dirname(os.path.abspath(__file__)) | 162 base_dir = os.path.dirname(os.path.abspath(__file__)) |
| 124 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi') | 163 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi') |
| 125 os.close(handle) | 164 os.close(handle) |
| 126 cleanup.append(crawlerxpi) | 165 cleanup.append(crawlerxpi) |
| 127 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True) | 166 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True) |
| 128 | 167 |
| 129     abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon-1865-latest.xpi' | 168     abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon-1865-latest.xpi' |
| (...skipping 67 matching lines...) | |
| 197 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 236 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
| 198 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py") | 237 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py") |
| 199 | 238 |
| 200 try: | 239 try: |
| 201 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR]) | 240 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR]) |
| 202 except subprocess.CalledProcessError as e: | 241 except subprocess.CalledProcessError as e: |
| 203 print >>sys.stderr, e | 242 print >>sys.stderr, e |
| 204 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!" | 243 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!" |
| 205 | 244 |
| 206 run() | 245 run() |
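Since every option of `run()` is now optional on the command line and can come from the config file instead, an invocation could look like the line below; the script name `run.py` is an assumption, as the file name is not visible in this diff:

```
python run.py -c config.json -b /usr/bin/firefox -l urls.txt -o crawler-output
```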