Left: | ||
Right: |
OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding: utf-8 | 2 # coding: utf-8 |
3 | 3 |
4 import argparse | 4 import argparse |
5 import datetime | 5 import datetime |
6 import errno | 6 import errno |
7 import hashlib | 7 import hashlib |
8 import io | 8 import io |
9 import json | 9 import json |
10 import os | 10 import os |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
77 | 77 |
78 with io.open(datapath, 'w', encoding='utf-8') as handle: | 78 with io.open(datapath, 'w', encoding='utf-8') as handle: |
79 handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=Fal se, sort_keys=True)) + u'\n') | 79 handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=Fal se, sort_keys=True)) + u'\n') |
80 start_response('204 No Content', []) | 80 start_response('204 No Content', []) |
81 return '' | 81 return '' |
82 | 82 |
83 start_response('404 Not Found', []) | 83 start_response('404 Not Found', []) |
84 return '' | 84 return '' |
85 | 85 |
86 | 86 |
def read_as_json(file_path):
    """Parse the UTF-8 encoded JSON file at *file_path* and return the result.

    The file is decoded while reading (via ``io.open``) rather than by
    passing ``encoding`` to ``json.load``: that keyword was removed from
    ``json.load`` in Python 3.9, and decoding at the file boundary behaves
    the same on Python 2 and Python 3 regardless of the locale.

    Raises ``IOError``/``OSError`` if the file cannot be opened and
    ``ValueError`` if its content is not valid JSON.
    """
    with io.open(file_path, mode='r', encoding='utf-8') as json_file:
        return json.load(json_file)
90 | |
91 | |
class Parameters(object):
    """Crawler settings merged from the command line and a config file.

    Every command line option becomes an attribute of the instance. A
    value given on the command line wins; otherwise the value from the
    config file (``--config``) is used, then the built-in default, and
    finally ``None`` if none of these provides one. Keys in the config
    file that do not correspond to a command line option are ignored.
    """

    def __init__(self):
        cli_parameters = vars(Parameters._parse_command_line())
        config_parameters = Parameters._load_config(cli_parameters["config"])
        for field, value in cli_parameters.items():
            # Test for None explicitly so that falsy command line values
            # (e.g. ``--timeout 0``) still override the config.
            if value is None:
                value = config_parameters.get(field)
            setattr(self, field, value)

    @staticmethod
    def _parse_command_line():
        """Parse ``sys.argv`` and return the resulting argparse namespace.

        No option declares a default, so an option that was not given is
        ``None`` in the namespace; ``__init__`` relies on this to decide
        when to fall back to the config.
        """
        parser = argparse.ArgumentParser(description='Run crawler')
        parser.add_argument(
            '-c', '--config', type=str,
            help='path to config file, example is config.json.example'
        )
        parser.add_argument(
            '-b', '--binary', type=str,
            help='path to the Firefox binary'
        )
        parser.add_argument(
            '-a', '--abpdir', type=str,
            help='path to the Adblock Plus repository'
        )
        parser.add_argument(
            '-f', '--filters', metavar='url', type=str, nargs='+',
            help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.'
        )
        parser.add_argument(
            '-t', '--timeout', type=int,
            help='Load timeout (seconds)'
        )
        parser.add_argument(
            '-x', '--maxtabs', type=int,
            help='Maximal number of tabs to open in parallel'
        )
        parser.add_argument(
            '-l', '--list', type=str,
            help='URL list to process',
        )
        parser.add_argument(
            '-o', '--outdir', type=str,
            help='directory to write data into',
        )
        return parser.parse_args()

    @staticmethod
    def _load_config(config_file_path):
        """Return the built-in defaults overlaid with the config file.

        *config_file_path* may be ``None``, in which case only the
        defaults are returned. Otherwise the file is read with
        ``read_as_json`` and its top-level keys replace the defaults.
        """
        config = {
            "filters": [
                "https://easylist-downloads.adblockplus.org/easylist.txt",
                "https://easylist-downloads.adblockplus.org/exceptionrules.txt"
            ],
            "timeout": 300,
            "maxtabs": 15
        }
        if config_file_path is not None:
            config.update(read_as_json(config_file_path))
        return config
155 | |
156 | |
87 def run(): | 157 def run(): |
88 parser = argparse.ArgumentParser(description='Run crawler') | 158 parameters = Parameters() |
89 parser.add_argument( | |
90 '-b', '--binary', type=str, | |
91 help='path to the Firefox binary' | |
92 ) | |
93 parser.add_argument( | |
94 '-a', '--abpdir', type=str, | |
95 help='path to the Adblock Plus repository' | |
96 ) | |
97 parser.add_argument( | |
98 '-f', '--filters', metavar='url', type=str, nargs='+', | |
99 default=["https://easylist-downloads.adblockplus.org/easylist.txt", "htt ps://easylist-downloads.adblockplus.org/exceptionrules.txt"], | |
100 help='filter lists to install in Adblock Plus. The arguments can also ha ve the format path=url, the data will be read from the specified path then.' | |
101 ) | |
102 parser.add_argument( | |
103 '-t', '--timeout', type=int, default=300, | |
104 help='Load timeout (seconds)' | |
105 ) | |
106 parser.add_argument( | |
107 '-x', '--maxtabs', type=int, default=15, | |
108 help='Maximal number of tabs to open in parallel' | |
109 ) | |
110 parser.add_argument( | |
111 'list', type=str, | |
112 help='URL list to process' | |
113 ) | |
114 parser.add_argument( | |
115 'outdir', type=str, | |
116 help='directory to write data into' | |
117 ) | |
118 parameters = parser.parse_args() | |
119 | |
120 import buildtools.packagerGecko as packager | 159 import buildtools.packagerGecko as packager |
121 cleanup = [] | 160 cleanup = [] |
122 try: | 161 try: |
123 base_dir = os.path.dirname(os.path.abspath(__file__)) | 162 base_dir = os.path.dirname(os.path.abspath(__file__)) |
124 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi') | 163 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi') |
125 os.close(handle) | 164 os.close(handle) |
126 cleanup.append(crawlerxpi) | 165 cleanup.append(crawlerxpi) |
127 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True) | 166 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True) |
128 | 167 |
129 abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon -1865-latest.xpi' | 168 abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon -1865-latest.xpi' |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
197 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 236 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
198 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py") | 237 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py") |
199 | 238 |
200 try: | 239 try: |
201 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR]) | 240 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR]) |
202 except subprocess.CalledProcessError as e: | 241 except subprocess.CalledProcessError as e: |
203 print >>sys.stderr, e | 242 print >>sys.stderr, e |
204 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!" | 243 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!" |
205 | 244 |
206 run() | 245 run() |
OLD | NEW |