Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: run.py

Issue 29356103: Issue 3943 - add support of configuration file
Patch Set: address comments Created Nov. 22, 2016, 9:20 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « config.json.example ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 import argparse 4 import argparse
5 import datetime 5 import datetime
6 import errno 6 import errno
7 import hashlib 7 import hashlib
8 import io 8 import io
9 import json 9 import json
10 import os 10 import os
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 77
78 with io.open(datapath, 'w', encoding='utf-8') as handle: 78 with io.open(datapath, 'w', encoding='utf-8') as handle:
79 handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)) + u'\n') 79 handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)) + u'\n')
80 start_response('204 No Content', []) 80 start_response('204 No Content', [])
81 return '' 81 return ''
82 82
83 start_response('404 Not Found', []) 83 start_response('404 Not Found', [])
84 return '' 84 return ''
85 85
86 86
87 def read_as_json(file_path):
88 with open(file_path, mode='r') as json_file:
89 return json.load(json_file, encoding='UTF-8')
90
91
92 class Parameters:
93 """This class loads config file and parses command line parameters.
94 Values are stored in attributes of this class instance.
95 """
96 def __init__(self):
97 cli_parameters = vars(Parameters._parse_command_line())
98 config_parameters = Parameters._load_config(cli_parameters["config"])
99 for field in cli_parameters.keys():
100 value = cli_parameters[field]
101 if value is None:
102 value = config_parameters.get(field)
103 setattr(self, field, value)
104
105 @staticmethod
106 def _parse_command_line():
107 parser = argparse.ArgumentParser(description='Run crawler')
108 parser.add_argument(
109 '-c', '--config', type=str,
110 help='path to config file, example is config.json.example'
111 )
112 parser.add_argument(
113 '-b', '--binary', type=str,
114 help='path to the Firefox binary'
115 )
116 parser.add_argument(
117 '-a', '--abpdir', type=str,
118 help='path to the Adblock Plus repository'
119 )
120 parser.add_argument(
121 '-f', '--filters', metavar='url', type=str, nargs='+',
122 help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.'
123 )
124 parser.add_argument(
125 '-t', '--timeout', type=int,
126 help='Load timeout (seconds)'
127 )
128 parser.add_argument(
129 '-x', '--maxtabs', type=int,
130 help='Maximal number of tabs to open in parallel'
131 )
132 parser.add_argument(
133 '-l', '--list', type=str,
134 help='URL list to process',
135 )
136 parser.add_argument(
137 '-o', '--outdir', type=str,
138 help='directory to write data into',
139 )
140 parameters = parser.parse_args()
141 return parameters
142
143 @staticmethod
144 def _load_config(config_file_path):
145 config = {
146 "filters": [
147 "https://easylist-downloads.adblockplus.org/easylist.txt",
148 "https://easylist-downloads.adblockplus.org/exceptionrules.txt"
149 ],
150 "timeout": 300,
151 "maxtabs": 15
152 }
153 if config_file_path is not None:
154 config.update(read_as_json(config_file_path))
155 return config
156
157
87 def run(): 158 def run():
88 parser = argparse.ArgumentParser(description='Run crawler') 159 parameters = Parameters()
89 parser.add_argument(
90 '-b', '--binary', type=str,
91 help='path to the Firefox binary'
92 )
93 parser.add_argument(
94 '-a', '--abpdir', type=str,
95 help='path to the Adblock Plus repository'
96 )
97 parser.add_argument(
98 '-f', '--filters', metavar='url', type=str, nargs='+',
99 default=["https://easylist-downloads.adblockplus.org/easylist.txt", "https://easylist-downloads.adblockplus.org/exceptionrules.txt"],
100 help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.'
101 )
102 parser.add_argument(
103 '-t', '--timeout', type=int, default=300,
104 help='Load timeout (seconds)'
105 )
106 parser.add_argument(
107 '-x', '--maxtabs', type=int, default=15,
108 help='Maximal number of tabs to open in parallel'
109 )
110 parser.add_argument(
111 'list', type=str,
112 help='URL list to process'
113 )
114 parser.add_argument(
115 'outdir', type=str,
116 help='directory to write data into'
117 )
118 parameters = parser.parse_args()
119
120 import buildtools.packagerGecko as packager 160 import buildtools.packagerGecko as packager
121 cleanup = [] 161 cleanup = []
122 try: 162 try:
123 base_dir = os.path.dirname(os.path.abspath(__file__)) 163 base_dir = os.path.dirname(os.path.abspath(__file__))
124 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi') 164 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi')
125 os.close(handle) 165 os.close(handle)
126 cleanup.append(crawlerxpi) 166 cleanup.append(crawlerxpi)
127 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True) 167 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True)
128 168
129 abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon-1865-latest.xpi' 169 abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon-1865-latest.xpi'
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 237 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
198 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py") 238 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py")
199 239
200 try: 240 try:
201 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR]) 241 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR])
202 except subprocess.CalledProcessError as e: 242 except subprocess.CalledProcessError as e:
203 print >>sys.stderr, e 243 print >>sys.stderr, e
204 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!" 244 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!"
205 245
206 run() 246 run()
OLDNEW
« no previous file with comments | « config.json.example ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld