Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: run.py

Issue 29356103: Issue 3943 - add support of configuration file
Patch Set: Created Oct. 6, 2016, 1:03 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « config.json.example ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 import argparse 4 import argparse
5 import datetime 5 import datetime
6 import errno 6 import errno
7 import hashlib 7 import hashlib
8 import io 8 import io
9 import json 9 import json
10 import os 10 import os
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 77
78 with io.open(datapath, 'w', encoding='utf-8') as handle: 78 with io.open(datapath, 'w', encoding='utf-8') as handle:
79 handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)) + u'\n') 79 handle.write(unicode(json.dumps(data, indent=2, ensure_ascii=False, sort_keys=True)) + u'\n')
80 start_response('204 No Content', []) 80 start_response('204 No Content', [])
81 return '' 81 return ''
82 82
83 start_response('404 Not Found', []) 83 start_response('404 Not Found', [])
84 return '' 84 return ''
85 85
86 86
87 def read_as_json(file_path):
88 with open(file_path, mode='r') as json_file:
89 return json.load(json_file, encoding='UTF-8')
90
91
92 class Parameters:
93 """This class loads config file and parses command line parameters.
94 Values are stored in attributes of this class instance.
95 """
96 def __init__(self):
97 cli_parameters = vars(Parameters._parse_command_line())
98 config_parameters = Parameters._load_config(cli_parameters["config"])
99 for field in cli_parameters.keys():
100 config_value = config_parameters[field] if field in config_parameters else None
101 value = cli_parameters[field] if cli_parameters[field] is not None else config_value
tschuster 2016/11/21 18:07:55 Can't you just write: value = cli_parameters[field
sergei 2016/11/22 09:23:31 Changed. Now lines are not so long and you are rig
102 setattr(self, field, value)
103
104 @staticmethod
105 def _parse_command_line():
106 parser = argparse.ArgumentParser(description='Run crawler')
107 parser.add_argument(
108 '-c', '--config', type=str,
109 help='path to config file, example is config.json.example'
110 )
111 parser.add_argument(
112 '-b', '--binary', type=str,
113 help='path to the Firefox binary'
114 )
115 parser.add_argument(
116 '-a', '--abpdir', type=str,
117 help='path to the Adblock Plus repository'
118 )
119 parser.add_argument(
120 '-f', '--filters', metavar='url', type=str, nargs='+',
121 help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.'
122 )
123 parser.add_argument(
124 '-t', '--timeout', type=int,
125 help='Load timeout (seconds)'
126 )
127 parser.add_argument(
128 '-x', '--maxtabs', type=int,
129 help='Maximal number of tabs to open in parallel'
130 )
131 parser.add_argument(
132 '-l', '--list', type=str,
133 help='URL list to process',
134 )
135 parser.add_argument(
136 '-o', '--outdir', type=str,
137 help='directory to write data into',
138 )
139 parameters = parser.parse_args()
140 return parameters
141
142 @staticmethod
143 def _load_config(config_file_path):
144 config = {
145 "filters": [
146 "https://easylist-downloads.adblockplus.org/easylist.txt",
147 "https://easylist-downloads.adblockplus.org/exceptionrules.txt"
148 ],
149 "timeout": 300,
150 "maxtabs": 15
151 }
152 if config_file_path is not None:
153 config.update(read_as_json(config_file_path))
154 return config
155
156
87 def run(): 157 def run():
88 parser = argparse.ArgumentParser(description='Run crawler') 158 parameters = Parameters()
89 parser.add_argument(
90 '-b', '--binary', type=str,
91 help='path to the Firefox binary'
92 )
93 parser.add_argument(
94 '-a', '--abpdir', type=str,
95 help='path to the Adblock Plus repository'
96 )
97 parser.add_argument(
98 '-f', '--filters', metavar='url', type=str, nargs='+',
99 default=["https://easylist-downloads.adblockplus.org/easylist.txt", "https://easylist-downloads.adblockplus.org/exceptionrules.txt"],
100 help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.'
101 )
102 parser.add_argument(
103 '-t', '--timeout', type=int, default=300,
104 help='Load timeout (seconds)'
105 )
106 parser.add_argument(
107 '-x', '--maxtabs', type=int, default=15,
108 help='Maximal number of tabs to open in parallel'
109 )
110 parser.add_argument(
111 'list', type=str,
112 help='URL list to process'
113 )
114 parser.add_argument(
115 'outdir', type=str,
116 help='directory to write data into'
117 )
118 parameters = parser.parse_args()
119
120 import buildtools.packagerGecko as packager 159 import buildtools.packagerGecko as packager
121 cleanup = [] 160 cleanup = []
122 try: 161 try:
123 base_dir = os.path.dirname(os.path.abspath(__file__)) 162 base_dir = os.path.dirname(os.path.abspath(__file__))
124 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi') 163 handle, crawlerxpi = tempfile.mkstemp(suffix='.xpi')
125 os.close(handle) 164 os.close(handle)
126 cleanup.append(crawlerxpi) 165 cleanup.append(crawlerxpi)
127 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True) 166 packager.createBuild(base_dir, outFile=crawlerxpi, releaseBuild=True)
128 167
129 abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon-1865-latest.xpi' 168 abpxpi = 'https://addons.mozilla.org/firefox/downloads/latest/1865/addon-1865-latest.xpi'
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 236 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
198 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py") 237 DEPENDENCY_SCRIPT = os.path.join(BASE_DIR, "ensure_dependencies.py")
199 238
200 try: 239 try:
201 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR]) 240 subprocess.check_call([sys.executable, DEPENDENCY_SCRIPT, BASE_DIR])
202 except subprocess.CalledProcessError as e: 241 except subprocess.CalledProcessError as e:
203 print >>sys.stderr, e 242 print >>sys.stderr, e
204 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!" 243 print >>sys.stderr, "Failed to ensure dependencies being up-to-date!"
205 244
206 run() 245 run()
OLDNEW
« no previous file with comments | « config.json.example ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld