Index: run.py |
diff --git a/run.py b/run.py |
index 6661018b71c232bd2ba0e033bed09a83a7f051fa..f93b186d48c5156213d28546d946951d0fac49ba 100755 |
--- a/run.py |
+++ b/run.py |
@@ -84,39 +84,79 @@ class CrawlerApp: |
return '' |
-def run(): |
- parser = argparse.ArgumentParser(description='Run crawler') |
- parser.add_argument( |
- '-b', '--binary', type=str, |
- help='path to the Firefox binary' |
- ) |
- parser.add_argument( |
- '-a', '--abpdir', type=str, |
- help='path to the Adblock Plus repository' |
- ) |
- parser.add_argument( |
- '-f', '--filters', metavar='url', type=str, nargs='+', |
- default=["https://easylist-downloads.adblockplus.org/easylist.txt", "https://easylist-downloads.adblockplus.org/exceptionrules.txt"], |
- help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.' |
- ) |
- parser.add_argument( |
- '-t', '--timeout', type=int, default=300, |
- help='Load timeout (seconds)' |
- ) |
- parser.add_argument( |
- '-x', '--maxtabs', type=int, default=15, |
- help='Maximal number of tabs to open in parallel' |
- ) |
- parser.add_argument( |
- 'list', type=str, |
- help='URL list to process' |
- ) |
- parser.add_argument( |
- 'outdir', type=str, |
- help='directory to write data into' |
- ) |
- parameters = parser.parse_args() |
+def read_as_json(file_path): |
+    """Parse the JSON document stored at file_path and return the result. |
+ |
+    The file is decoded as UTF-8 while it is read, so the outcome does not |
+    depend on the locale's default encoding. The decoded text is handed to |
+    json.load() without an ``encoding`` argument: Python 3 ignored that |
+    argument and Python 3.9 removed it, so passing it raises TypeError there. |
+ |
+    Raises IOError/OSError if the file cannot be opened and ValueError if |
+    its content is not valid UTF-8 encoded JSON. |
+    """ |
+    # Function-scope import keeps this helper self-contained. |
+    import io |
+    with io.open(file_path, mode='r', encoding='utf-8') as json_file: |
+        return json.load(json_file) |
+ |
+ |
+class Parameters: |
+    """Crawler settings merged from the command line and a config file. |
+ |
+    Every command line option becomes an attribute of the instance. An |
+    option that was not given on the command line takes its value from the |
+    config data (built-in defaults overlaid with the JSON config file, if |
+    one was named), or None when the config data has no such key. |
+    """ |
+ |
+    def __init__(self): |
+        from_cli = vars(Parameters._parse_command_line()) |
+        from_config = Parameters._load_config(from_cli["config"]) |
+        for name, cli_value in from_cli.items(): |
+            # argparse leaves an option at None when it was not supplied; |
+            # only then is the config data consulted. |
+            if cli_value is not None: |
+                setattr(self, name, cli_value) |
+            else: |
+                setattr(self, name, from_config.get(name)) |
+ |
+    @staticmethod |
+    def _parse_command_line(): |
+        """Parse sys.argv and return the resulting argparse namespace. |
+ |
+        No option declares a default, so anything omitted comes back as None. |
+        """ |
+        parser = argparse.ArgumentParser(description='Run crawler') |
+        add_option = parser.add_argument |
+        add_option('-c', '--config', type=str, |
+                   help='path to config file, example is config.json.example') |
+        add_option('-b', '--binary', type=str, |
+                   help='path to the Firefox binary') |
+        add_option('-a', '--abpdir', type=str, |
+                   help='path to the Adblock Plus repository') |
+        add_option('-f', '--filters', metavar='url', type=str, nargs='+', |
+                   help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.') |
+        add_option('-t', '--timeout', type=int, |
+                   help='Load timeout (seconds)') |
+        add_option('-x', '--maxtabs', type=int, |
+                   help='Maximal number of tabs to open in parallel') |
+        add_option('-l', '--list', type=str, |
+                   help='URL list to process') |
+        add_option('-o', '--outdir', type=str, |
+                   help='directory to write data into') |
+        return parser.parse_args() |
+ |
+    @staticmethod |
+    def _load_config(config_file_path): |
+        """Return the built-in defaults, overlaid with the JSON config file. |
+ |
+        When config_file_path is None only the defaults are returned. |
+        """ |
+        settings = { |
+            "filters": [ |
+                "https://easylist-downloads.adblockplus.org/easylist.txt", |
+                "https://easylist-downloads.adblockplus.org/exceptionrules.txt" |
+            ], |
+            "timeout": 300, |
+            "maxtabs": 15 |
+        } |
+        if config_file_path is None: |
+            return settings |
+        # Keys present in the file replace the defaults of the same name. |
+        settings.update(read_as_json(config_file_path)) |
+        return settings |
+ |
+def run(): |
+ parameters = Parameters() |
import buildtools.packagerGecko as packager |
cleanup = [] |
try: |