| Index: run.py |
| diff --git a/run.py b/run.py |
| index 6661018b71c232bd2ba0e033bed09a83a7f051fa..f93b186d48c5156213d28546d946951d0fac49ba 100755 |
| --- a/run.py |
| +++ b/run.py |
| @@ -84,39 +84,79 @@ class CrawlerApp: |
| return '' |
| -def run(): |
| - parser = argparse.ArgumentParser(description='Run crawler') |
| - parser.add_argument( |
| - '-b', '--binary', type=str, |
| - help='path to the Firefox binary' |
| - ) |
| - parser.add_argument( |
| - '-a', '--abpdir', type=str, |
| - help='path to the Adblock Plus repository' |
| - ) |
| - parser.add_argument( |
| - '-f', '--filters', metavar='url', type=str, nargs='+', |
| - default=["https://easylist-downloads.adblockplus.org/easylist.txt", "https://easylist-downloads.adblockplus.org/exceptionrules.txt"], |
| - help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.' |
| - ) |
| - parser.add_argument( |
| - '-t', '--timeout', type=int, default=300, |
| - help='Load timeout (seconds)' |
| - ) |
| - parser.add_argument( |
| - '-x', '--maxtabs', type=int, default=15, |
| - help='Maximal number of tabs to open in parallel' |
| - ) |
| - parser.add_argument( |
| - 'list', type=str, |
| - help='URL list to process' |
| - ) |
| - parser.add_argument( |
| - 'outdir', type=str, |
| - help='directory to write data into' |
| - ) |
| - parameters = parser.parse_args() |
def read_as_json(file_path):
    """Load and return the JSON document stored in the file at file_path.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding.

    Raises IOError/OSError if the file cannot be opened and ValueError if
    its content is not valid JSON.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import io

    # Decode while reading instead of passing ``encoding`` to json.load():
    # that keyword was removed in Python 3.9 and raises TypeError there.
    # io.open() accepts ``encoding`` on both Python 2 and Python 3.
    with io.open(file_path, mode='r', encoding='utf-8') as json_file:
        return json.load(json_file)
| + |
| + |
class Parameters:
    """Crawler settings merged from the command line and a config file.

    Every command line option becomes an attribute of the instance
    (config, binary, abpdir, filters, timeout, maxtabs, list, outdir).
    Precedence, highest first:

    1. a value given on the command line,
    2. the value from the JSON config file named by -c/--config,
    3. the built-in defaults for filters, timeout and maxtabs.

    An option supplied by none of these sources is stored as None.
    Config file keys that do not match a command line option are ignored.
    """

    def __init__(self, args=None):
        """Parse args and merge them with the config file.

        args is an optional list of command line strings; when None,
        argparse reads sys.argv[1:].
        """
        cli_parameters = vars(Parameters._parse_command_line(args))
        config_parameters = Parameters._load_config(cli_parameters["config"])
        for field, value in cli_parameters.items():
            # argparse leaves an option that was not given as None (no
            # option declares a default); only then consult the config.
            if value is None:
                value = config_parameters.get(field)
            setattr(self, field, value)

    @staticmethod
    def _parse_command_line(args=None):
        """Return the argparse namespace for args (sys.argv[1:] if None)."""
        parser = argparse.ArgumentParser(description='Run crawler')
        parser.add_argument(
            '-c', '--config', type=str,
            help='path to config file, example is config.json.example'
        )
        parser.add_argument(
            '-b', '--binary', type=str,
            help='path to the Firefox binary'
        )
        parser.add_argument(
            '-a', '--abpdir', type=str,
            help='path to the Adblock Plus repository'
        )
        parser.add_argument(
            '-f', '--filters', metavar='url', type=str, nargs='+',
            help='filter lists to install in Adblock Plus. The arguments can also have the format path=url, the data will be read from the specified path then.'
        )
        parser.add_argument(
            '-t', '--timeout', type=int,
            help='Load timeout (seconds)'
        )
        parser.add_argument(
            '-x', '--maxtabs', type=int,
            help='Maximal number of tabs to open in parallel'
        )
        parser.add_argument(
            '-l', '--list', type=str,
            help='URL list to process',
        )
        parser.add_argument(
            '-o', '--outdir', type=str,
            help='directory to write data into',
        )
        return parser.parse_args(args)

    @staticmethod
    def _load_config(config_file_path):
        """Return the built-in defaults overlaid with the config file.

        config_file_path may be None, in which case only the defaults are
        returned. Otherwise the file is read with read_as_json() and its
        top-level keys override the defaults.
        """
        config = {
            "filters": [
                "https://easylist-downloads.adblockplus.org/easylist.txt",
                "https://easylist-downloads.adblockplus.org/exceptionrules.txt"
            ],
            "timeout": 300,
            "maxtabs": 15
        }
        if config_file_path is not None:
            config.update(read_as_json(config_file_path))
        return config
| + |
| +def run(): |
| + parameters = Parameters() |
| import buildtools.packagerGecko as packager |
| cleanup = [] |
| try: |