| OLD | NEW |
| 1 # coding: utf-8 | 1 # coding: utf-8 |
| 2 | 2 |
| 3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
| 4 # Copyright (C) 2006-2014 Eyeo GmbH | 4 # Copyright (C) 2006-2014 Eyeo GmbH |
| 5 # | 5 # |
| 6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
| 7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
| 8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
| 9 # | 9 # |
| 10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
| (...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 365 applicationVersion = re.sub(r"^(\d+\.\d+).*", r"\1", applicationVersion) | 365 applicationVersion = re.sub(r"^(\d+\.\d+).*", r"\1", applicationVersion) |
| 366 | 366 |
| 367 return version, application, applicationVersion | 367 return version, application, applicationVersion |
| 368 | 368 |
| 369 def parse_update_flag(query): | 369 def parse_update_flag(query): |
| 370 return "update" if query == "update" else "install" | 370 return "update" if query == "update" else "install" |
| 371 | 371 |
| 372 def parse_record(line, ignored, geo, geov6): | 372 def parse_record(line, ignored, geo, geov6): |
| 373 global log_regexp | 373 global log_regexp |
| 374 if log_regexp == None: | 374 if log_regexp == None: |
| 375 log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "[^"]*" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?') | 375 log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "([^"]*)" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?') |
| 376 | 376 |
| 377 match = re.search(log_regexp, line) | 377 match = re.search(log_regexp, line) |
| 378 if not match: | 378 if not match: |
| 379 return None | 379 return None |
| 380 | 380 |
| 381 status = int(match.group(6)) | 381 status = int(match.group(6)) |
| 382 if status != 200: | 382 if status != 200: |
| 383 return None | 383 return None |
| 384 | 384 |
| 385 info = { | 385 info = { |
| 386 "size": int(match.group(7)), | 386 "size": int(match.group(7)), |
| 387 } | 387 } |
| 388 | 388 |
| 389 info["ip"], info["country"] = process_ip(match.group(1), geo, geov6) | 389 info["ip"], info["country"] = process_ip(match.group(1), geo, geov6) |
| 390 info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4))) | 390 info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4))) |
| 391 info["file"], info["query"] = parse_path(match.group(5)) | 391 info["file"], info["query"] = parse_path(match.group(5)) |
| 392 info["ua"], info["uaversion"] = parse_ua(match.group(8)) | 392 info["referrer"] = match.group(8) |
| 393 info["ua"], info["uaversion"] = parse_ua(match.group(9)) |
| 393 info["fullua"] = "%s %s" % (info["ua"], info["uaversion"]) | 394 info["fullua"] = "%s %s" % (info["ua"], info["uaversion"]) |
| 394 info["clientid"] = match.group(9) | 395 info["clientid"] = match.group(10) |
| 395 | 396 |
| 396 # Additional metadata depends on file type | 397 # Additional metadata depends on file type |
| 397 filename = os.path.basename(info["file"]) | 398 filename = os.path.basename(info["file"]) |
| 398 ext = os.path.splitext(filename)[1] | 399 ext = os.path.splitext(filename)[1] |
| 399 if ext == ".txt" or filename == "update.json" or filename == "notification.json": | 400 if ext == ".txt" or filename == "update.json" or filename == "notification.json": |
| 400 # Subscription downloads, libadblockplus update checks and notification | 401 # Subscription downloads, libadblockplus update checks and notification |
| 401 # checks are performed by the downloader | 402 # checks are performed by the downloader |
| 402 parse_downloader_query(info) | 403 parse_downloader_query(info) |
| 403 elif ext == ".tpl": | 404 elif ext == ".tpl": |
| 404 # MSIE TPL download, no additional data here | 405 # MSIE TPL download, no additional data here |
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 545 parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to") | 546 parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to") |
| 546 parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription") | 547 parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription") |
| 547 parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL") | 548 parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL") |
| 548 args = parser.parse_args() | 549 args = parser.parse_args() |
| 549 | 550 |
| 550 if args.mirror_name and args.server_type and args.log_file: | 551 if args.mirror_name and args.server_type and args.log_file: |
| 551 sources = [(args.mirror_name, args.server_type, args.log_file)] | 552 sources = [(args.mirror_name, args.server_type, args.log_file)] |
| 552 else: | 553 else: |
| 553 sources = get_stats_files() | 554 sources = get_stats_files() |
| 554 parse_sources(sources, args.factor, args.verbose) | 555 parse_sources(sources, args.factor, args.verbose) |
| OLD | NEW |