LEFT | RIGHT |
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
(...skipping 439 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
450 data[info["month"]] = {} | 450 data[info["month"]] = {} |
451 section = data[info["month"]] | 451 section = data[info["month"]] |
452 | 452 |
453 if info["file"] not in section: | 453 if info["file"] not in section: |
454 section[info["file"]] = {} | 454 section[info["file"]] = {} |
455 section = section[info["file"]] | 455 section = section[info["file"]] |
456 | 456 |
457 add_record(info, section) | 457 add_record(info, section) |
458 return data | 458 return data |
459 | 459 |
def merge_objects(object1, object2, factor=1):
  """Recursively add the data of object2 to object1, modifying object1.

  Keys are normalized to unicode strings before merging. Numeric values are
  multiplied by factor and added to whatever is already stored under the
  same key (a missing key counts as 0). Any other value is treated as a
  nested mapping and merged recursively into a mapping of its own, so
  object1 never ends up sharing sub-objects with object2.

  object1 -- mapping that receives the merged data
  object2 -- mapping whose data is merged in
  factor  -- multiplier applied to every numeric value of object2; passing
             -1 subtracts the data instead of adding it
  """
  for raw_key, value in object2.items():
    if isinstance(raw_key, bytes):
      # Byte strings are decoded with the default encoding first. Keys that
      # are not valid in it fall back to latin-1, which maps every possible
      # byte, so an oddly encoded key can never abort the merge.
      try:
        key = raw_key.decode()
      except UnicodeDecodeError:
        key = raw_key.decode("latin-1")
    else:
      key = u"%s" % (raw_key,)

    if isinstance(value, numbers.Number):
      object1[key] = object1.get(key, 0) + factor * value
    else:
      merge_objects(object1.setdefault(key, {}), value, factor)
470 | 470 |
def save_stats(server_type, data, factor=1):
  """Merge parsed statistics into the JSON files of the stats database.

  data maps a month to a mapping of file name to statistics. Each statistics
  object is merged into the JSON file found under the configured stats
  "dataDirectory", in the sub-path server_type/month/name.json (every path
  component is passed through common.filename_encode). Existing file
  contents are loaded first and merged with the new data; missing files and
  directories are created.

  server_type -- server type the statistics belong to
  data        -- nested mapping month -> file name -> statistics
  factor      -- passed on to merge_objects as the multiplier for numeric
                 values
  """
  base_dir = os.path.join(get_config().get("stats", "dataDirectory"),
                          common.filename_encode(server_type))
  for month, month_data in data.iteritems():
    for name, file_data in month_data.iteritems():
      path = os.path.join(base_dir, common.filename_encode(month),
                          common.filename_encode(name + ".json"))
      if os.path.exists(path):
        with codecs.open(path, "rb", encoding="utf-8") as fileobj:
          existing = json.load(fileobj)
      else:
        existing = {}

      merge_objects(existing, file_data, factor)

      target_dir = os.path.dirname(path)
      try:
        os.makedirs(target_dir)
      except OSError as e:
        # An already existing directory is the normal case after the first
        # run, anything else is a real error.
        if e.errno != errno.EEXIST:
          raise

      with codecs.open(path, "wb", encoding="utf-8") as fileobj:
        json.dump(existing, fileobj, indent=2, sort_keys=True)
493 | 493 |
def parse_source(source):
  """Parse a single log file and return the statistics extracted from it.

  Takes one argument so that it can be used directly with Pool.imap().

  source -- tuple (mirror_name, server_type, log_file)

  Returns a tuple (server_type, log_file, data, ignored) where data is the
  result of parse_fileobj() and ignored is the set handed to parse_fileobj()
  for collecting ignored entries. If processing fails for any reason the
  error is reported on stderr and (None, None, None, None) is returned, so
  one broken log file doesn't abort the processing of the others.
  """
  mirror_name, server_type, log_file = source
  try:
    geo = pygeoip.GeoIP(get_config().get("stats", "geoip_db"),
                        pygeoip.MEMORY_CACHE)
    geov6 = pygeoip.GeoIP(get_config().get("stats", "geoipv6_db"),
                          pygeoip.MEMORY_CACHE)

    ignored = set()
    fileobj = open_stats_file(log_file)
    try:
      data = parse_fileobj(mirror_name, fileobj, geo, geov6, ignored)
    finally:
      fileobj.close()
    return server_type, log_file, data, ignored
  except Exception:
    # Only regular errors are turned into a "failed" result. Interrupts and
    # SystemExit are allowed to propagate so the process can be stopped.
    sys.stderr.write("Unable to process log file '%s'\n" % log_file)
    traceback.print_exc()
    return None, None, None, None
510 | 510 |
def parse_sources(sources, factor=1, verbose=False):
  """Parse log files in parallel and merge the results into the database.

  Each source is handed to parse_source() in a pool of worker processes;
  the results are saved in the calling process, in the order of sources.

  sources -- iterable of (mirror_name, server_type, log_file) tuples
  factor  -- passed on to save_stats() as the multiplier for the parsed
             data
  verbose -- if true, the ignored entries of each log file are printed
  """
  pool = multiprocessing.Pool()
  try:
    results = pool.imap(parse_source, sources, chunksize=1)
    for server_type, log_file, data, ignored in results:
      if server_type is None:
        # parse_source() failed for this file and has reported the error.
        continue

      save_stats(server_type, data, factor)
      if verbose:
        print("Ignored files for %s" % log_file)
        print("============================================================")
        print("\n".join(sorted(ignored)))
  except:
    # Don't keep parsing the remaining files if saving failed or the run
    # was interrupted, stop the workers right away.
    pool.terminate()
    raise
  else:
    pool.close()
  finally:
    # Wait for the worker processes to exit so none are left behind.
    pool.join()
522 | 525 |
if __name__ == "__main__":
  setupStderr()

  parser = argparse.ArgumentParser(
    description="Processes log files and merges them into the stats database")
  parser.add_argument(
    "--verbose", dest="verbose", action="store_true",
    help="Verbose mode, ignored requests will be listed")
  parser.add_argument(
    "--revert", dest="factor", action="store_const", const=-1, default=1,
    help="Remove log data from the database")
  parser.add_argument(
    "mirror_name", nargs="?",
    help="Name of the mirror server that the file belongs to")
  parser.add_argument(
    "server_type", nargs="?",
    help="Server type like download, update or subscription")
  parser.add_argument(
    "log_file", nargs="?",
    help="Log file path, can be a local file path, http:// or ssh:// URL")
  args = parser.parse_args()

  source = (args.mirror_name, args.server_type, args.log_file)
  if all(source):
    sources = [source]
  elif any(source):
    # Falling back to all configured log files when the source is only
    # partially specified would process (or revert) far more than intended.
    parser.error("mirror_name, server_type and log_file have to be specified together")
  else:
    sources = get_stats_files()
  parse_sources(sources, args.factor, args.verbose)
LEFT | RIGHT |