Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/stats/bin/logprocessor.py

Issue 6312256635666432: Issue 1426 - Don't ignore redirects during stats processing (Closed)
Patch Set: Created Sept. 22, 2014, 10:50 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | sitescripts/stats/common.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2014 Eyeo GmbH 4 # Copyright (C) 2006-2014 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after
372 def parse_record(line, ignored, geo, geov6): 372 def parse_record(line, ignored, geo, geov6):
373 global log_regexp 373 global log_regexp
374 if log_regexp == None: 374 if log_regexp == None:
375 log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "([^"]*)" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?') 375 log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "([^"]*)" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?')
376 376
377 match = re.search(log_regexp, line) 377 match = re.search(log_regexp, line)
378 if not match: 378 if not match:
379 return None 379 return None
380 380
381 status = int(match.group(6)) 381 status = int(match.group(6))
382 if status != 200: 382 if status not in (200, 301, 302):
383 return None 383 return None
384 384
385 info = { 385 info = {
386 "status": status,
386 "size": int(match.group(7)), 387 "size": int(match.group(7)),
387 } 388 }
388 389
389 info["ip"], info["country"] = process_ip(match.group(1), geo, geov6) 390 info["ip"], info["country"] = process_ip(match.group(1), geo, geov6)
390 info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4))) 391 info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4)))
391 info["file"], info["query"] = parse_path(match.group(5)) 392 info["file"], info["query"] = parse_path(match.group(5))
392 info["referrer"] = match.group(8) 393 info["referrer"] = match.group(8)
393 info["ua"], info["uaversion"] = parse_ua(match.group(9)) 394 info["ua"], info["uaversion"] = parse_ua(match.group(9))
394 info["fullua"] = "%s %s" % (info["ua"], info["uaversion"]) 395 info["fullua"] = "%s %s" % (info["ua"], info["uaversion"])
395 info["clientid"] = match.group(10) 396 info["clientid"] = match.group(10)
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
546 parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to") 547 parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to")
547 parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription") 548 parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription")
548 parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL") 549 parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL")
549 args = parser.parse_args() 550 args = parser.parse_args()
550 551
551 if args.mirror_name and args.server_type and args.log_file: 552 if args.mirror_name and args.server_type and args.log_file:
552 sources = [(args.mirror_name, args.server_type, args.log_file)] 553 sources = [(args.mirror_name, args.server_type, args.log_file)]
553 else: 554 else:
554 sources = get_stats_files() 555 sources = get_stats_files()
555 parse_sources(sources, args.factor, args.verbose) 556 parse_sources(sources, args.factor, args.verbose)
OLD | NEW
« no previous file with comments | « no previous file | sitescripts/stats/common.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld