Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/stats/bin/logprocessor.py

Issue 4635035198029824: Issue 1390 - Record referrer for download stats (Closed)
Patch Set: Removed Unicode hack Created Sept. 22, 2014, 10:38 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | sitescripts/stats/common.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2014 Eyeo GmbH 4 # Copyright (C) 2006-2014 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after
365 applicationVersion = re.sub(r"^(\d+\.\d+).*", r"\1", applicationVersion) 365 applicationVersion = re.sub(r"^(\d+\.\d+).*", r"\1", applicationVersion)
366 366
367 return version, application, applicationVersion 367 return version, application, applicationVersion
368 368
369 def parse_update_flag(query): 369 def parse_update_flag(query):
370 return "update" if query == "update" else "install" 370 return "update" if query == "update" else "install"
371 371
372 def parse_record(line, ignored, geo, geov6): 372 def parse_record(line, ignored, geo, geov6):
373 global log_regexp 373 global log_regexp
374 if log_regexp == None: 374 if log_regexp == None:
375 log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "[^"]*" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?') 375 log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "([^"]*)" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?')
376 376
377 match = re.search(log_regexp, line) 377 match = re.search(log_regexp, line)
378 if not match: 378 if not match:
379 return None 379 return None
380 380
381 status = int(match.group(6)) 381 status = int(match.group(6))
382 if status != 200: 382 if status != 200:
383 return None 383 return None
384 384
385 info = { 385 info = {
386 "size": int(match.group(7)), 386 "size": int(match.group(7)),
387 } 387 }
388 388
389 info["ip"], info["country"] = process_ip(match.group(1), geo, geov6) 389 info["ip"], info["country"] = process_ip(match.group(1), geo, geov6)
390 info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4))) 390 info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4)))
391 info["file"], info["query"] = parse_path(match.group(5)) 391 info["file"], info["query"] = parse_path(match.group(5))
392 info["ua"], info["uaversion"] = parse_ua(match.group(8)) 392 info["referrer"] = match.group(8)
393 info["ua"], info["uaversion"] = parse_ua(match.group(9))
393 info["fullua"] = "%s %s" % (info["ua"], info["uaversion"]) 394 info["fullua"] = "%s %s" % (info["ua"], info["uaversion"])
394 info["clientid"] = match.group(9) 395 info["clientid"] = match.group(10)
395 396
396 # Additional metadata depends on file type 397 # Additional metadata depends on file type
397 filename = os.path.basename(info["file"]) 398 filename = os.path.basename(info["file"])
398 ext = os.path.splitext(filename)[1] 399 ext = os.path.splitext(filename)[1]
399 if ext == ".txt" or filename == "update.json" or filename == "notification.json": 400 if ext == ".txt" or filename == "update.json" or filename == "notification.json":
400 # Subscription downloads, libadblockplus update checks and notification 401 # Subscription downloads, libadblockplus update checks and notification
401 # checks are performed by the downloader 402 # checks are performed by the downloader
402 parse_downloader_query(info) 403 parse_downloader_query(info)
403 elif ext == ".tpl": 404 elif ext == ".tpl":
404 # MSIE TPL download, no additional data here 405 # MSIE TPL download, no additional data here
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to") 546 parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to")
546 parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription") 547 parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription")
547 parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL") 548 parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL")
548 args = parser.parse_args() 549 args = parser.parse_args()
549 550
550 if args.mirror_name and args.server_type and args.log_file: 551 if args.mirror_name and args.server_type and args.log_file:
551 sources = [(args.mirror_name, args.server_type, args.log_file)] 552 sources = [(args.mirror_name, args.server_type, args.log_file)]
552 else: 553 else:
553 sources = get_stats_files() 554 sources = get_stats_files()
554 parse_sources(sources, args.factor, args.verbose) 555 parse_sources(sources, args.factor, args.verbose)
OLDNEW
« no previous file with comments | « no previous file | sitescripts/stats/common.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld