| Index: sitescripts/stats/bin/logprocessor.py |
| =================================================================== |
| --- a/sitescripts/stats/bin/logprocessor.py |
| +++ b/sitescripts/stats/bin/logprocessor.py |
| @@ -367,36 +367,37 @@ def parse_chrome_query(query): |
| return version, application, applicationVersion |
| def parse_update_flag(query): |
| return "update" if query == "update" else "install" |
| def parse_record(line, ignored, geo, geov6): |
| global log_regexp |
| if log_regexp == None: |
| - log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "[^"]*" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?') |
| + log_regexp = re.compile(r'(\S+) \S+ \S+ \[([^]\s]+) ([+\-]\d\d)(\d\d)\] "GET ([^"\s]+) [^"]+" (\d+) (\d+) "([^"]*)" "([^"]*)"(?: "[^"]*" \S+ "[^"]*" "[^"]*" "([^"]*)")?') |
| match = re.search(log_regexp, line) |
| if not match: |
| return None |
| status = int(match.group(6)) |
| if status != 200: |
| return None |
| info = { |
| "size": int(match.group(7)), |
| } |
| info["ip"], info["country"] = process_ip(match.group(1), geo, geov6) |
| info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4))) |
| info["file"], info["query"] = parse_path(match.group(5)) |
| - info["ua"], info["uaversion"] = parse_ua(match.group(8)) |
| + info["referrer"] = match.group(8) |
| + info["ua"], info["uaversion"] = parse_ua(match.group(9)) |
| info["fullua"] = "%s %s" % (info["ua"], info["uaversion"]) |
| - info["clientid"] = match.group(9) |
| + info["clientid"] = match.group(10) |
| # Additional metadata depends on file type |
| filename = os.path.basename(info["file"]) |
| ext = os.path.splitext(filename)[1] |
| if ext == ".txt" or filename == "update.json" or filename == "notification.json": |
| # Subscription downloads, libadblockplus update checks and notification |
| # checks are performed by the downloader |
| parse_downloader_query(info) |