Index: sitescripts/stats/bin/logprocessor.py
===================================================================
--- a/sitescripts/stats/bin/logprocessor.py
+++ b/sitescripts/stats/bin/logprocessor.py
@@ -47,28 +47,28 @@
         self._processes = []
         parseresult = urlparse.urlparse(path)
-        if parseresult.scheme == "ssh" and parseresult.username and parseresult.hostname and parseresult.path:
+        if parseresult.scheme == 'ssh' and parseresult.username and parseresult.hostname and parseresult.path:
             command = [
-                "ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k",
-                "-l", parseresult.username,
+                'ssh', '-q', '-o', 'NumberOfPasswordPrompts 0', '-T', '-k',
+                '-l', parseresult.username,
                 parseresult.hostname,
-                parseresult.path.lstrip("/")
+                parseresult.path.lstrip('/')
             ]
             if parseresult.port:
-                command[1:1] = ["-P", str(parseresult.port)]
+                command[1:1] = ['-P', str(parseresult.port)]
             ssh_process = subprocess.Popen(command, stdout=subprocess.PIPE)
             self._processes.append(ssh_process)
             self._file = ssh_process.stdout
-        elif parseresult.scheme in ("http", "https"):
+        elif parseresult.scheme in ('http', 'https'):
             self._file = urllib.urlopen(path)
         elif os.path.exists(path):
-            self._file = open(path, "rb")
+            self._file = open(path, 'rb')
         else:
             raise IOError("Path '%s' not recognized" % path)
 
-        if path.endswith(".gz"):
+        if path.endswith('.gz'):
             # Built-in gzip module doesn't support streaming (fixed in Python 3.2)
-            gzip_process = subprocess.Popen(["gzip", "-cd"], stdin=self._file, stdout=subprocess.PIPE)
+            gzip_process = subprocess.Popen(['gzip', '-cd'], stdin=self._file, stdout=subprocess.PIPE)
             self._processes.append(gzip_process)
             self._file, self._inner_file = gzip_process.stdout, self._file
@@ -86,12 +86,12 @@
 def get_stats_files():
     config = get_config()
 
-    prefix = "mirror_"
-    options = filter(lambda o: o.startswith(prefix), config.options("stats"))
+    prefix = 'mirror_'
+    options = filter(lambda o: o.startswith(prefix), config.options('stats'))
     for option in options:
-        if config.has_option("stats", option):
-            value = config.get("stats", option)
-            if " " in value:
+        if config.has_option('stats', option):
+            value = config.get('stats', option)
+            if ' ' in value:
                 yield [option[len(prefix):]] + value.split(None, 1)
             else:
                 print >>sys.stderr, "Option '%s' has invalid value: '%s'" % (option, value)
@@ -133,125 +133,125 @@
     Decorator that memoizes the last return value of a function in case it is
     called again with the same parameters.
     """
-    result = {"args": None, "result": None}
+    result = {'args': None, 'result': None}
 
     def wrapped(*args):
-        if args != result["args"]:
-            result["result"] = func(*args)
-            result["args"] = args
-        return result["result"]
+        if args != result['args']:
+            result['result'] = func(*args)
+            result['args'] = args
+        return result['result']
     return wrapped
 
 
 @cache_lru
 def parse_ua(ua):
     # Opera might disguise itself as other browser so it needs to go first
-    match = re.search(r"\bOpera/([\d\.]+)", ua)
+    match = re.search(r'\bOpera/([\d\.]+)', ua)
     if match:
         # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA
-        match2 = re.search(r"\bVersion/([\d\.]+)", ua)
+        match2 = re.search(r'\bVersion/([\d\.]+)', ua)
         if match2:
-            return "Opera", match2.group(1)
+            return 'Opera', match2.group(1)
         else:
-            return "Opera", match.group(1)
+            return 'Opera', match.group(1)
 
     # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it
-    match = re.search(r"\bOPR/(\d+\.\d+)", ua)
+    match = re.search(r'\bOPR/(\d+\.\d+)', ua)
     if match:
-        return "Opera", match.group(1)
+        return 'Opera', match.group(1)
 
     # Have to check for these before Firefox, they will usually have a Firefox identifier as well
-    match = re.search(r"\b(Fennec|Thunderbird|SeaMonkey|Songbird|K-Meleon|Prism)/(\d+\.\d+)", ua)
+    match = re.search(r'\b(Fennec|Thunderbird|SeaMonkey|Songbird|K-Meleon|Prism)/(\d+\.\d+)', ua)
     if match:
-        if match.group(1) == "Fennec":
-            return "Firefox Mobile", match.group(2)
+        if match.group(1) == 'Fennec':
+            return 'Firefox Mobile', match.group(2)
         else:
             return match.group(1), match.group(2)
 
-    match = re.search(r"\bFirefox/(\d+\.\d+)", ua)
+    match = re.search(r'\bFirefox/(\d+\.\d+)', ua)
     if match:
-        if re.search(r"\bMobile;", ua):
-            return "Firefox Mobile", match.group(1)
-        elif re.search(r"\bTablet;", ua):
-            return "Firefox Tablet", match.group(1)
+        if re.search(r'\bMobile;', ua):
+            return 'Firefox Mobile', match.group(1)
+        elif re.search(r'\bTablet;', ua):
+            return 'Firefox Tablet', match.group(1)
         else:
-            return "Firefox", match.group(1)
+            return 'Firefox', match.group(1)
 
-    match = re.search(r"\brv:(\d+)\.(\d+)(?:\.(\d+))?", ua)
-    if match and re.search(r"\bGecko/", ua):
+    match = re.search(r'\brv:(\d+)\.(\d+)(?:\.(\d+))?', ua)
+    if match and re.search(r'\bGecko/', ua):
         if match.group(3) and int(match.group(1)) < 2:
-            return "Gecko", "%s.%s.%s" % (match.group(1), match.group(2), match.group(3))
+            return 'Gecko', '%s.%s.%s' % (match.group(1), match.group(2), match.group(3))
         else:
-            return "Gecko", "%s.%s" % (match.group(1), match.group(2))
+            return 'Gecko', '%s.%s' % (match.group(1), match.group(2))
 
-    match = re.search(r"\bCoolNovo/(\d+\.\d+\.\d+)", ua)
+    match = re.search(r'\bCoolNovo/(\d+\.\d+\.\d+)', ua)
     if match:
-        return "CoolNovo", match.group(1)
+        return 'CoolNovo', match.group(1)
 
-    match = re.search(r"\bEdge/(\d+)\.\d+", ua)
+    match = re.search(r'\bEdge/(\d+)\.\d+', ua)
     if match:
-        return "Edge", match.group(1)
+        return 'Edge', match.group(1)
 
-    match = re.search(r"\bChrome/(\d+\.\d+)", ua)
+    match = re.search(r'\bChrome/(\d+\.\d+)', ua)
     if match:
-        return "Chrome", match.group(1)
+        return 'Chrome', match.group(1)
 
-    match = re.search(r"\bVersion/(\d+\.\d+)", ua)
-    if match and re.search(r"\bMobile Safari/", ua):
-        return "Mobile Safari", match.group(1)
-    if match and re.search(r"\bSafari/", ua):
-        return "Safari", match.group(1)
+    match = re.search(r'\bVersion/(\d+\.\d+)', ua)
+    if match and re.search(r'\bMobile Safari/', ua):
+        return 'Mobile Safari', match.group(1)
+    if match and re.search(r'\bSafari/', ua):
+        return 'Safari', match.group(1)
 
-    if re.search(r"\bAppleWebKit/", ua):
-        return "WebKit", ""
+    if re.search(r'\bAppleWebKit/', ua):
+        return 'WebKit', ''
 
-    match = re.search(r"\bMSIE (\d+\.\d+)", ua)
+    match = re.search(r'\bMSIE (\d+\.\d+)', ua)
     if match:
-        return "MSIE", match.group(1)
+        return 'MSIE', match.group(1)
 
-    match = re.search(r"\bTrident/(\d+\.\d+)", ua)
+    match = re.search(r'\bTrident/(\d+\.\d+)', ua)
     if match:
-        match2 = re.search(r"\brv:(\d+\.\d+)", ua)
+        match2 = re.search(r'\brv:(\d+\.\d+)', ua)
         if match2:
-            return "MSIE", match2.group(1)
+            return 'MSIE', match2.group(1)
         else:
-            return "Trident", match.group(1)
+            return 'Trident', match.group(1)
 
-    match = re.search(r"\bAndroidDownloadManager(?:/(\d+\.\d+))?", ua)
+    match = re.search(r'\bAndroidDownloadManager(?:/(\d+\.\d+))?', ua)
     if match:
-        return "Android", match.group(1) or ""
+        return 'Android', match.group(1) or ''
 
-    match = re.search(r"\bDalvik/.*\bAndroid (\d+\.\d+)", ua)
+    match = re.search(r'\bDalvik/.*\bAndroid (\d+\.\d+)', ua)
    if match:
-        return "Android", match.group(1)
+        return 'Android', match.group(1)
 
     # ABP/Android downloads use that user agent
-    if ua.startswith("Apache-HttpClient/UNAVAILABLE"):
-        return "Android", ""
+    if ua.startswith('Apache-HttpClient/UNAVAILABLE'):
+        return 'Android', ''
 
     # ABP/IE downloads use that user agent
-    if ua == "Adblock Plus":
-        return "ABP", ""
+    if ua == 'Adblock Plus':
+        return 'ABP', ''
 
-    return "Other", ""
+    return 'Other', ''
 
 
 def process_ip(ip, geo, geov6):
-    match = re.search(r"^::ffff:(\d+\.\d+\.\d+\.\d+)$", ip)
+    match = re.search(r'^::ffff:(\d+\.\d+\.\d+\.\d+)$', ip)
     if match:
         ip = match.group(1)
 
     try:
-        if ":" in ip:
+        if ':' in ip:
             country = geov6.country_code_by_addr(ip)
         else:
             country = geo.country_code_by_addr(ip)
     except:
         traceback.print_exc()
-        country = ""
+        country = ''
 
-    if country in (None, "", "--"):
-        country = "unknown"
+    if country in (None, '', '--'):
+        country = 'unknown'
     country = country.lower()
     return ip, country
@@ -259,16 +259,16 @@
 @cache_last
 def parse_time(timestr, tz_hours, tz_minutes):
-    result = datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S")
+    result = datetime.strptime(timestr, '%d/%b/%Y:%H:%M:%S')
     result -= timedelta(hours=tz_hours, minutes=math.copysign(tz_minutes, tz_hours))
-    return result, result.strftime("%Y%m"), result.day, result.weekday(), result.hour
+    return result, result.strftime('%Y%m'), result.day, result.weekday(), result.hour
 
 
 @cache_lru
 def parse_path(path):
     urlparts = urlparse.urlparse(path)
     try:
-        path = urllib.unquote(urlparts.path).decode("utf-8")
+        path = urllib.unquote(urlparts.path).decode('utf-8')
     except:
         path = urlparts.path
     return path[1:], urlparts.query
@@ -283,7 +283,7 @@
 def parse_lastversion(last_version):
     if '-' in last_version:
         last_version = last_version.split('-', 1)[0]
-    return datetime.strptime(last_version, "%Y%m%d%H%M")
+    return datetime.strptime(last_version, '%Y%m%d%H%M')
 
 
 @cache_lru
@@ -292,77 +292,77 @@
 def parse_downloader_query(info):
-    params = parse_query(info["query"])
-    for param in ("addonName", "addonVersion", "application", "applicationVersion", "platform", "platformVersion"):
-        info[param] = params.get(param, ["unknown"])[0]
+    params = parse_query(info['query'])
+    for param in ('addonName', 'addonVersion', 'application', 'applicationVersion', 'platform', 'platformVersion'):
+        info[param] = params.get(param, ['unknown'])[0]
 
     # Only leave the major and minor release number for application and platform
-    info["applicationVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["applicationVersion"])
-    info["platformVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["platformVersion"])
+    info['applicationVersion'] = re.sub(r'^(\d+\.\d+).*', r'\1', info['applicationVersion'])
+    info['platformVersion'] = re.sub(r'^(\d+\.\d+).*', r'\1', info['platformVersion'])
 
     # Chrome Adblock sends an X-Client-ID header insteads of URL parameters
-    match = re.match(r"^adblock/([\d\.]+)$", info["clientid"], re.I) if info["clientid"] else None
+    match = re.match(r'^adblock/([\d\.]+)$', info['clientid'], re.I) if info['clientid'] else None
     if match:
-        info["addonName"] = "chromeadblock"
-        info["addonVersion"] = match.group(1)
+        info['addonName'] = 'chromeadblock'
+        info['addonVersion'] = match.group(1)
 
-    last_version = params.get("lastVersion", ["unknown"])[0]
-    if info["file"] == "notification.json" and last_version == "0" and (
-            (info["addonName"] == "adblockplus" and info["addonVersion"] == "2.3.1") or
-            (info["addonName"] in ("adblockpluschrome", "adblockplusopera") and info["addonVersion"] == "1.5.2")
+    last_version = params.get('lastVersion', ['unknown'])[0]
+    if info['file'] == 'notification.json' and last_version == '0' and (
+            (info['addonName'] == 'adblockplus' and info['addonVersion'] == '2.3.1') or
+            (info['addonName'] in ('adblockpluschrome', 'adblockplusopera') and info['addonVersion'] == '1.5.2')
     ):
         # Broken notification version number in these releases, treat like unknown
-        last_version = "unknown"
+        last_version = 'unknown'
-    if last_version == "unknown":
-        info["downloadInterval"] = "unknown"
-        info["previousDownload"] = "unknown"
-    elif last_version == "0":
-        info["downloadInterval"] = "unknown"
-        info["previousDownload"] = "unknown"
-        info["firstDownload"] = True
+    if last_version == 'unknown':
+        info['downloadInterval'] = 'unknown'
+        info['previousDownload'] = 'unknown'
+    elif last_version == '0':
+        info['downloadInterval'] = 'unknown'
+        info['previousDownload'] = 'unknown'
+        info['firstDownload'] = True
     else:
         try:
             last_update = parse_lastversion(last_version)
-            diff = info["time"] - last_update
+            diff = info['time'] - last_update
             if diff.days >= 365:
-                info["downloadInterval"] = "%i year(s)" % (diff.days / 365)
+                info['downloadInterval'] = '%i year(s)' % (diff.days / 365)
             elif diff.days >= 30:
-                info["downloadInterval"] = "%i month(s)" % (diff.days / 30)
+                info['downloadInterval'] = '%i month(s)' % (diff.days / 30)
             elif diff.days >= 1:
-                info["downloadInterval"] = "%i day(s)" % diff.days
+                info['downloadInterval'] = '%i day(s)' % diff.days
             else:
-                info["downloadInterval"] = "%i hour(s)" % (diff.seconds / 3600)
+                info['downloadInterval'] = '%i hour(s)' % (diff.seconds / 3600)
 
-            if info["addonName"].startswith("adblockplus"):
-                diffdays = (info["time"].date() - last_update.date()).days
+            if info['addonName'].startswith('adblockplus'):
+                diffdays = (info['time'].date() - last_update.date()).days
                 if diffdays == 0:
-                    info["previousDownload"] = "same day"
+                    info['previousDownload'] = 'same day'
                 elif diffdays < 30:
-                    info["previousDownload"] = "%i day(s)" % diffdays
+                    info['previousDownload'] = '%i day(s)' % diffdays
                 elif diffdays < 365:
-                    info["previousDownload"] = "%i month(s)" % (diffdays / 30)
+                    info['previousDownload'] = '%i month(s)' % (diffdays / 30)
                 else:
-                    info["previousDownload"] = "%i year(s)" % (diffdays / 365)
+                    info['previousDownload'] = '%i year(s)' % (diffdays / 365)
             else:
-                info["previousDownload"] = "unknown"
+                info['previousDownload'] = 'unknown'
 
-            if last_update.year != info["time"].year or last_update.month != info["time"].month:
-                info["firstInMonth"] = info["firstInDay"] = True
-            elif last_update.day != info["time"].day:
-                info["firstInDay"] = True
+            if last_update.year != info['time'].year or last_update.month != info['time'].month:
+                info['firstInMonth'] = info['firstInDay'] = True
+            elif last_update.day != info['time'].day:
+                info['firstInDay'] = True
 
-            if get_week(last_update) != get_week(info["time"]):
-                info["firstInWeek"] = True
+            if get_week(last_update) != get_week(info['time']):
+                info['firstInWeek'] = True
         except ValueError:
-            info["downloadInterval"] = "unknown"
-            info["previousDownload"] = "unknown"
+            info['downloadInterval'] = 'unknown'
+            info['previousDownload'] = 'unknown'
             pass
 
 
 def parse_addon_name(file):
-    if "/" in file:
-        return file.split("/")[-2]
+    if '/' in file:
+        return file.split('/')[-2]
     else:
         return None
@@ -370,19 +370,19 @@
 def parse_gecko_query(query):
     params = urlparse.parse_qs(query)
 
-    version = params.get("version", ["unknown"])[0]
+    version = params.get('version', ['unknown'])[0]
 
     global gecko_apps
     if gecko_apps == None:
         from buildtools.packagerGecko import KNOWN_APPS
         gecko_apps = {v: k for k, v in KNOWN_APPS.iteritems()}
-    appID = params.get("appID", ["unknown"])[0]
+    appID = params.get('appID', ['unknown'])[0]
-    application = gecko_apps.get(appID, "unknown")
-    applicationVersion = params.get("appVersion", ["unknown"])[0]
+    application = gecko_apps.get(appID, 'unknown')
+    applicationVersion = params.get('appVersion', ['unknown'])[0]
 
     # Only leave the major and minor release number for application
-    applicationVersion = re.sub(r"^(\d+\.\d+).*", r"\1", applicationVersion)
+    applicationVersion = re.sub(r'^(\d+\.\d+).*', r'\1', applicationVersion)
 
     return version, application, applicationVersion
@@ -390,23 +390,23 @@
 def parse_chrome_query(query):
     params = urlparse.parse_qs(query)
-    if params.get("prod", ["unknown"])[0] in ("chromecrx", "chromiumcrx"):
-        application = "chrome"
+    if params.get('prod', ['unknown'])[0] in ('chromecrx', 'chromiumcrx'):
+        application = 'chrome'
     else:
-        application = "unknown"
-    applicationVersion = params.get("prodversion", ["unknown"])[0]
+        application = 'unknown'
+    applicationVersion = params.get('prodversion', ['unknown'])[0]
 
-    params2 = urlparse.parse_qs(params.get("x", [""])[0])
-    version = params2.get("v", ["unknown"])[0]
+    params2 = urlparse.parse_qs(params.get('x', [''])[0])
+    version = params2.get('v', ['unknown'])[0]
 
     # Only leave the major and minor release number for application
-    applicationVersion = re.sub(r"^(\d+\.\d+).*", r"\1", applicationVersion)
+    applicationVersion = re.sub(r'^(\d+\.\d+).*', r'\1', applicationVersion)
 
     return version, application, applicationVersion
 
 
 def parse_update_flag(query):
-    return "update" if query == "update" else "install"
+    return 'update' if query == 'update' else 'install'
 
 
 def parse_record(line, ignored, geo, geov6):
@@ -423,62 +423,62 @@
         return None
 
     info = {
-        "status": status,
-        "size": int(match.group(7)),
+        'status': status,
+        'size': int(match.group(7)),
     }
-    info["ip"], info["country"] = process_ip(match.group(1), geo, geov6)
-    info["time"], info["month"], info["day"], info["weekday"], info["hour"] = parse_time(match.group(2), int(match.group(3)), int(match.group(4)))
-    info["file"], info["query"] = parse_path(match.group(5))
-    info["referrer"] = match.group(8)
-    info["ua"], info["uaversion"] = parse_ua(match.group(9))
-    info["fullua"] = "%s %s" % (info["ua"], info["uaversion"])
-    info["clientid"] = match.group(10)
+    info['ip'], info['country'] = process_ip(match.group(1), geo, geov6)
+    info['time'], info['month'], info['day'], info['weekday'], info['hour'] = parse_time(match.group(2), int(match.group(3)), int(match.group(4)))
+    info['file'], info['query'] = parse_path(match.group(5))
+    info['referrer'] = match.group(8)
+    info['ua'], info['uaversion'] = parse_ua(match.group(9))
+    info['fullua'] = '%s %s' % (info['ua'], info['uaversion'])
+    info['clientid'] = match.group(10)
 
     # Additional metadata depends on file type
-    filename = os.path.basename(info["file"])
+    filename = os.path.basename(info['file'])
     ext = os.path.splitext(filename)[1]
-    if ext == ".txt" or filename == "update.json" or filename == "notification.json":
+    if ext == '.txt' or filename == 'update.json' or filename == 'notification.json':
         # Subscription downloads, libadblockplus update checks and notification
         # checks are performed by the downloader
         parse_downloader_query(info)
-    elif ext == ".tpl":
+    elif ext == '.tpl':
         # MSIE TPL download, no additional data here
         pass
-    elif ext in (".xpi", ".crx", ".apk", ".msi", ".exe", ".safariextz"):
+    elif ext in ('.xpi', '.crx', '.apk', '.msi', '.exe', '.safariextz'):
         # Package download, might be an update
-        info["installType"] = parse_update_flag(info["query"])
-    elif filename == "update.rdf":
+        info['installType'] = parse_update_flag(info['query'])
+    elif filename == 'update.rdf':
         # Gecko update check or a legacy Android update check. The latter doesn't
         # have usable data anyway so trying the Chrome route won't do any harm.
-        info["addonName"] = parse_addon_name(info["file"])
-        info["addonVersion"], info["application"], info["applicationVersion"] = parse_gecko_query(info["query"])
-    elif filename == "updates.xml":
+        info['addonName'] = parse_addon_name(info['file'])
+        info['addonVersion'], info['application'], info['applicationVersion'] = parse_gecko_query(info['query'])
+    elif filename == 'updates.xml':
         # Chrome update check
-        info["addonName"] = parse_addon_name(info["file"])
-        info["addonVersion"], info["application"], info["applicationVersion"] = parse_chrome_query(info["query"])
-    elif filename == "updates.plist":
+        info['addonName'] = parse_addon_name(info['file'])
+        info['addonVersion'], info['application'], info['applicationVersion'] = parse_chrome_query(info['query'])
+    elif filename == 'updates.plist':
         # Safari update check, no additional data
         pass
     else:
-        ignored.add(info["file"])
+        ignored.add(info['file'])
         return None
 
-    if "addonName" in info:
-        info["fullAddon"] = "%s %s" % (info["addonName"], info["addonVersion"])
-    if "application" in info:
-        info["fullApplication"] = "%s %s" % (info["application"], info["applicationVersion"])
-    if "platform" in info:
-        info["fullPlatform"] = "%s %s" % (info["platform"], info["platformVersion"])
+    if 'addonName' in info:
+        info['fullAddon'] = '%s %s' % (info['addonName'], info['addonVersion'])
+    if 'application' in info:
+        info['fullApplication'] = '%s %s' % (info['application'], info['applicationVersion'])
+    if 'platform' in info:
+        info['fullPlatform'] = '%s %s' % (info['platform'], info['platformVersion'])
     return info
 
 
 def add_record(info, section, ignore_fields=()):
-    section["hits"] = section.get("hits", 0) + 1
-    section["bandwidth"] = section.get("bandwidth", 0) + info["size"]
+    section['hits'] = section.get('hits', 0) + 1
+    section['bandwidth'] = section.get('bandwidth', 0) + info['size']
 
     if len(ignore_fields) < 2:
-        for field in map(lambda f: f["name"], common.fields):
+        for field in map(lambda f: f['name'], common.fields):
             if field in ignore_fields or field not in info:
                 continue
@@ -498,14 +498,14 @@
         if info == None:
             continue
 
-        info["mirror"] = mirror_name
-        if info["month"] not in data:
-            data[info["month"]] = {}
-        section = data[info["month"]]
+        info['mirror'] = mirror_name
+        if info['month'] not in data:
+            data[info['month']] = {}
+        section = data[info['month']]
 
-        if info["file"] not in section:
-            section[info["file"]] = {}
-        section = section[info["file"]]
+        if info['file'] not in section:
+            section[info['file']] = {}
+        section = section[info['file']]
 
         add_record(info, section)
     return data
@@ -516,7 +516,7 @@
         try:
             key = unicode(key)
         except UnicodeDecodeError:
-            key = unicode(key, encoding="latin-1")
+            key = unicode(key, encoding='latin-1')
         if isinstance(value, numbers.Number):
             object1[key] = object1.get(key, 0) + factor * value
         else:
@@ -524,12 +524,12 @@
 def save_stats(server_type, data, factor=1):
-    base_dir = os.path.join(get_config().get("stats", "dataDirectory"), common.filename_encode(server_type))
+    base_dir = os.path.join(get_config().get('stats', 'dataDirectory'), common.filename_encode(server_type))
     for month, month_data in data.iteritems():
         for name, file_data in month_data.iteritems():
-            path = os.path.join(base_dir, common.filename_encode(month), common.filename_encode(name + ".json"))
+            path = os.path.join(base_dir, common.filename_encode(month), common.filename_encode(name + '.json'))
             if os.path.exists(path):
-                with codecs.open(path, "rb", encoding="utf-8") as fileobj:
+                with codecs.open(path, 'rb', encoding='utf-8') as fileobj:
                     existing = json.load(fileobj)
             else:
                 existing = {}
@@ -543,14 +543,14 @@
                 if e.errno != errno.EEXIST:
                     raise
 
-            with codecs.open(path, "wb", encoding="utf-8") as fileobj:
+            with codecs.open(path, 'wb', encoding='utf-8') as fileobj:
                 json.dump(existing, fileobj, indent=2, sort_keys=True)
 
 
 def parse_source(factor, lock, (mirror_name, server_type, log_file)):
     try:
-        geo = pygeoip.GeoIP(get_config().get("stats", "geoip_db"), pygeoip.MEMORY_CACHE)
-        geov6 = pygeoip.GeoIP(get_config().get("stats", "geoipv6_db"), pygeoip.MEMORY_CACHE)
+        geo = pygeoip.GeoIP(get_config().get('stats', 'geoip_db'), pygeoip.MEMORY_CACHE)
+        geov6 = pygeoip.GeoIP(get_config().get('stats', 'geoipv6_db'), pygeoip.MEMORY_CACHE)
 
         ignored = set()
         fileobj = StatsFile(log_file)
@@ -578,21 +578,21 @@
     try:
         for log_file, ignored in pool.imap_unordered(callback, sources, chunksize=1):
             if verbose and ignored:
-                print "Ignored files for %s" % log_file
-                print "============================================================"
-                print "\n".join(sorted(ignored))
+                print 'Ignored files for %s' % log_file
+                print '============================================================'
+                print '\n'.join(sorted(ignored))
     finally:
         pool.close()
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     setupStderr()
 
-    parser = argparse.ArgumentParser(description="Processes log files and merges them into the stats database")
-    parser.add_argument("--verbose", dest="verbose", action="store_const", const=True, default=False, help="Verbose mode, ignored requests will be listed")
-    parser.add_argument("--revert", dest="factor", action="store_const", const=-1, default=1, help="Remove log data from the database")
-    parser.add_argument("mirror_name", nargs="?", help="Name of the mirror server that the file belongs to")
-    parser.add_argument("server_type", nargs="?", help="Server type like download, update or subscription")
-    parser.add_argument("log_file", nargs="?", help="Log file path, can be a local file path, http:// or ssh:// URL")
+    parser = argparse.ArgumentParser(description='Processes log files and merges them into the stats database')
+    parser.add_argument('--verbose', dest='verbose', action='store_const', const=True, default=False, help='Verbose mode, ignored requests will be listed')
+    parser.add_argument('--revert', dest='factor', action='store_const', const=-1, default=1, help='Remove log data from the database')
+    parser.add_argument('mirror_name', nargs='?', help='Name of the mirror server that the file belongs to')
+    parser.add_argument('server_type', nargs='?', help='Server type like download, update or subscription')
+    parser.add_argument('log_file', nargs='?', help='Log file path, can be a local file path, http:// or ssh:// URL')
    args = parser.parse_args()
 
     if args.mirror_name and args.server_type and args.log_file: