| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 # coding: utf-8 | 1 # coding: utf-8 |
| 2 | 2 |
| 3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
| 4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
| 5 # | 5 # |
| 6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
| 7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
| 8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
| 9 # | 9 # |
| 10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
| 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
| 14 # | 14 # |
| 15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
| 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 17 | 17 |
| 18 import os, sys, codecs, re, math, GeoIP, urllib, urlparse, socket, simplejson | 18 import os, sys, codecs, re, math, GeoIP, urllib, urlparse, socket, simplejson |
| 19 from collections import OrderedDict | |
| 19 import sitescripts.stats.common as common | 20 import sitescripts.stats.common as common |
| 20 from sitescripts.utils import get_config, setupStderr | 21 from sitescripts.utils import get_config, setupStderr |
| 21 from datetime import datetime, timedelta | 22 from datetime import datetime, timedelta |
| 22 | 23 |
| 23 log_regexp = None | 24 log_regexp = None |
| 24 mirror_name = None | 25 mirror_name = None |
| 25 gecko_apps = None | 26 gecko_apps = None |
| 26 | 27 |
| 28 def cache_lru(func): | |
| 29 """ | |
| 30 Decorator that memoizes the return values of a single-parameter function in | |
| 31 case it is called again with the same parameter. The 1024 most recent | |
| 32 results are saved. | |
| 33 """ | |
| 34 | |
| 35 results = OrderedDict() | |
| 36 results.entries_left = 1024 | |
| 37 | |
| 38 def wrapped(arg): | |
| 39 if arg in results: | |
| 40 result = results[arg] | |
| 41 del results[arg] | |
| 42 else: | |
| 43 if results.entries_left > 0: | |
| 44 results.entries_left -= 1 | |
| 45 else: | |
| 46 results.popitem(last=False) | |
| 47 result = func(arg) | |
| 48 results[arg] = result | |
| 49 return result | |
| 50 return wrapped | |
| 51 | |
| 52 | |
| 53 def cache_last(func): | |
| 54 """ | |
| 55 Decorator that memoizes the last return value of a function in case it is | |
| 56 called again with the same parameters. | |
| 57 """ | |
| 58 result = {"args": None, "result": None} | |
| 59 | |
| 60 def wrapped(*args): | |
| 61 if args != result["args"]: | |
| 62 result["result"] = func(*args) | |
| 63 result["args"] = args | |
| 64 return result["result"] | |
| 65 return wrapped | |
| 66 | |
| 67 | |
| 68 @cache_lru | |
| 27 def parse_ua(ua): | 69 def parse_ua(ua): |
| 28 # Opera might disguise itself as other browser so it needs to go first | 70 # Opera might disguise itself as other browser so it needs to go first |
| 29 match = re.search(r"\bOpera/([\d\.]+)", ua) | 71 match = re.search(r"\bOpera/([\d\.]+)", ua) |
| 30 if match: | 72 if match: |
| 31 # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA | 73 # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA |
| 32 match2 = re.search(r"\bVersion/([\d\.]+)", ua) | 74 match2 = re.search(r"\bVersion/([\d\.]+)", ua) |
| 33 if match2: | 75 if match2: |
| 34 return "Opera", match2.group(1) | 76 return "Opera", match2.group(1) |
| 35 else: | 77 else: |
| 36 return "Opera", match.group(1) | 78 return "Opera", match.group(1) |
| 37 | 79 |
| 38 # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it | 80 # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it |
| 39 match = re.search(r"\bOPR/(\d+\.\d+)", ua) | 81 match = re.search(r"\bOPR/(\d+\.\d+)", ua) |
| 40 if match: | 82 if match: |
| 41 return "Opera", match.group(1) | 83 return "Opera", match.group(1) |
| 42 | 84 |
| 43 for appName in ("Fennec", "Thunderbird", "SeaMonkey", "Songbird", "K-Meleon", "Prism", "Firefox"): | 85 # Have to check for these before Firefox, they will usually have a Firefox identifier as well |
| 44 match = re.search(r"\b%s/(\d+\.\d+)" % appName, ua) | 86 match = re.search(r"\b(Fennec|Thunderbird|SeaMonkey|Songbird|K-Meleon|Prism)/(\d+\.\d+)", ua) |
|
Sebastian Noack
2013/08/26 16:05:22
Instead of iterating over the list of browsers and
Wladimir Palant
2013/08/27 07:34:28
This was done like that intentionally - quite a fe
| |
| 45 if match: | 87 if match: |
| 46 if appName == "Fennec" or (appName == "Firefox" and re.search(r"\bMobile;", ua)): | 88 if match.group(1) == "Fennec": |
| 47 return "Firefox Mobile", match.group(1) | 89 return "Firefox Mobile", match.group(2) |
| 48 else: | 90 else: |
| 49 return appName, match.group(1) | 91 return match.group(1), match.group(2) |
| 92 | |
| 93 match = re.search(r"\bFirefox/(\d+\.\d+)", ua) | |
| 94 if match: | |
| 95 if re.search(r"\bMobile;", ua): | |
| 96 return "Firefox Mobile", match.group(1) | |
| 97 else: | |
| 98 return "Firefox", match.group(1) | |
| 50 | 99 |
| 51 match = re.search(r"\brv:(\d+)\.(\d+)(?:\.(\d+))?", ua) | 100 match = re.search(r"\brv:(\d+)\.(\d+)(?:\.(\d+))?", ua) |
| 52 if match and re.search(r"\bGecko/", ua): | 101 if match and re.search(r"\bGecko/", ua): |
| 53 if match.group(3) and int(match.group(1)) < 2: | 102 if match.group(3) and int(match.group(1)) < 2: |
| 54 return "Gecko", "%s.%s.%s" % (match.group(1), match.group(2), match.group(3)) | 103 return "Gecko", "%s.%s.%s" % (match.group(1), match.group(2), match.group(3)) |
|
Sebastian Noack
2013/08/26 16:05:22
You could just just call match.groups(), which alr
Wladimir Palant
2013/08/27 07:34:28
I think I rather keep this as is for consistency w
| |
| 55 else: | 104 else: |
| 56 return "Gecko", "%s.%s" % (match.group(1), match.group(2)) | 105 return "Gecko", "%s.%s" % (match.group(1), match.group(2)) |
| 57 | 106 |
| 58 match = re.search(r"\bCoolNovo/(\d+\.\d+\.\d+)", ua) | 107 match = re.search(r"\bCoolNovo/(\d+\.\d+\.\d+)", ua) |
| 59 if match: | 108 if match: |
| 60 return "CoolNovo", match.group(1) | 109 return "CoolNovo", match.group(1) |
| 61 | 110 |
| 62 match = re.search(r"\bChrome/(\d+\.\d+)", ua) | 111 match = re.search(r"\bChrome/(\d+\.\d+)", ua) |
| 63 if match: | 112 if match: |
| 64 return "Chrome", match.group(1) | 113 return "Chrome", match.group(1) |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 103 if match: | 152 if match: |
| 104 ip = match.group(1) | 153 ip = match.group(1) |
| 105 | 154 |
| 106 country = geo.country_code_by_addr(ip) | 155 country = geo.country_code_by_addr(ip) |
| 107 if country in (None, "", "--"): | 156 if country in (None, "", "--"): |
| 108 country = "unknown" | 157 country = "unknown" |
| 109 country = country.lower() | 158 country = country.lower() |
| 110 | 159 |
| 111 return ip, country | 160 return ip, country |
| 112 | 161 |
| 162 @cache_last | |
| 113 def parse_time(timestr, tz_hours, tz_minutes): | 163 def parse_time(timestr, tz_hours, tz_minutes): |
| 114 result = datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S") | 164 result = datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S") |
| 115 result -= timedelta(hours = tz_hours, minutes = math.copysign(tz_minutes, tz_hours)) | 165 result -= timedelta(hours = tz_hours, minutes = math.copysign(tz_minutes, tz_hours)) |
| 116 return result, result.strftime("%Y%m"), result.day, result.weekday(), result.hour | 166 return result, result.strftime("%Y%m"), result.day, result.weekday(), result.hour |
| 117 | 167 |
| 168 @cache_lru | |
| 118 def parse_path(path): | 169 def parse_path(path): |
| 119 urlparts = urlparse.urlparse(path) | 170 urlparts = urlparse.urlparse(path) |
| 120 try: | 171 try: |
| 121 path = urllib.unquote(urlparts.path).decode("utf-8") | 172 path = urllib.unquote(urlparts.path).decode("utf-8") |
| 122 except: | 173 except: |
| 123 path = urlparts.path | 174 path = urlparts.path |
| 124 return path[1:], urlparts.query | 175 return path[1:], urlparts.query |
| 125 | 176 |
| 177 @cache_lru | |
| 178 def parse_query(query): | |
| 179 return urlparse.parse_qs(query) | |
| 180 | |
| 181 @cache_lru | |
| 182 def parse_lastversion(last_version): | |
| 183 return datetime.strptime(last_version, "%Y%m%d%H%M") | |
| 184 | |
| 185 @cache_lru | |
| 186 def get_week(date): | |
| 187 return date.isocalendar()[0:2] | |
| 188 | |
| 126 def parse_downloader_query(info): | 189 def parse_downloader_query(info): |
| 127 params = urlparse.parse_qs(info["query"]) | 190 params = parse_query(info["query"]) |
| 128 for param in ("addonName", "addonVersion", "application", "applicationVersion", "platform", "platformVersion"): | 191 for param in ("addonName", "addonVersion", "application", "applicationVersion", "platform", "platformVersion"): |
| 129 info[param] = params.get(param, ["unknown"])[0] | 192 info[param] = params.get(param, ["unknown"])[0] |
| 130 | 193 |
| 131 # Only leave the major and minor release number for application and platform | 194 # Only leave the major and minor release number for application and platform |
| 132 info["applicationVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["applicationVersion"]) | 195 info["applicationVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["applicationVersion"]) |
| 133 info["platformVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["platformVersion"]) | 196 info["platformVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["platformVersion"]) |
| 134 | 197 |
| 135 # Chrome Adblock sends an X-Client-ID header insteads of URL parameters | 198 # Chrome Adblock sends an X-Client-ID header insteads of URL parameters |
| 136 match = re.match(r"^adblock/([\d\.]+)$", info["clientid"], re.I) if info["clientid"] else None | 199 match = re.match(r"^adblock/([\d\.]+)$", info["clientid"], re.I) if info["clientid"] else None |
| 137 if match: | 200 if match: |
| 138 info["addonName"] = "chromeadblock" | 201 info["addonName"] = "chromeadblock" |
| 139 info["addonVersion"] = match.group(1) | 202 info["addonVersion"] = match.group(1) |
| 140 | 203 |
| 141 last_version = params.get("lastVersion", ["unknown"])[0] | 204 last_version = params.get("lastVersion", ["unknown"])[0] |
| 142 if info["file"] == "notification.json" and last_version == "0" and ( | 205 if info["file"] == "notification.json" and last_version == "0" and ( |
| 143 (info["addonName"] == "adblockplus" and info["addonVersion"] == "2.3.1") or | 206 (info["addonName"] == "adblockplus" and info["addonVersion"] == "2.3.1") or |
| 144 (info["addonName"] in ("adblockpluschrome", "adblockplusopera") and info["addonVersion"] == "1.5.2") | 207 (info["addonName"] in ("adblockpluschrome", "adblockplusopera") and info["addonVersion"] == "1.5.2") |
| 145 ): | 208 ): |
| 146 # Broken notification version number in these releases, treat like unknown | 209 # Broken notification version number in these releases, treat like unknown |
| 147 last_version = "unknown" | 210 last_version = "unknown" |
| 148 | 211 |
| 149 if last_version == "unknown": | 212 if last_version == "unknown": |
| 150 info["downloadInterval"] = "unknown" | 213 info["downloadInterval"] = "unknown" |
| 151 elif last_version == "0": | 214 elif last_version == "0": |
| 152 info["downloadInterval"] = "unknown" | 215 info["downloadInterval"] = "unknown" |
| 153 info["firstDownload"] = info["firstInMonth"] = info["firstInWeek"] = info["firstInDay"] = True | 216 info["firstDownload"] = info["firstInMonth"] = info["firstInWeek"] = info["firstInDay"] = True |
| 154 else: | 217 else: |
| 155 try: | 218 try: |
| 156 last_update = datetime.strptime(last_version, "%Y%m%d%H%M") | 219 last_update = parse_lastversion(last_version) |
| 157 diff = info["time"] - last_update | 220 diff = info["time"] - last_update |
| 158 if diff.days >= 365: | 221 if diff.days >= 365: |
| 159 info["downloadInterval"] = "%i year(s)" % (diff.days / 365) | 222 info["downloadInterval"] = "%i year(s)" % (diff.days / 365) |
| 160 elif diff.days >= 30: | 223 elif diff.days >= 30: |
| 161 info["downloadInterval"] = "%i month(s)" % (diff.days / 30) | 224 info["downloadInterval"] = "%i month(s)" % (diff.days / 30) |
| 162 elif diff.days >= 1: | 225 elif diff.days >= 1: |
| 163 info["downloadInterval"] = "%i day(s)" % diff.days | 226 info["downloadInterval"] = "%i day(s)" % diff.days |
| 164 else: | 227 else: |
| 165 info["downloadInterval"] = "%i hour(s)" % (diff.seconds / 3600) | 228 info["downloadInterval"] = "%i hour(s)" % (diff.seconds / 3600) |
| 166 | 229 |
| 167 if last_update.year != info["time"].year or last_update.month != info["time"].month: | 230 if last_update.year != info["time"].year or last_update.month != info["time"].month: |
| 168 info["firstInMonth"] = info["firstInDay"] = True | 231 info["firstInMonth"] = info["firstInDay"] = True |
| 169 elif last_update.day != info["time"].day: | 232 elif last_update.day != info["time"].day: |
| 170 info["firstInDay"] = True | 233 info["firstInDay"] = True |
| 171 | 234 |
| 172 if last_update.isocalendar()[0:2] != info["time"].isocalendar()[0:2]: | 235 if get_week(last_update) != get_week(info["time"]): |
| 173 info["firstInWeek"] = True | 236 info["firstInWeek"] = True |
| 174 except ValueError: | 237 except ValueError: |
| 175 info["downloadInterval"] = "unknown" | 238 info["downloadInterval"] = "unknown" |
| 176 pass | 239 pass |
| 177 | 240 |
| 178 def parse_addon_name(file): | 241 def parse_addon_name(file): |
| 179 if "/" in file: | 242 if "/" in file: |
| 180 return file.split("/")[-2] | 243 return file.split("/")[-2] |
| 181 else: | 244 else: |
| 182 return None | 245 return None |
| (...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 324 | 387 |
| 325 if __name__ == "__main__": | 388 if __name__ == "__main__": |
| 326 setupStderr() | 389 setupStderr() |
| 327 | 390 |
| 328 verbose = (len(sys.argv) >= 2 and sys.argv[1] == "verbose") | 391 verbose = (len(sys.argv) >= 2 and sys.argv[1] == "verbose") |
| 329 geo = GeoIP.open(get_config().get("stats", "geoip_db"), GeoIP.GEOIP_MEMORY_CACHE) | 392 geo = GeoIP.open(get_config().get("stats", "geoip_db"), GeoIP.GEOIP_MEMORY_CACHE) |
| 330 result = parse_stdin(geo, verbose) | 393 result = parse_stdin(geo, verbose) |
| 331 | 394 |
| 332 with codecs.open(get_config().get("stats", "tempFile"), "wb", encoding="utf-8") as file: | 395 with codecs.open(get_config().get("stats", "tempFile"), "wb", encoding="utf-8") as file: |
| 333 simplejson.dump(result, file, indent=2, sort_keys=True) | 396 simplejson.dump(result, file, indent=2, sort_keys=True) |
| LEFT | RIGHT |