Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import os, sys, codecs, re, math, GeoIP, urllib, urlparse, socket, simplejson | 18 import os, sys, codecs, re, math, GeoIP, urllib, urlparse, socket, simplejson |
19 from collections import OrderedDict | |
19 import sitescripts.stats.common as common | 20 import sitescripts.stats.common as common |
20 from sitescripts.utils import get_config, setupStderr | 21 from sitescripts.utils import get_config, setupStderr |
21 from datetime import datetime, timedelta | 22 from datetime import datetime, timedelta |
22 | 23 |
# Module-level caches, initialized to None at import time and presumably
# populated lazily by code outside this chunk — confirm against the full file.
log_regexp = None
mirror_name = None
gecko_apps = None
def cache_lru(func):
  """
  Decorator that memoizes the return values of a single-parameter function in
  case it is called again with the same parameter. The 1024 most recent
  results are saved.
  """
  results = OrderedDict()

  def wrapped(arg):
    if arg in results:
      # Cache hit: remove and re-insert below so the entry becomes the most
      # recently used one.
      result = results.pop(arg)
    else:
      # The dict itself tracks how many entries it holds, so no separate
      # counter is needed; evict the least recently used entry (the head of
      # the insertion order) once the capacity is reached.
      if len(results) >= 1024:
        results.popitem(last=False)
      result = func(arg)
    results[arg] = result
    return result
  return wrapped
51 | |
52 | |
def cache_last(func):
  """
  Decorator that memoizes the last return value of a function in case it is
  called again with the same parameters.
  """
  last = {"args": None, "result": None}

  def wrapped(*call_args):
    if call_args == last["args"]:
      return last["result"]
    # Compute before storing the arguments, so that a raising call does not
    # leave a stale result associated with the new arguments.
    value = func(*call_args)
    last["result"] = value
    last["args"] = call_args
    return value
  return wrapped
66 | |
67 | |
68 @cache_lru | |
27 def parse_ua(ua): | 69 def parse_ua(ua): |
28 # Opera might disguise itself as other browser so it needs to go first | 70 # Opera might disguise itself as other browser so it needs to go first |
29 match = re.search(r"\bOpera/([\d\.]+)", ua) | 71 match = re.search(r"\bOpera/([\d\.]+)", ua) |
Felix Dahlke
2013/08/28 17:25:32
Memoization helped save around 5% here. In an acce
Sebastian Noack
2013/08/29 10:54:30
Keys in python dicts are already unique. So when y
| |
30 if match: | 72 if match: |
31 # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA | 73 # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA |
32 match2 = re.search(r"\bVersion/([\d\.]+)", ua) | 74 match2 = re.search(r"\bVersion/([\d\.]+)", ua) |
33 if match2: | 75 if match2: |
34 return "Opera", match2.group(1) | 76 return "Opera", match2.group(1) |
35 else: | 77 else: |
36 return "Opera", match.group(1) | 78 return "Opera", match.group(1) |
37 | 79 |
38 # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it | 80 # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it |
39 match = re.search(r"\bOPR/(\d+\.\d+)", ua) | 81 match = re.search(r"\bOPR/(\d+\.\d+)", ua) |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
110 if match: | 152 if match: |
111 ip = match.group(1) | 153 ip = match.group(1) |
112 | 154 |
113 country = geo.country_code_by_addr(ip) | 155 country = geo.country_code_by_addr(ip) |
114 if country in (None, "", "--"): | 156 if country in (None, "", "--"): |
115 country = "unknown" | 157 country = "unknown" |
116 country = country.lower() | 158 country = country.lower() |
117 | 159 |
118 return ip, country | 160 return ip, country |
119 | 161 |
@cache_last
def parse_time(timestr, tz_hours, tz_minutes):
  """
  Parse an Apache-style log timestamp and subtract the given timezone offset.

  Returns a tuple (datetime, "YYYYMM" string, day of month, weekday, hour).
  """
  parsed = datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S")
  # The minutes component of a timezone offset carries the same sign as the
  # hours component, hence the copysign.
  offset = timedelta(hours=tz_hours, minutes=math.copysign(tz_minutes, tz_hours))
  parsed = parsed - offset
  return parsed, parsed.strftime("%Y%m"), parsed.day, parsed.weekday(), parsed.hour
124 | 167 |
@cache_lru
def parse_path(path):
  """
  Split a request path into its path and query components.

  Returns a tuple (path without the leading slash, query string); the path is
  %-unquoted and decoded as UTF-8 where possible, and left unquoted otherwise.
  """
  urlparts = urlparse.urlparse(path)
  try:
    path = urllib.unquote(urlparts.path).decode("utf-8")
  except UnicodeDecodeError:
    # Only the UTF-8 decoding step is expected to fail; a bare except here
    # would also swallow KeyboardInterrupt and genuine bugs.
    path = urlparts.path
  return path[1:], urlparts.query
132 | 176 |
@cache_lru
def parse_query(query):
  # Memoized thin wrapper so repeated identical query strings (common in
  # access logs) are parsed only once.
  return urlparse.parse_qs(query)
180 | |
@cache_lru
def parse_lastversion(last_version):
  # "lastVersion" parameters are YYYYMMDDHHMM timestamps; raises ValueError
  # for malformed values (handled by the caller).
  return datetime.strptime(last_version, "%Y%m%d%H%M")
184 | |
@cache_lru
def get_week(date):
  # (ISO year, ISO week number) pair — used to compare whether two dates fall
  # into the same calendar week.
  return date.isocalendar()[0:2]
188 | |
def parse_downloader_query(info):
  """
  Extract downloader-specific fields from info["query"] and info["clientid"]
  and store them back into the info dict: addon/application/platform names and
  versions, downloadInterval, and the firstDownload/firstInMonth/firstInWeek/
  firstInDay flags.
  """
  params = parse_query(info["query"])
  for param in ("addonName", "addonVersion", "application", "applicationVersion", "platform", "platformVersion"):
    info[param] = params.get(param, ["unknown"])[0]

  # Only leave the major and minor release number for application and platform
  info["applicationVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["applicationVersion"])
  info["platformVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["platformVersion"])

  # Chrome Adblock sends an X-Client-ID header instead of URL parameters
  match = re.match(r"^adblock/([\d\.]+)$", info["clientid"], re.I) if info["clientid"] else None
  if match:
    info["addonName"] = "chromeadblock"
    info["addonVersion"] = match.group(1)

  last_version = params.get("lastVersion", ["unknown"])[0]
  if info["file"] == "notification.json" and last_version == "0" and (
    (info["addonName"] == "adblockplus" and info["addonVersion"] == "2.3.1") or
    (info["addonName"] in ("adblockpluschrome", "adblockplusopera") and info["addonVersion"] == "1.5.2")
  ):
    # Broken notification version number in these releases, treat like unknown
    last_version = "unknown"

  if last_version == "unknown":
    info["downloadInterval"] = "unknown"
  elif last_version == "0":
    # "0" marks a first-ever download.
    info["downloadInterval"] = "unknown"
    info["firstDownload"] = info["firstInMonth"] = info["firstInWeek"] = info["firstInDay"] = True
  else:
    try:
      last_update = parse_lastversion(last_version)
      diff = info["time"] - last_update
      # Floor division keeps Python 2 semantics and stays correct on Python 3.
      if diff.days >= 365:
        info["downloadInterval"] = "%i year(s)" % (diff.days // 365)
      elif diff.days >= 30:
        info["downloadInterval"] = "%i month(s)" % (diff.days // 30)
      elif diff.days >= 1:
        info["downloadInterval"] = "%i day(s)" % diff.days
      else:
        info["downloadInterval"] = "%i hour(s)" % (diff.seconds // 3600)

      if last_update.year != info["time"].year or last_update.month != info["time"].month:
        info["firstInMonth"] = info["firstInDay"] = True
      elif last_update.day != info["time"].day:
        info["firstInDay"] = True

      if get_week(last_update) != get_week(info["time"]):
        info["firstInWeek"] = True
    except ValueError:
      # Malformed lastVersion timestamp (raised by parse_lastversion); the
      # redundant trailing `pass` the original had here was dead code.
      info["downloadInterval"] = "unknown"
184 | 240 |
def parse_addon_name(file):
  """
  Return the add-on name encoded in a download path (its second-to-last
  path component), or None if the path has no directory component.
  """
  if "/" not in file:
    return None
  return file.split("/")[-2]
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
331 | 387 |
if __name__ == "__main__":
  setupStderr()

  # A first CLI argument of "verbose" toggles extra behavior in parse_stdin —
  # presumably diagnostic logging; confirm against parse_stdin's definition.
  verbose = (len(sys.argv) >= 2 and sys.argv[1] == "verbose")
  # GeoIP database is loaded fully into memory for fast per-line lookups.
  geo = GeoIP.open(get_config().get("stats", "geoip_db"), GeoIP.GEOIP_MEMORY_CACHE)
  result = parse_stdin(geo, verbose)

  # Aggregated stats are written as pretty-printed JSON to the configured
  # temp file.
  with codecs.open(get_config().get("stats", "tempFile"), "wb", encoding="utf-8") as file:
    simplejson.dump(result, file, indent=2, sort_keys=True)
LEFT | RIGHT |