Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import os, sys, codecs, re, math, GeoIP, urllib, urlparse, socket, simplejson | 18 import os, sys, codecs, re, math, GeoIP, urllib, urlparse, socket, simplejson |
19 from collections import OrderedDict | |
19 import sitescripts.stats.common as common | 20 import sitescripts.stats.common as common |
20 from sitescripts.utils import get_config, setupStderr | 21 from sitescripts.utils import get_config, setupStderr |
21 from datetime import datetime, timedelta | 22 from datetime import datetime, timedelta |
22 | 23 |
23 log_regexp = None | 24 log_regexp = None |
24 mirror_name = None | 25 mirror_name = None |
25 gecko_apps = None | 26 gecko_apps = None |
26 | 27 |
28 def cache_lru(func): | |
29 """ | |
30 Decorator that memoizes the return values of a single-parameter function in | |
31 case it is called again with the same parameter. The 1024 most recent | |
32 results are saved. | |
33 """ | |
34 | |
35 results = OrderedDict() | |
36 results.entries_left = 1024 | |
37 | |
38 def wrapped(arg): | |
39 if arg in results: | |
40 result = results[arg] | |
41 del results[arg] | |
42 else: | |
43 if results.entries_left > 0: | |
44 results.entries_left -= 1 | |
45 else: | |
46 results.popitem(last=False) | |
47 result = func(arg) | |
48 results[arg] = result | |
49 return result | |
50 return wrapped | |
51 | |
52 | |
53 def cache_last(func): | |
54 """ | |
55 Decorator that memoizes the last return value of a function in case it is | |
56 called again with the same parameters. | |
57 """ | |
58 result = {"args": None, "result": None} | |
59 | |
60 def wrapped(*args): | |
61 if args != result["args"]: | |
62 result["result"] = func(*args) | |
63 result["args"] = args | |
64 return result["result"] | |
65 return wrapped | |
66 | |
67 | |
68 @cache_lru | |
27 def parse_ua(ua): | 69 def parse_ua(ua): |
28 # Opera might disguise itself as other browser so it needs to go first | 70 # Opera might disguise itself as other browser so it needs to go first |
29 match = re.search(r"\bOpera/([\d\.]+)", ua) | 71 match = re.search(r"\bOpera/([\d\.]+)", ua) |
30 if match: | 72 if match: |
31 # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA | 73 # Opera 10+ declares itself as Opera 9.80 but adds Version/1x.x to the UA |
32 match2 = re.search(r"\bVersion/([\d\.]+)", ua) | 74 match2 = re.search(r"\bVersion/([\d\.]+)", ua) |
33 if match2: | 75 if match2: |
34 return "Opera", match2.group(1) | 76 return "Opera", match2.group(1) |
35 else: | 77 else: |
36 return "Opera", match.group(1) | 78 return "Opera", match.group(1) |
37 | 79 |
38 # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it | 80 # Opera 15+ has the same UA as Chrome but adds OPR/1x.x to it |
39 match = re.search(r"\bOPR/(\d+\.\d+)", ua) | 81 match = re.search(r"\bOPR/(\d+\.\d+)", ua) |
40 if match: | 82 if match: |
41 return "Opera", match.group(1) | 83 return "Opera", match.group(1) |
42 | 84 |
43 for appName in ("Fennec", "Thunderbird", "SeaMonkey", "Songbird", "K-Meleon", "Prism", "Firefox"): | 85 # Have to check for these before Firefox, they will usually have a Firefox identifier as well |
44 match = re.search(r"\b%s/(\d+\.\d+)" % appName, ua) | 86 match = re.search(r"\b(Fennec|Thunderbird|SeaMonkey|Songbird|K-Meleon|Prism)/(\d+\.\d+)", ua) |
Sebastian Noack
2013/08/26 16:05:22
Instead of iterating over the list of browsers and
Wladimir Palant
2013/08/27 07:34:28
This was done like that intentionally - quite a fe
| |
45 if match: | 87 if match: |
46 if appName == "Fennec" or (appName == "Firefox" and re.search(r"\bMobile;", ua)): | 88 if match.group(1) == "Fennec": |
47 return "Firefox Mobile", match.group(1) | 89 return "Firefox Mobile", match.group(2) |
48 else: | 90 else: |
49 return appName, match.group(1) | 91 return match.group(1), match.group(2) |
92 | |
93 match = re.search(r"\bFirefox/(\d+\.\d+)", ua) | |
94 if match: | |
95 if re.search(r"\bMobile;", ua): | |
96 return "Firefox Mobile", match.group(1) | |
97 else: | |
98 return "Firefox", match.group(1) | |
50 | 99 |
51 match = re.search(r"\brv:(\d+)\.(\d+)(?:\.(\d+))?", ua) | 100 match = re.search(r"\brv:(\d+)\.(\d+)(?:\.(\d+))?", ua) |
52 if match and re.search(r"\bGecko/", ua): | 101 if match and re.search(r"\bGecko/", ua): |
53 if match.group(3) and int(match.group(1)) < 2: | 102 if match.group(3) and int(match.group(1)) < 2: |
54 return "Gecko", "%s.%s.%s" % (match.group(1), match.group(2), match.group(3)) | 103 return "Gecko", "%s.%s.%s" % (match.group(1), match.group(2), match.group(3)) |
Sebastian Noack
2013/08/26 16:05:22
You could just just call match.groups(), which alr
Wladimir Palant
2013/08/27 07:34:28
I think I rather keep this as is for consistency w
| |
55 else: | 104 else: |
56 return "Gecko", "%s.%s" % (match.group(1), match.group(2)) | 105 return "Gecko", "%s.%s" % (match.group(1), match.group(2)) |
57 | 106 |
58 match = re.search(r"\bCoolNovo/(\d+\.\d+\.\d+)", ua) | 107 match = re.search(r"\bCoolNovo/(\d+\.\d+\.\d+)", ua) |
59 if match: | 108 if match: |
60 return "CoolNovo", match.group(1) | 109 return "CoolNovo", match.group(1) |
61 | 110 |
62 match = re.search(r"\bChrome/(\d+\.\d+)", ua) | 111 match = re.search(r"\bChrome/(\d+\.\d+)", ua) |
63 if match: | 112 if match: |
64 return "Chrome", match.group(1) | 113 return "Chrome", match.group(1) |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
103 if match: | 152 if match: |
104 ip = match.group(1) | 153 ip = match.group(1) |
105 | 154 |
106 country = geo.country_code_by_addr(ip) | 155 country = geo.country_code_by_addr(ip) |
107 if country in (None, "", "--"): | 156 if country in (None, "", "--"): |
108 country = "unknown" | 157 country = "unknown" |
109 country = country.lower() | 158 country = country.lower() |
110 | 159 |
111 return ip, country | 160 return ip, country |
112 | 161 |
162 @cache_last | |
113 def parse_time(timestr, tz_hours, tz_minutes): | 163 def parse_time(timestr, tz_hours, tz_minutes): |
114 result = datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S") | 164 result = datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S") |
115 result -= timedelta(hours = tz_hours, minutes = math.copysign(tz_minutes, tz_hours)) | 165 result -= timedelta(hours = tz_hours, minutes = math.copysign(tz_minutes, tz_hours)) |
116 return result, result.strftime("%Y%m"), result.day, result.weekday(), result.hour | 166 return result, result.strftime("%Y%m"), result.day, result.weekday(), result.hour |
117 | 167 |
168 @cache_lru | |
118 def parse_path(path): | 169 def parse_path(path): |
119 urlparts = urlparse.urlparse(path) | 170 urlparts = urlparse.urlparse(path) |
120 try: | 171 try: |
121 path = urllib.unquote(urlparts.path).decode("utf-8") | 172 path = urllib.unquote(urlparts.path).decode("utf-8") |
122 except: | 173 except: |
123 path = urlparts.path | 174 path = urlparts.path |
124 return path[1:], urlparts.query | 175 return path[1:], urlparts.query |
125 | 176 |
177 @cache_lru | |
178 def parse_query(query): | |
179 return urlparse.parse_qs(query) | |
180 | |
181 @cache_lru | |
182 def parse_lastversion(last_version): | |
183 return datetime.strptime(last_version, "%Y%m%d%H%M") | |
184 | |
185 @cache_lru | |
186 def get_week(date): | |
187 return date.isocalendar()[0:2] | |
188 | |
126 def parse_downloader_query(info): | 189 def parse_downloader_query(info): |
127 params = urlparse.parse_qs(info["query"]) | 190 params = parse_query(info["query"]) |
128 for param in ("addonName", "addonVersion", "application", "applicationVersion", "platform", "platformVersion"): | 191 for param in ("addonName", "addonVersion", "application", "applicationVersion", "platform", "platformVersion"): |
129 info[param] = params.get(param, ["unknown"])[0] | 192 info[param] = params.get(param, ["unknown"])[0] |
130 | 193 |
131 # Only leave the major and minor release number for application and platform | 194 # Only leave the major and minor release number for application and platform |
132 info["applicationVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["applicationVersion"]) | 195 info["applicationVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["applicationVersion"]) |
133 info["platformVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["platformVersion"]) | 196 info["platformVersion"] = re.sub(r"^(\d+\.\d+).*", r"\1", info["platformVersion"]) |
134 | 197 |
135 # Chrome Adblock sends an X-Client-ID header instead of URL parameters | 198 # Chrome Adblock sends an X-Client-ID header instead of URL parameters |
136 match = re.match(r"^adblock/([\d\.]+)$", info["clientid"], re.I) if info["clientid"] else None | 199 match = re.match(r"^adblock/([\d\.]+)$", info["clientid"], re.I) if info["clientid"] else None |
137 if match: | 200 if match: |
138 info["addonName"] = "chromeadblock" | 201 info["addonName"] = "chromeadblock" |
139 info["addonVersion"] = match.group(1) | 202 info["addonVersion"] = match.group(1) |
140 | 203 |
141 last_version = params.get("lastVersion", ["unknown"])[0] | 204 last_version = params.get("lastVersion", ["unknown"])[0] |
142 if info["file"] == "notification.json" and last_version == "0" and ( | 205 if info["file"] == "notification.json" and last_version == "0" and ( |
143 (info["addonName"] == "adblockplus" and info["addonVersion"] == "2.3.1") or | 206 (info["addonName"] == "adblockplus" and info["addonVersion"] == "2.3.1") or |
144 (info["addonName"] in ("adblockpluschrome", "adblockplusopera") and info["addonVersion"] == "1.5.2") | 207 (info["addonName"] in ("adblockpluschrome", "adblockplusopera") and info["addonVersion"] == "1.5.2") |
145 ): | 208 ): |
146 # Broken notification version number in these releases, treat like unknown | 209 # Broken notification version number in these releases, treat like unknown |
147 last_version = "unknown" | 210 last_version = "unknown" |
148 | 211 |
149 if last_version == "unknown": | 212 if last_version == "unknown": |
150 info["downloadInterval"] = "unknown" | 213 info["downloadInterval"] = "unknown" |
151 elif last_version == "0": | 214 elif last_version == "0": |
152 info["downloadInterval"] = "unknown" | 215 info["downloadInterval"] = "unknown" |
153 info["firstDownload"] = info["firstInMonth"] = info["firstInWeek"] = info["firstInDay"] = True | 216 info["firstDownload"] = info["firstInMonth"] = info["firstInWeek"] = info["firstInDay"] = True |
154 else: | 217 else: |
155 try: | 218 try: |
156 last_update = datetime.strptime(last_version, "%Y%m%d%H%M") | 219 last_update = parse_lastversion(last_version) |
157 diff = info["time"] - last_update | 220 diff = info["time"] - last_update |
158 if diff.days >= 365: | 221 if diff.days >= 365: |
159 info["downloadInterval"] = "%i year(s)" % (diff.days / 365) | 222 info["downloadInterval"] = "%i year(s)" % (diff.days / 365) |
160 elif diff.days >= 30: | 223 elif diff.days >= 30: |
161 info["downloadInterval"] = "%i month(s)" % (diff.days / 30) | 224 info["downloadInterval"] = "%i month(s)" % (diff.days / 30) |
162 elif diff.days >= 1: | 225 elif diff.days >= 1: |
163 info["downloadInterval"] = "%i day(s)" % diff.days | 226 info["downloadInterval"] = "%i day(s)" % diff.days |
164 else: | 227 else: |
165 info["downloadInterval"] = "%i hour(s)" % (diff.seconds / 3600) | 228 info["downloadInterval"] = "%i hour(s)" % (diff.seconds / 3600) |
166 | 229 |
167 if last_update.year != info["time"].year or last_update.month != info["time"].month: | 230 if last_update.year != info["time"].year or last_update.month != info["time"].month: |
168 info["firstInMonth"] = info["firstInDay"] = True | 231 info["firstInMonth"] = info["firstInDay"] = True |
169 elif last_update.day != info["time"].day: | 232 elif last_update.day != info["time"].day: |
170 info["firstInDay"] = True | 233 info["firstInDay"] = True |
171 | 234 |
172 if last_update.isocalendar()[0:2] != info["time"].isocalendar()[0:2]: | 235 if get_week(last_update) != get_week(info["time"]): |
173 info["firstInWeek"] = True | 236 info["firstInWeek"] = True |
174 except ValueError: | 237 except ValueError: |
175 info["downloadInterval"] = "unknown" | 238 info["downloadInterval"] = "unknown" |
176 pass | 239 pass |
177 | 240 |
178 def parse_addon_name(file): | 241 def parse_addon_name(file): |
179 if "/" in file: | 242 if "/" in file: |
180 return file.split("/")[-2] | 243 return file.split("/")[-2] |
181 else: | 244 else: |
182 return None | 245 return None |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
324 | 387 |
325 if __name__ == "__main__": | 388 if __name__ == "__main__": |
326 setupStderr() | 389 setupStderr() |
327 | 390 |
328 verbose = (len(sys.argv) >= 2 and sys.argv[1] == "verbose") | 391 verbose = (len(sys.argv) >= 2 and sys.argv[1] == "verbose") |
329 geo = GeoIP.open(get_config().get("stats", "geoip_db"), GeoIP.GEOIP_MEMORY_CACHE) | 392 geo = GeoIP.open(get_config().get("stats", "geoip_db"), GeoIP.GEOIP_MEMORY_CACHE) |
330 result = parse_stdin(geo, verbose) | 393 result = parse_stdin(geo, verbose) |
331 | 394 |
332 with codecs.open(get_config().get("stats", "tempFile"), "wb", encoding="utf-8") as file: | 395 with codecs.open(get_config().get("stats", "tempFile"), "wb", encoding="utf-8") as file: |
333 simplejson.dump(result, file, indent=2, sort_keys=True) | 396 simplejson.dump(result, file, indent=2, sort_keys=True) |
LEFT | RIGHT |