OLD | NEW |
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import os, re, time | 18 import os, re, codecs, simplejson, time, itertools |
19 from datetime import date, timedelta | 19 from datetime import date |
20 from sitescripts.utils import get_config, setupStderr, get_template | 20 from sitescripts.utils import get_config, setupStderr, get_custom_template_envir
onment, cached |
21 from sitescripts.logs.countryCodes import countryCodes | 21 import sitescripts.stats.common as common |
22 from ConfigParser import SafeConfigParser | 22 from sitescripts.stats.countrycodes import countrycodes |
23 | 23 |
24 def getSubscriptionFiles(data, month): | 24 @cached(float("inf")) |
25 result = {} | 25 def get_template_environment(): |
26 if data.has_section(month): | 26 return get_custom_template_environment({ |
27 for option in data.options(month): | 27 "monthname": lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime
("%b %Y"), |
28 result[option[0:option.index(' ')]] = True | 28 "weekday": lambda value: ["Monday", "Tuesday", "Wednesday", "Thursday", "Fri
day", "Saturday", "Sunday"][int(value)], |
29 return result | 29 "countryname": lambda value: countrycodes.get(value, "Unknown"), |
| 30 "sortfield": lambda value, field: (field["sort"] if "sort" in field else def
ault_sort)(value), |
| 31 "maxhits": lambda items: max(itertools.chain((value["hits"] for key, value i
n items), [1])), |
| 32 "maxbandwidth": lambda items: max(itertools.chain((value["bandwidth"] for ke
y, value in items), [1])), |
| 33 "sumhits": lambda items: max(sum(value["hits"] for key, value in items), 1), |
| 34 "sumbandwidth": lambda items: max(sum(value["bandwidth"] for key, value in i
tems), 1), |
| 35 "isspecial": lambda name, field: field["isspecial"](name) if "isspecial" in
field else False, |
| 36 }) |
30 | 37 |
31 def generateMainPage(data, outputDir): | 38 @cached(float("inf")) |
32 def getDataInt(month, key): | 39 def get_main_page_template(): |
33 if data.has_option(month, key): | 40 return get_template_environment().get_template(get_config().get("stats", "main
PageTemplate")) |
34 return data.getint(month, key) | |
35 else: | |
36 return 0 | |
37 | 41 |
38 month = date.today().strftime('%Y%m') | 42 @cached(float("inf")) |
39 subscriptions = [] | 43 def get_file_stats_template(): |
40 for fileName in getSubscriptionFiles(data, month).iterkeys(): | 44 return get_template_environment().get_template(get_config().get("stats", "file
PageTemplate")) |
41 subscriptions.append({ | |
42 'fileName': fileName, | |
43 'url': 'subscription_%s_%s.html' % (re.sub(r'\W', '_', fileName), month), | |
44 'hits': getDataInt(month, '%s hits' % fileName), | |
45 'bandwidth': getDataInt(month, '%s bandwidth' % fileName) | |
46 }) | |
47 subscriptions = sorted(subscriptions, key=lambda s: s['hits'], reverse=True) | |
48 | 45 |
49 file = os.path.join(outputDir, 'index.html') | 46 @cached(float("inf")) |
50 template = get_template(get_config().get('subscriptionStats', 'mainPageTemplat
e')) | 47 def get_file_overview_template(): |
51 template.stream({'now': time.time(), 'month': month, 'subscriptions': subscrip
tions}).dump(file) | 48 return get_template_environment().get_template(get_config().get("stats", "file
OverviewTemplate")) |
52 | 49 |
53 def generateSubscriptionPages(data, outputDir): | 50 def default_sort(obj): |
54 existingSubscriptions = {} | 51 return sorted(obj.items(), key=lambda (k,v): v["hits"], reverse=True) |
55 template = get_template(get_config().get('subscriptionStats', 'subscriptionPag
eTemplate')) | |
56 for month in data.sections(): | |
57 subscriptions = {} | |
58 for option in data.options(month): | |
59 spaceIndex = option.index(' ') | |
60 if spaceIndex < 0: | |
61 continue | |
62 fileName, key = option[0:spaceIndex], option[spaceIndex+1:] | |
63 existingSubscriptions[fileName] = True | |
64 if not fileName in subscriptions: | |
65 subscriptions[fileName] = { | |
66 'now': time.time(), | |
67 'month': month, | |
68 'daysInMonth': (date(int(month[0:4]), int(month[4:]), 1) - timedelta(d
ays=1)).day, | |
69 'currentMonth': month == date.today().strftime('%Y%m'), | |
70 'fileName': fileName, | |
71 'overviewURL': 'overview_%s.html' % re.sub(r'\W', '_', fileName), | |
72 'hits': 0, | |
73 'bandwidth': 0, | |
74 'day': {}, | |
75 'weekday': [{'id': i, 'hits': 0, 'bandwidth': 0, 'count': 0}for i in r
ange(7)], | |
76 'hour': {}, | |
77 'country': {}, | |
78 'app': {}, | |
79 'mirror': {}, | |
80 } | |
81 if key == 'hits' or key == 'bandwidth': | |
82 subscriptions[fileName][key] = data.getint(month, option) | |
83 else: | |
84 match = re.search(r'^(hits|bandwidth) (day|hour|country|app|mirror) (.*)
$', key) | |
85 if match: | |
86 if not match.group(3) in subscriptions[fileName][match.group(2)]: | |
87 subscriptions[fileName][match.group(2)][match.group(3)] = { | |
88 'id': match.group(3), | |
89 'hits': 0, | |
90 'bandwidth': 0, | |
91 } | |
92 if match.group(2) == 'day': | |
93 subscriptions[fileName][match.group(2)][match.group(3)]['weekday']
= date(int(month[0:4]), int(month[4:]), int(match.group(3))).weekday() | |
94 if match.group(2) == 'country': | |
95 if match.group(3) in countryCodes: | |
96 subscriptions[fileName][match.group(2)][match.group(3)]['name']
= countryCodes[match.group(3)] | |
97 subscriptions[fileName][match.group(2)][match.group(3)]['image']
= match.group(3) | |
98 else: | |
99 subscriptions[fileName][match.group(2)][match.group(3)]['name']
= 'Unknown' | |
100 subscriptions[fileName][match.group(2)][match.group(3)]['image']
= 'ip' | |
101 subscriptions[fileName][match.group(2)][match.group(3)][match.group(1)
] = data.getint(month, option) | |
102 | 52 |
103 for subscription in subscriptions.itervalues(): | 53 def ensure_dir(path): |
104 for key in ('day', 'hour'): | 54 dir = os.path.dirname(path) |
105 subscription[key] = sorted(subscription[key].itervalues(), key=lambda s:
int(s['id'])) | 55 try: |
106 for key in ('country', 'app', 'mirror'): | 56 os.makedirs(dir) |
107 subscription[key] = sorted(subscription[key].itervalues(), key=lambda s:
s['hits'], reverse=True) | 57 except OSError: |
108 for dayInfo in subscription['day']: | 58 pass |
109 weekdayInfo = subscription['weekday'][dayInfo['weekday']] | |
110 weekdayInfo['hits'] = (weekdayInfo['hits'] * weekdayInfo['count'] + dayI
nfo['hits']) / (weekdayInfo['count'] + 1) | |
111 weekdayInfo['bandwidth'] = (weekdayInfo['bandwidth'] * weekdayInfo['coun
t'] + dayInfo['bandwidth']) / (weekdayInfo['count'] + 1) | |
112 weekdayInfo['count'] += 1 | |
113 fileName = 'subscription_%s_%s.html' % (re.sub(r'\W', '_', subscription['f
ileName']), month) | |
114 template.stream(subscription).dump(os.path.join(outputDir, fileName)) | |
115 return existingSubscriptions | |
116 | 59 |
117 def generateOverviewPage(data, outputDir, fileName): | 60 def generate_main_page(outputfile, month, url, data): |
118 months = [] | 61 ensure_dir(outputfile) |
119 for month in data.sections(): | 62 get_main_page_template().stream({ |
120 if data.has_option(month, '%s hits' % fileName) and data.has_option(month, '
%s bandwidth' % fileName): | 63 "now": time.time(), |
121 months.append({ | 64 "month": month, |
122 'id': month, | 65 "url": url, |
123 'url': 'subscription_%s_%s.html' % (re.sub(r'\W', '_', fileName), month)
, | 66 "data": data, |
124 'hits': data.getint(month, '%s hits' % fileName), | 67 }).dump(outputfile) |
125 'bandwidth': data.getint(month, '%s bandwidth' % fileName), | |
126 }) | |
127 months = sorted(months, key=lambda m: m['id']) | |
128 | 68 |
129 file = os.path.join(outputDir, 'overview_%s.html' % re.sub(r'\W', '_', fileNam
e)) | 69 def generate_file_stats(outputfile, month, url, overview_url, data, filter=None,
filtered_urls={}): |
130 template = get_template(get_config().get('subscriptionStats', 'subscriptionOve
rviewTemplate')) | 70 ensure_dir(outputfile) |
131 template.stream({'now': time.time(), 'fileName': fileName, 'month': months}).d
ump(file) | 71 get_file_stats_template().stream({ |
| 72 "now": time.time(), |
| 73 "month": month, |
| 74 "url": url, |
| 75 "overview_url": overview_url, |
| 76 "data": data, |
| 77 "fields": common.fields, |
| 78 "filter": filter, |
| 79 "filtered_urls": filtered_urls |
| 80 }).dump(outputfile) |
| 81 |
def generate_file_overview(outputfile, url, data):
    """Render the file overview template into outputfile.

    The directory that will hold outputfile is created first if needed.
    The template receives the current timestamp as "now" together with
    the url and data values passed in by the caller.
    """
    ensure_dir(outputfile)
    template = get_file_overview_template()
    context = {
        "now": time.time(),
        "url": url,
        "data": data
    }
    template.stream(context).dump(outputfile)
| 89 |
def get_names(dir, needdirectories):
    """Yield (decoded name, full path) pairs for the entries of dir.

    When needdirectories is true only sub-directories are reported,
    otherwise only regular files. Entry names are passed through
    common.filename_decode before being yielded.
    """
    # Pick the matching path predicate once instead of re-evaluating the
    # directory/file condition for every entry.
    wanted = os.path.isdir if needdirectories else os.path.isfile
    for entry in os.listdir(dir):
        full_path = os.path.join(dir, entry)
        if wanted(full_path):
            yield common.filename_decode(entry), full_path
| 95 |
def generate_pages(datadir, outputdir):
    """Generate every statistics page for the data stored under datadir.

    datadir is walked three levels deep via get_names(): one directory per
    server type, inside it one directory per month, inside that one JSON
    file per tracked file (a trailing ".json" is stripped from the name).

    For each server type the following is written below outputdir:
      * <server>/<month>/<file>/index.html - full statistics for the file
      * <server>/<month>/<file>/filtered-<field>-<value>.html - filtered
        views for up to the first thirty values of each field, but only for
        values that carry more than plain "hits"/"bandwidth" totals
      * <server>/overview-<file>.html - per-file overview across months
      * <server>/index.html - main page for the greatest month name seen
    """
    encode = common.filename_encode

    for server_type, server_type_dir in get_names(datadir, True):
        baseURL = get_config().get("stats", "baseURL_" + server_type)
        server_outputdir = os.path.join(outputdir, encode(server_type))
        filedata = {}
        current_month = None
        for month, month_dir in get_names(server_type_dir, True):
            # Track the greatest month name; it selects the main page data.
            if current_month is None or month > current_month:
                current_month = month

            for filename, path in get_names(month_dir, False):
                filename = re.sub(r"\.json$", "", filename)
                with codecs.open(path, "rb", encoding="utf-8") as handle:
                    data = simplejson.load(handle)

                file_url = baseURL + filename
                file_outputdir = os.path.join(server_outputdir, encode(month),
                                              encode(filename))
                overview_url = "../../overview-" + encode(filename + ".html")
                filtered_urls = {}
                for field in common.fields:
                    field_name = field["name"]
                    if field_name not in data:
                        continue
                    # Create filtered views for the first thirty values of a
                    # field if they have filtered data.
                    sort_field = get_template_environment().filters["sortfield"]
                    for name, value in sort_field(data[field_name], field)[0:30]:
                        # any() stops at the first extra key and, unlike
                        # testing the result of filter(), does not depend on
                        # filter() returning a list.
                        if not any(key not in ("hits", "bandwidth") for key in value):
                            continue
                        outputfile = os.path.join(
                            file_outputdir,
                            "filtered-%s-%s.html" % (encode(field_name), encode(name)))
                        generate_file_stats(outputfile, month, file_url, overview_url,
                                            value, filter={"field": field, "value": name})
                        filtered_urls.setdefault(field_name, {})[name] = \
                            os.path.basename(outputfile)

                outputfile = os.path.join(file_outputdir, "index.html")
                generate_file_stats(outputfile, month, file_url, overview_url,
                                    data, filtered_urls=filtered_urls)

                month_url = encode(month) + "/" + encode(filename) + "/" + "index.html"
                filedata.setdefault(filename, {})[month] = {
                    "url": month_url,
                    "hits": data["hits"],
                    "bandwidth": data["bandwidth"],
                }

        monthdata = {}
        for filename, data in filedata.items():
            outputfile = os.path.join(server_outputdir,
                                      "overview-" + encode(filename + ".html"))
            generate_file_overview(outputfile, baseURL + filename, data)

            # Only files that have data for the latest month appear on the
            # main page; copy the entry so the overview data stays untouched.
            if current_month in data:
                monthdata[filename] = dict(data[current_month])

        outputfile = os.path.join(server_outputdir, "index.html")
        generate_main_page(outputfile, current_month, baseURL, monthdata)
132 | 161 |
133 if __name__ == '__main__': | 162 if __name__ == '__main__': |
134 setupStderr() | 163 setupStderr() |
135 | 164 |
136 data = SafeConfigParser() | 165 datadir = get_config().get("stats", "dataDirectory") |
137 data.read(get_config().get('subscriptionStats', 'mainFile')) | 166 outputdir = get_config().get("stats", "outputDirectory") |
138 | 167 generate_pages(datadir, outputdir) |
139 outputDir = get_config().get('subscriptionStats', 'outputDirectory') | |
140 if not os.path.exists(outputDir): | |
141 os.makedirs(outputDir) | |
142 generateMainPage(data, outputDir) | |
143 subscriptions = generateSubscriptionPages(data, outputDir) | |
144 for fileName in subscriptions.iterkeys(): | |
145 generateOverviewPage(data, outputDir, fileName) | |
OLD | NEW |