OLD | NEW |
| (Empty) |
1 # This file is part of the Adblock Plus web scripts, | |
2 # Copyright (C) 2006-present eyeo GmbH | |
3 # | |
4 # Adblock Plus is free software: you can redistribute it and/or modify | |
5 # it under the terms of the GNU General Public License version 3 as | |
6 # published by the Free Software Foundation. | |
7 # | |
8 # Adblock Plus is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public License | |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
15 | |
16 import os | |
17 import re | |
18 import codecs | |
19 import json | |
20 import time | |
21 import itertools | |
22 from datetime import date | |
23 from sitescripts.utils import get_config, setupStderr, get_custom_template_envir
onment, cached | |
24 import sitescripts.stats.common as common | |
25 from sitescripts.stats.countrycodes import countrycodes | |
26 | |
27 | |
@cached(float('inf'))
def get_template_environment():
    """Create the template environment shared by all statistics pages.

    The environment comes from get_custom_template_environment(), which is
    handed a mapping of filter name to callable. The callables are defined
    below; each one is registered under the same name the templates use.
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                     'Friday', 'Saturday', 'Sunday']

    def monthname(value):
        # value holds a four-digit year followed by the month number.
        return date(int(value[0:4]), int(value[4:]), 1).strftime('%b %Y')

    def weekday(value):
        # Index 0 is Monday, index 6 is Sunday.
        return weekday_names[int(value)]

    def countryname(value):
        return countrycodes.get(value, 'Unknown')

    def sortfield(value, field):
        # A field may bring its own sort function; otherwise sort by hits.
        sorter = field['sort'] if 'sort' in field else default_sort
        return sorter(value)

    def maxhits(items):
        return max(entry['hits'] for _, entry in items)

    def maxbandwidth(items):
        return max(entry['bandwidth'] for _, entry in items)

    def sumhits(items):
        return sum(entry['hits'] for _, entry in items)

    def sumbandwidth(items):
        return sum(entry['bandwidth'] for _, entry in items)

    def percentage(value, total):
        # Guard against division by zero when nothing was counted.
        if total != 0:
            return float(value) / total * 100
        return 0

    def isspecial(name, field):
        if 'isspecial' in field:
            return field['isspecial'](name)
        return False

    return get_custom_template_environment({
        'monthname': monthname,
        'weekday': weekday,
        'countryname': countryname,
        'sortfield': sortfield,
        'maxhits': maxhits,
        'maxbandwidth': maxbandwidth,
        'sumhits': sumhits,
        'sumbandwidth': sumbandwidth,
        'percentage': percentage,
        'isspecial': isspecial,
        'defaultcount': get_default_count,
    })
43 | |
44 | |
@cached(float('inf'))
def get_main_page_template():
    """Return the template configured as 'mainPageTemplate' under 'stats'."""
    environment = get_template_environment()
    template_name = get_config().get('stats', 'mainPageTemplate')
    return environment.get_template(template_name)
48 | |
49 | |
@cached(float('inf'))
def get_file_stats_template():
    """Return the template configured as 'filePageTemplate' under 'stats'."""
    environment = get_template_environment()
    template_name = get_config().get('stats', 'filePageTemplate')
    return environment.get_template(template_name)
53 | |
54 | |
@cached(float('inf'))
def get_file_overview_template():
    """Return the template configured as 'fileOverviewTemplate' under 'stats'."""
    environment = get_template_environment()
    template_name = get_config().get('stats', 'fileOverviewTemplate')
    return environment.get_template(template_name)
58 | |
59 | |
def default_sort(obj):
    """Return the items of *obj* ordered by descending hit count.

    obj is a mapping whose values are dicts carrying a 'hits' entry. The
    result is a list of (key, value) tuples, highest 'hits' first.
    """
    # Index into the (key, value) pair instead of unpacking it in the lambda
    # signature: tuple parameters are a syntax error on Python 3 (PEP 3113),
    # while this form works on both Python 2 and Python 3.
    return sorted(obj.items(), key=lambda item: item[1]['hits'], reverse=True)
62 | |
63 | |
def ensure_dir(path):
    """Make sure the directory that will contain *path* exists.

    path is the path of a file about to be written; only its directory part
    is created (including missing parents). Nothing is done when path has no
    directory component.

    Raises OSError if the directory cannot be created and does not already
    exist (for example missing permissions or a regular file in the way).
    """
    directory = os.path.dirname(path)
    if not directory:
        # A bare file name lives in the current directory; nothing to create.
        return
    try:
        os.makedirs(directory)
    except OSError:
        # An already existing directory is the expected case and is fine.
        # Anything else is a real failure that must not be hidden, otherwise
        # it only resurfaces later as a confusing error when writing the file.
        if not os.path.isdir(directory):
            raise
70 | |
71 | |
def generate_main_page(outputfile, month, url, data):
    """Render the main page template into *outputfile* as UTF-8.

    The directory for outputfile is created first. month, url and data are
    handed to the template unchanged, together with the current time
    under the name 'now'.
    """
    ensure_dir(outputfile)
    template = get_main_page_template()
    context = {
        'now': time.time(),
        'month': month,
        'url': url,
        'data': data,
    }
    template.stream(context).dump(outputfile, encoding='utf-8')
80 | |
81 | |
def generate_file_stats(outputfile, month, url, overview_url, data, filter=None,
                        filtered_urls=None):
    """Render the per-file statistics template into *outputfile* as UTF-8.

    The directory for outputfile is created first. month, url, overview_url,
    data, filter and filtered_urls are handed to the template unchanged,
    together with the current time ('now') and common.fields ('fields').

    filter is None for the unfiltered page. Its name shadows the builtin but
    is kept because callers pass it by keyword.

    filtered_urls defaults to a fresh empty dict for each call.
    """
    # Do not use a dict literal as the default value: it would be created
    # once and shared by every call that omits the argument.
    if filtered_urls is None:
        filtered_urls = {}

    ensure_dir(outputfile)
    get_file_stats_template().stream({
        'now': time.time(),
        'month': month,
        'url': url,
        'overview_url': overview_url,
        'data': data,
        'fields': common.fields,
        'filter': filter,
        'filtered_urls': filtered_urls,
    }).dump(outputfile, encoding='utf-8')
94 | |
95 | |
def generate_file_overview(outputfile, url, data):
    """Render the file overview template into *outputfile* as UTF-8.

    The directory for outputfile is created first. url and data are handed
    to the template unchanged, together with the current time under the
    name 'now'.
    """
    ensure_dir(outputfile)
    template = get_file_overview_template()
    context = {
        'now': time.time(),
        'url': url,
        'data': data,
    }
    template.stream(context).dump(outputfile, encoding='utf-8')
103 | |
104 | |
def get_names(dir, needdirectories):
    """Yield (decoded name, path) pairs for the entries of directory *dir*.

    When needdirectories is true only subdirectories are yielded, otherwise
    only regular files. The name is the entry name run through
    common.filename_decode(); the path is the entry joined onto dir.
    """
    wanted = os.path.isdir if needdirectories else os.path.isfile
    for entry in os.listdir(dir):
        full_path = os.path.join(dir, entry)
        if wanted(full_path):
            yield common.filename_decode(entry), full_path
110 | |
111 | |
def get_default_count(field):
    """Return the field's 'defaultcount' entry, or 30 if it has none."""
    return field['defaultcount'] if 'defaultcount' in field else 30
114 | |
115 | |
def generate_pages(datadir, outputdir):
    """Generate all statistics pages for the data found under *datadir*.

    datadir is walked as <server type>/<month>/<file>, where every file is a
    JSON document (a trailing '.json' is stripped from its name). Path
    components written below outputdir are run through
    common.filename_encode(). For each server type this writes:

    * <server type>/<month>/<file>/index.html - statistics for one file in
      one month,
    * <server type>/<month>/<file>/filtered-<field>-<value>.html - the same
      statistics restricted to one value of one field,
    * <server type>/overview-<file>.html - all months of one file,
    * <server type>/index.html - main page for the latest month found.

    The base URL of a server type is read from the 'baseURL_<server type>'
    option of the 'stats' configuration section.
    """
    for server_type, server_type_dir in get_names(datadir, True):
        baseURL = get_config().get('stats', 'baseURL_' + server_type)
        # Every page of this server type lives below this directory.
        server_output_dir = os.path.join(outputdir,
                                         common.filename_encode(server_type))
        filedata = {}
        current_month = None
        for month, month_dir in get_names(server_type_dir, True):
            # Remember the greatest month name; the main page shows it.
            if current_month is None or month > current_month:
                current_month = month

            for filename, path in get_names(month_dir, False):
                filename = re.sub(r'\.json$', '', filename)
                with codecs.open(path, 'rb', encoding='utf-8') as handle:
                    data = json.load(handle)

                file_url = baseURL + filename
                file_output_dir = os.path.join(server_output_dir,
                                               common.filename_encode(month),
                                               common.filename_encode(filename))
                overview_url = '../../overview-' + common.filename_encode(filename + '.html')
                filtered_urls = {}
                for field in common.fields:
                    field_name = field['name']
                    if field_name not in data:
                        continue
                    # Create filtered views for the first values of a field
                    # (get_default_count() of them, 30 unless the field says
                    # otherwise) if they have filtered data.
                    sorted_field = get_template_environment().filters['sortfield'](data[field_name], field)
                    for name, value in sorted_field[0:get_default_count(field)]:
                        # Anything besides the plain counters means there is
                        # nested data worth a page of its own. any() is used
                        # because filter() returns an always-true iterator
                        # on Python 3.
                        if not any(key not in ('hits', 'bandwidth') for key in value):
                            continue

                        outputfile = os.path.join(file_output_dir,
                                                  'filtered-%s-%s.html' % (
                                                      common.filename_encode(field_name),
                                                      common.filename_encode(name),
                                                  ))
                        generate_file_stats(outputfile, month, file_url, overview_url,
                                            value, filter={'field': field, 'value': name})

                        # The main file page links to the filtered pages by
                        # their file name only; they share its directory.
                        filtered_urls.setdefault(field_name, {})[name] = os.path.basename(outputfile)

                outputfile = os.path.join(file_output_dir, 'index.html')
                generate_file_stats(outputfile, month, file_url, overview_url,
                                    data, filtered_urls=filtered_urls)

                month_url = '%s/%s/%s' % (common.filename_encode(month),
                                          common.filename_encode(filename),
                                          'index.html')
                filedata.setdefault(filename, {})[month] = {
                    'url': month_url,
                    'hits': data['hits'],
                    'bandwidth': data['bandwidth'],
                }

        monthdata = {}
        for filename, data in filedata.items():
            outputfile = os.path.join(server_output_dir,
                                      'overview-' + common.filename_encode(filename + '.html'))
            generate_file_overview(outputfile, baseURL + filename, data)

            # Only files with data for the latest month go on the main page.
            if current_month in data:
                monthdata[filename] = dict(data[current_month])

        outputfile = os.path.join(server_output_dir, 'index.html')
        generate_main_page(outputfile, current_month, baseURL, monthdata)
182 | |
183 | |
if __name__ == '__main__':
    setupStderr()

    # Input and output locations come from the 'stats' configuration section.
    generate_pages(
        get_config().get('stats', 'dataDirectory'),
        get_config().get('stats', 'outputDirectory'),
    )
OLD | NEW |