OLD | NEW |
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-2016 Eyeo GmbH | 2 # Copyright (C) 2006-2016 Eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
12 # | 12 # |
13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
15 | 15 |
16 import os | 16 import os |
17 import re | 17 import re |
18 import codecs | 18 import codecs |
19 import json | 19 import json |
20 import time | 20 import time |
21 import itertools | 21 import itertools |
22 from datetime import date | 22 from datetime import date |
23 from sitescripts.utils import get_config, setupStderr, get_custom_template_envir
onment, cached | 23 from sitescripts.utils import get_config, setupStderr, get_custom_template_envir
onment, cached |
24 import sitescripts.stats.common as common | 24 import sitescripts.stats.common as common |
25 from sitescripts.stats.countrycodes import countrycodes | 25 from sitescripts.stats.countrycodes import countrycodes |
26 | 26 |
27 | 27 |
@cached(float('inf'))
def get_template_environment():
    """Build the template environment used by all statistics pages.

    The helpers defined below are handed to
    get_custom_template_environment() as a name -> callable mapping;
    generate_pages() later retrieves 'sortfield' through the environment's
    ``filters`` mapping.

    NOTE(review): cached(float('inf')) presumably keeps the result for the
    lifetime of the process - confirm against sitescripts.utils.cached.
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    def monthname(value):
        # value is sliced as four year characters followed by the month.
        year = int(value[0:4])
        month = int(value[4:])
        return date(year, month, 1).strftime('%b %Y')

    def weekday(value):
        return weekday_names[int(value)]

    def countryname(value):
        return countrycodes.get(value, 'Unknown')

    def sortfield(value, field):
        # A field may bring its own sort function; otherwise sort by hits.
        if 'sort' in field:
            sorter = field['sort']
        else:
            sorter = default_sort
        return sorter(value)

    def maxhits(items):
        return max(entry['hits'] for _, entry in items)

    def maxbandwidth(items):
        return max(entry['bandwidth'] for _, entry in items)

    def sumhits(items):
        return sum(entry['hits'] for _, entry in items)

    def sumbandwidth(items):
        return sum(entry['bandwidth'] for _, entry in items)

    def percentage(value, total):
        # Guard against division by zero when there is no total.
        if total != 0:
            return float(value) / total * 100
        return 0

    def isspecial(name, field):
        if 'isspecial' in field:
            return field['isspecial'](name)
        return False

    return get_custom_template_environment({
        'monthname': monthname,
        'weekday': weekday,
        'countryname': countryname,
        'sortfield': sortfield,
        'maxhits': maxhits,
        'maxbandwidth': maxbandwidth,
        'sumhits': sumhits,
        'sumbandwidth': sumbandwidth,
        'percentage': percentage,
        'isspecial': isspecial,
        'defaultcount': get_default_count,
    })
43 | 43 |
44 | 44 |
@cached(float('inf'))
def get_main_page_template():
    """Load the template named by the 'mainPageTemplate' option.

    The option is read from the 'stats' section of the configuration and
    resolved through the shared template environment.
    """
    environment = get_template_environment()
    template_name = get_config().get('stats', 'mainPageTemplate')
    return environment.get_template(template_name)
48 | 48 |
49 | 49 |
@cached(float('inf'))
def get_file_stats_template():
    """Load the template named by the 'filePageTemplate' option.

    The option is read from the 'stats' section of the configuration and
    resolved through the shared template environment.
    """
    environment = get_template_environment()
    template_name = get_config().get('stats', 'filePageTemplate')
    return environment.get_template(template_name)
53 | 53 |
54 | 54 |
@cached(float('inf'))
def get_file_overview_template():
    """Load the template named by the 'fileOverviewTemplate' option.

    The option is read from the 'stats' section of the configuration and
    resolved through the shared template environment.
    """
    environment = get_template_environment()
    template_name = get_config().get('stats', 'fileOverviewTemplate')
    return environment.get_template(template_name)
58 | 58 |
59 | 59 |
def default_sort(obj):
    """Return the (key, value) pairs of obj ordered by descending hits.

    obj -- mapping whose values are mappings containing a 'hits' entry

    The key function indexes into the pair instead of unpacking it in the
    lambda's parameter list: tuple parameters are not valid syntax on
    Python 3, while indexing behaves the same on both major versions.
    """
    return sorted(obj.items(), key=lambda item: item[1]['hits'], reverse=True)
62 | 62 |
63 | 63 |
def ensure_dir(path):
    """Create the directory that is to contain the file at path.

    path -- path of a file about to be written

    Missing intermediate directories are created as well. An already
    existing directory is not an error. Any other failure (for example
    insufficient permissions, or a regular file occupying the directory's
    name) is raised as OSError instead of being silently ignored.
    """
    import errno

    parent = os.path.dirname(path)
    if not parent:
        # A bare file name lives in the current directory; nothing to create.
        return
    try:
        os.makedirs(parent)
    except OSError as error:
        # Only "already exists as a directory" is an expected outcome.
        if error.errno != errno.EEXIST or not os.path.isdir(parent):
            raise
70 | 70 |
71 | 71 |
def generate_main_page(outputfile, month, url, data):
    """Render the main page template into outputfile as UTF-8.

    outputfile -- path of the HTML file to write; its directory is created
                  via ensure_dir() first
    month, url, data -- passed to the template under the same names,
                        together with the current timestamp as 'now'
    """
    ensure_dir(outputfile)
    template = get_main_page_template()
    context = {
        'now': time.time(),
        'month': month,
        'url': url,
        'data': data,
    }
    template.stream(context).dump(outputfile, encoding='utf-8')
80 | 80 |
81 | 81 |
def generate_file_stats(outputfile, month, url, overview_url, data, filter=None, filtered_urls=None):
    """Render the per-file statistics template into outputfile as UTF-8.

    outputfile -- path of the HTML file to write; its directory is created
                  via ensure_dir() first
    month, url, overview_url, data -- passed to the template under the same
                                      names
    filter -- optional description of the filtered view being rendered;
              generate_pages() passes {'field': ..., 'value': ...}
    filtered_urls -- optional mapping handed to the template; generate_pages()
                     passes field name -> value name -> page file name.
                     Defaults to a fresh empty dict per call.

    The template additionally receives the current timestamp as 'now' and
    common.fields as 'fields'.
    """
    # A None sentinel avoids sharing one dict object between all calls.
    if filtered_urls is None:
        filtered_urls = {}

    ensure_dir(outputfile)
    get_file_stats_template().stream({
        'now': time.time(),
        'month': month,
        'url': url,
        'overview_url': overview_url,
        'data': data,
        'fields': common.fields,
        'filter': filter,
        'filtered_urls': filtered_urls
    }).dump(outputfile, encoding='utf-8')
94 | 94 |
95 | 95 |
def generate_file_overview(outputfile, url, data):
    """Render the file overview template into outputfile as UTF-8.

    outputfile -- path of the HTML file to write; its directory is created
                  via ensure_dir() first
    url, data -- passed to the template under the same names, together with
                 the current timestamp as 'now'
    """
    ensure_dir(outputfile)
    template = get_file_overview_template()
    context = {
        'now': time.time(),
        'url': url,
        'data': data
    }
    template.stream(context).dump(outputfile, encoding='utf-8')
103 | 103 |
104 | 104 |
def get_names(dir, needdirectories):
    """Yield (decoded name, path) for the matching entries of a directory.

    dir -- directory to list
    needdirectories -- when true only subdirectories are yielded, otherwise
                       only regular files

    Names are passed through common.filename_decode(); the path is the
    entry's raw name joined onto dir.
    """
    # Pick the test once instead of re-evaluating the flag for every entry.
    wanted = os.path.isdir if needdirectories else os.path.isfile
    for entry in os.listdir(dir):
        full_path = os.path.join(dir, entry)
        if not wanted(full_path):
            continue
        yield common.filename_decode(entry), full_path
110 | 110 |
111 | 111 |
def get_default_count(field):
    """Return the field's 'defaultcount' entry, or 30 when it has none.

    field -- mapping describing a statistics field
    """
    fallback = 30
    return field.get('defaultcount', fallback)
114 | 114 |
115 | 115 |
def generate_pages(datadir, outputdir):
    """Render every statistics page for the JSON data found below datadir.

    datadir -- directory containing one subdirectory per server type; each
               of those contains one subdirectory per month, which in turn
               contains the JSON data files
    outputdir -- directory below which the HTML pages are written

    For each data file (a trailing '.json' is dropped from its decoded name)
    an 'index.html' page is written, plus 'filtered-<field>-<value>.html'
    pages for the leading values of every field in common.fields that carry
    a breakdown of their own. Each server type additionally gets an
    'overview-<file>.html' page per file and an 'index.html' for the
    greatest month name found.
    """
    for server_type, server_type_dir in get_names(datadir, True):
        baseURL = get_config().get('stats', 'baseURL_' + server_type)
        server_output_dir = os.path.join(outputdir, common.filename_encode(server_type))
        # filename -> month -> {'url', 'hits', 'bandwidth'} summary.
        filedata = {}
        current_month = None
        for month, month_dir in get_names(server_type_dir, True):
            # Remember the greatest month name; it drives the main page.
            if current_month is None or month > current_month:
                current_month = month

            for filename, path in get_names(month_dir, False):
                filename = re.sub(r'\.json$', '', filename)
                with codecs.open(path, 'rb', encoding='utf-8') as handle:
                    data = json.load(handle)

                file_output_dir = os.path.join(server_output_dir,
                                               common.filename_encode(month),
                                               common.filename_encode(filename))
                overview_url = '../../overview-' + common.filename_encode(filename + '.html')
                filtered_urls = {}
                for field in common.fields:
                    if field['name'] not in data:
                        continue
                    # Create filtered views for the leading values of a field
                    # (as many as get_default_count() allows) if they have
                    # filtered data.
                    sorted_field = get_template_environment().filters['sortfield'](data[field['name']], field)
                    for name, value in sorted_field[0:get_default_count(field)]:
                        # Anything besides the plain totals counts as
                        # filtered data. any() is used because a filter()
                        # result is always truthy on Python 3.
                        if not any(key not in ('hits', 'bandwidth') for key in value):
                            continue

                        outputfile = os.path.join(file_output_dir,
                                                  'filtered-%s-%s.html' % (
                                                      common.filename_encode(field['name']),
                                                      common.filename_encode(name),
                                                  ))
                        generate_file_stats(outputfile, month, baseURL + filename, overview_url,
                                            value, filter={'field': field, 'value': name})

                        filtered_urls.setdefault(field['name'], {})[name] = os.path.basename(outputfile)

                outputfile = os.path.join(file_output_dir, 'index.html')
                generate_file_stats(outputfile, month, baseURL + filename, overview_url,
                                    data, filtered_urls=filtered_urls)

                month_url = '%s/%s/%s' % (common.filename_encode(month),
                                          common.filename_encode(filename),
                                          'index.html')
                filedata.setdefault(filename, {})[month] = {
                    'url': month_url,
                    'hits': data['hits'],
                    'bandwidth': data['bandwidth'],
                }

        # filename -> summary of that file for the most recent month.
        monthdata = {}
        for filename, data in filedata.items():
            outputfile = os.path.join(server_output_dir,
                                      'overview-' + common.filename_encode(filename + '.html'))
            generate_file_overview(outputfile, baseURL + filename, data)

            if current_month in data:
                monthdata[filename] = dict(data[current_month])

        outputfile = os.path.join(server_output_dir, 'index.html')
        generate_main_page(outputfile, current_month, baseURL, monthdata)
182 | 182 |
if __name__ == '__main__':
    # Script entry point: read the input and output locations from the
    # 'stats' section of the configuration and regenerate all pages.
    setupStderr()

    datadir, outputdir = (
        get_config().get('stats', 'dataDirectory'),
        get_config().get('stats', 'outputDirectory'),
    )
    generate_pages(datadir, outputdir)
OLD | NEW |