| OLD | NEW |
| (Empty) |
| 1 # This file is part of the Adblock Plus web scripts, | |
| 2 # Copyright (C) 2006-present eyeo GmbH | |
| 3 # | |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | |
| 5 # it under the terms of the GNU General Public License version 3 as | |
| 6 # published by the Free Software Foundation. | |
| 7 # | |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 11 # GNU General Public License for more details. | |
| 12 # | |
| 13 # You should have received a copy of the GNU General Public License | |
| 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 15 | |
| 16 import os | |
| 17 import re | |
| 18 import codecs | |
| 19 import json | |
| 20 import time | |
| 21 import itertools | |
| 22 from datetime import date | |
| 23 from sitescripts.utils import get_config, setupStderr, get_custom_template_envir
onment, cached | |
| 24 import sitescripts.stats.common as common | |
| 25 from sitescripts.stats.countrycodes import countrycodes | |
| 26 | |
| 27 | |
@cached(float('inf'))
def get_template_environment():
    """Build the template environment with the stats-specific helpers.

    The helpers are handed to get_custom_template_environment() as a mapping
    from helper name to callable.
    NOTE(review): @cached(float('inf')) presumably keeps the result for the
    lifetime of the process - confirm against sitescripts.utils.cached.
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    def monthname(value):
        # The first four characters are the year, the remainder the month.
        year, month = int(value[0:4]), int(value[4:])
        return date(year, month, 1).strftime('%b %Y')

    def weekday(value):
        return weekday_names[int(value)]

    def countryname(value):
        return countrycodes.get(value, 'Unknown')

    def sortfield(value, field):
        # A field may bring its own sort function; otherwise sort by hits.
        sorter = field['sort'] if 'sort' in field else default_sort
        return sorter(value)

    def maxhits(items):
        return max(entry['hits'] for _, entry in items)

    def maxbandwidth(items):
        return max(entry['bandwidth'] for _, entry in items)

    def sumhits(items):
        return sum(entry['hits'] for _, entry in items)

    def sumbandwidth(items):
        return sum(entry['bandwidth'] for _, entry in items)

    def percentage(value, total):
        # Guard against division by zero when there is no total.
        if total != 0:
            return float(value) / total * 100
        return 0

    def isspecial(name, field):
        if 'isspecial' in field:
            return field['isspecial'](name)
        return False

    return get_custom_template_environment({
        'monthname': monthname,
        'weekday': weekday,
        'countryname': countryname,
        'sortfield': sortfield,
        'maxhits': maxhits,
        'maxbandwidth': maxbandwidth,
        'sumhits': sumhits,
        'sumbandwidth': sumbandwidth,
        'percentage': percentage,
        'isspecial': isspecial,
        'defaultcount': get_default_count,
    })
| 43 | |
| 44 | |
@cached(float('inf'))
def get_main_page_template():
    """Load the template named by the ``mainPageTemplate`` option of the
    ``stats`` configuration section."""
    environment = get_template_environment()
    template_name = get_config().get('stats', 'mainPageTemplate')
    return environment.get_template(template_name)
| 48 | |
| 49 | |
@cached(float('inf'))
def get_file_stats_template():
    """Load the template named by the ``filePageTemplate`` option of the
    ``stats`` configuration section."""
    environment = get_template_environment()
    template_name = get_config().get('stats', 'filePageTemplate')
    return environment.get_template(template_name)
| 53 | |
| 54 | |
@cached(float('inf'))
def get_file_overview_template():
    """Load the template named by the ``fileOverviewTemplate`` option of the
    ``stats`` configuration section."""
    environment = get_template_environment()
    template_name = get_config().get('stats', 'fileOverviewTemplate')
    return environment.get_template(template_name)
| 58 | |
| 59 | |
def default_sort(obj):
    """Return the (key, value) pairs of obj ordered by descending hit count.

    obj maps names to dicts that carry a 'hits' entry. The result is a list
    of (name, value) tuples; entries with equal hits keep the order in which
    obj yields them.
    """
    # Index into the pair instead of unpacking it in the lambda signature:
    # tuple parameters are a syntax error on Python 3 (PEP 3113).
    return sorted(obj.items(), key=lambda item: item[1]['hits'], reverse=True)
| 62 | |
| 63 | |
def ensure_dir(path):
    """Make sure the directory that will contain path exists.

    path -- path of a file about to be written; only its directory part is
            created, never the file itself.

    An already existing directory is not an error. Any other failure to
    create the directory (for example a regular file in the way or missing
    permissions) is raised as OSError instead of being silently ignored.
    """
    directory = os.path.dirname(path)
    if not directory:
        # A bare file name has no directory component, so there is nothing
        # to create.
        return
    try:
        os.makedirs(directory)
    except OSError:
        # The directory may already exist or may have been created
        # concurrently; everything else is a genuine error.
        if not os.path.isdir(directory):
            raise
| 70 | |
| 71 | |
def generate_main_page(outputfile, month, url, data):
    """Render the main page template into outputfile, encoded as UTF-8.

    The template receives the current timestamp as ``now`` together with
    the given ``month``, ``url`` and ``data`` values.
    """
    ensure_dir(outputfile)
    template = get_main_page_template()
    context = {
        'now': time.time(),
        'month': month,
        'url': url,
        'data': data,
    }
    template.stream(context).dump(outputfile, encoding='utf-8')
| 80 | |
| 81 | |
def generate_file_stats(outputfile, month, url, overview_url, data, filter=None, filtered_urls=None):
    """Render the per-file statistics template into outputfile as UTF-8.

    outputfile -- path of the HTML file to write; its directory is created
    month -- month the statistics belong to
    url -- URL of the file the statistics describe
    overview_url -- link to the overview page of that file
    data -- statistics handed to the template
    filter -- optional description of the filter this page shows
    filtered_urls -- optional mapping handed to the template for linking to
                     filtered pages; defaults to an empty dict
    """
    # Use a fresh dict per call rather than a shared mutable default.
    if filtered_urls is None:
        filtered_urls = {}
    ensure_dir(outputfile)
    get_file_stats_template().stream({
        'now': time.time(),
        'month': month,
        'url': url,
        'overview_url': overview_url,
        'data': data,
        'fields': common.fields,
        'filter': filter,
        'filtered_urls': filtered_urls,
    }).dump(outputfile, encoding='utf-8')
| 94 | |
| 95 | |
def generate_file_overview(outputfile, url, data):
    """Render the file overview template into outputfile, encoded as UTF-8.

    The template receives the current timestamp as ``now`` together with
    the given ``url`` and ``data`` values.
    """
    ensure_dir(outputfile)
    template = get_file_overview_template()
    context = {
        'now': time.time(),
        'url': url,
        'data': data,
    }
    template.stream(context).dump(outputfile, encoding='utf-8')
| 103 | |
| 104 | |
def get_names(dir, needdirectories):
    """Yield (decoded name, path) pairs for the entries of dir.

    With a true needdirectories only subdirectories are yielded, otherwise
    only regular files. Names are passed through common.filename_decode().
    """
    # Pick the matching predicate once instead of re-testing the flag for
    # every entry.
    wanted = os.path.isdir if needdirectories else os.path.isfile
    for entry in os.listdir(dir):
        entry_path = os.path.join(dir, entry)
        if not wanted(entry_path):
            continue
        yield common.filename_decode(entry), entry_path
| 110 | |
| 111 | |
def get_default_count(field):
    """Return the field's 'defaultcount' entry, or 30 if it has none."""
    fallback = 30
    return field.get('defaultcount', fallback)
| 114 | |
| 115 | |
def generate_pages(datadir, outputdir):
    """Generate all statistics pages from the JSON data below datadir.

    The data directory is walked as <server type>/<month>/<data file>. For
    every data file an ``index.html`` and the filtered views are written,
    for every file name an overview page covering all months it appears in,
    and for every server type a main page for the greatest month found.

    datadir -- root directory containing the collected JSON data
    outputdir -- root directory that receives the generated HTML files
    """
    for server_type, server_type_dir in get_names(datadir, True):
        baseURL = get_config().get('stats', 'baseURL_' + server_type)
        server_output_dir = os.path.join(outputdir, common.filename_encode(server_type))
        filedata = {}
        current_month = None
        for month, month_dir in get_names(server_type_dir, True):
            # Remember the greatest month name; it is used for the main page.
            # NOTE(review): presumably names look like YYYYMM so that this
            # ordering is chronological - confirm against the data producer.
            if current_month is None or month > current_month:
                current_month = month
            encoded_month = common.filename_encode(month)

            for filename, path in get_names(month_dir, False):
                filename = re.sub(r'\.json$', '', filename)
                with codecs.open(path, 'rb', encoding='utf-8') as handle:
                    data = json.load(handle)

                encoded_filename = common.filename_encode(filename)
                file_output_dir = os.path.join(server_output_dir, encoded_month, encoded_filename)
                overview_url = '../../overview-' + common.filename_encode(filename + '.html')
                filtered_urls = {}
                for field in common.fields:
                    if field['name'] not in data:
                        continue
                    # Create filtered views for the first values of a field
                    # (as many as get_default_count() allows) if they have
                    # filtered data.
                    sorted_field = get_template_environment().filters['sortfield'](data[field['name']], field)
                    for name, value in sorted_field[0:get_default_count(field)]:
                        # Only entries with keys other than the plain
                        # counters get a filtered page. any() is used because
                        # filter() is always truthy on Python 3.
                        if not any(key not in ('hits', 'bandwidth') for key in value):
                            continue
                        outputfile = os.path.join(file_output_dir,
                                                  'filtered-%s-%s.html' % (
                                                      common.filename_encode(field['name']),
                                                      common.filename_encode(name),
                                                  ))
                        generate_file_stats(outputfile, month, baseURL + filename, overview_url,
                                            value, filter={'field': field, 'value': name})
                        filtered_urls.setdefault(field['name'], {})[name] = os.path.basename(outputfile)

                outputfile = os.path.join(file_output_dir, 'index.html')
                generate_file_stats(outputfile, month, baseURL + filename, overview_url,
                                    data, filtered_urls=filtered_urls)

                month_url = '%s/%s/%s' % (encoded_month, encoded_filename, 'index.html')
                filedata.setdefault(filename, {})[month] = {
                    'url': month_url,
                    'hits': data['hits'],
                    'bandwidth': data['bandwidth'],
                }

        monthdata = {}
        for filename, data in filedata.items():
            outputfile = os.path.join(server_output_dir,
                                      'overview-' + common.filename_encode(filename + '.html'))
            generate_file_overview(outputfile, baseURL + filename, data)

            # The main page only lists files that have data for the
            # greatest month.
            if current_month in data:
                monthdata[filename] = dict(data[current_month])

        outputfile = os.path.join(server_output_dir, 'index.html')
        generate_main_page(outputfile, current_month, baseURL, monthdata)
| 182 | |
| 183 | |
if __name__ == '__main__':
    # Script entry point: read both directories from the 'stats' section of
    # the configuration and generate the pages.
    setupStderr()

    generate_pages(
        get_config().get('stats', 'dataDirectory'),
        get_config().get('stats', 'outputDirectory'),
    )
| OLD | NEW |