Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/stats/bin/pagegenerator.py

Issue 29934561: #1537 - Remove stats processing from sitescripts (Closed) Base URL: https://hg.adblockplus.org/sitescripts
Patch Set: Created Nov. 2, 2018, 12:42 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
# This file is part of the Adblock Plus web scripts,
# Copyright (C) 2006-present eyeo GmbH
#
# Adblock Plus is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 as
# published by the Free Software Foundation.
#
# Adblock Plus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15
import os
import re
import codecs
import json
import time
import itertools
from datetime import date

from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached
import sitescripts.stats.common as common
from sitescripts.stats.countrycodes import countrycodes
26
27
@cached(float('inf'))
def get_template_environment():
    """Return the template environment shared by all statistics pages.

    The environment is built by get_custom_template_environment() from the
    mapping of names to callables below; generate_pages() looks them up
    again through the environment's ``filters`` attribute.
    NOTE(review): @cached(float('inf')) presumably keeps the result for the
    lifetime of the process -- confirm against sitescripts.utils.
    """
    return get_custom_template_environment({
        # 'YYYYMM' string -> abbreviated month name plus year ('%b %Y').
        'monthname': lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime('%b %Y'),
        # Numeric weekday -> English name, with 0 meaning Monday.
        'weekday': lambda value: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'][int(value)],
        # Country code -> display name, 'Unknown' for codes not in the table.
        'countryname': lambda value: countrycodes.get(value, 'Unknown'),
        # Sort with the field's own 'sort' callable if it has one, otherwise
        # by hits via default_sort().
        'sortfield': lambda value, field: (field['sort'] if 'sort' in field else default_sort)(value),
        # The four aggregates below expect an iterable of (key, value) pairs
        # whose values carry 'hits' / 'bandwidth' entries.
        'maxhits': lambda items: max(value['hits'] for key, value in items),
        'maxbandwidth': lambda items: max(value['bandwidth'] for key, value in items),
        'sumhits': lambda items: sum(value['hits'] for key, value in items),
        'sumbandwidth': lambda items: sum(value['bandwidth'] for key, value in items),
        # Guard against division by zero when the total is 0.
        'percentage': lambda value, total: float(value) / total * 100 if total != 0 else 0,
        # Delegate to the field's optional 'isspecial' predicate.
        'isspecial': lambda name, field: field['isspecial'](name) if 'isspecial' in field else False,
        'defaultcount': get_default_count,
    })
43
44
@cached(float('inf'))
def get_main_page_template():
    """Return the template for a server type's main statistics page.

    The template name is read from the 'mainPageTemplate' option of the
    'stats' configuration section and loaded through the shared template
    environment.
    """
    return get_template_environment().get_template(get_config().get('stats', 'mainPageTemplate'))
48
49
@cached(float('inf'))
def get_file_stats_template():
    """Return the template for a per-file statistics page.

    The template name is read from the 'filePageTemplate' option of the
    'stats' configuration section and loaded through the shared template
    environment.
    """
    return get_template_environment().get_template(get_config().get('stats', 'filePageTemplate'))
53
54
@cached(float('inf'))
def get_file_overview_template():
    """Return the template for a per-file overview page.

    The template name is read from the 'fileOverviewTemplate' option of the
    'stats' configuration section and loaded through the shared template
    environment.
    """
    return get_template_environment().get_template(get_config().get('stats', 'fileOverviewTemplate'))
58
59
def default_sort(obj):
    """Return the (key, value) pairs of *obj* ordered by hits, highest first.

    *obj* is a mapping whose values are themselves mappings with a 'hits'
    entry. The result is a list of (key, value) tuples.
    """
    # Index into the pair instead of using a tuple parameter
    # (``lambda (k, v): ...``), which Python 3 rejects as a syntax error.
    return sorted(obj.items(), key=lambda item: item[1]['hits'], reverse=True)
62
63
def ensure_dir(path):
    """Make sure the directory that will contain *path* exists.

    Missing parent directories are created as needed. A directory that
    already exists is not an error.

    Raises:
        OSError: if the directory is missing and could not be created
            (for example because of permissions, or because a regular
            file is in the way).
    """
    directory = os.path.dirname(path)
    if not directory:
        # A bare file name lives in the current directory; nothing to create.
        return
    try:
        os.makedirs(directory)
    except OSError:
        # Only "already exists" is harmless. Anything else would make the
        # later write fail anyway, so report it here.
        if not os.path.isdir(directory):
            raise
70
71
def generate_main_page(outputfile, month, url, data):
    """Render the main page template into *outputfile* as UTF-8.

    The template receives the current timestamp as 'now' together with
    *month*, *url* and *data*. The parent directory of *outputfile* is
    created first via ensure_dir().
    """
    ensure_dir(outputfile)
    template = get_main_page_template()
    context = dict(now=time.time(), month=month, url=url, data=data)
    template.stream(context).dump(outputfile, encoding='utf-8')
80
81
def generate_file_stats(outputfile, month, url, overview_url, data, filter=None, filtered_urls=None):
    """Render the per-file statistics template into *outputfile* as UTF-8.

    Args:
        outputfile: path of the HTML file to write; its parent directory
            is created if necessary.
        month: month identifier passed through to the template.
        url: URL of the file the statistics are about.
        overview_url: link to the file's overview page.
        data: statistics passed to the template as 'data'.
        filter: optional description of the active filter; generate_pages()
            passes a dict with 'field' and 'value' keys.
        filtered_urls: optional mapping of field name to a mapping of
            value name to the file name of its filtered page. Defaults to
            an empty mapping.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if filtered_urls is None:
        filtered_urls = {}
    ensure_dir(outputfile)
    get_file_stats_template().stream({
        'now': time.time(),
        'month': month,
        'url': url,
        'overview_url': overview_url,
        'data': data,
        'fields': common.fields,
        'filter': filter,
        'filtered_urls': filtered_urls,
    }).dump(outputfile, encoding='utf-8')
94
95
def generate_file_overview(outputfile, url, data):
    """Render the file overview template into *outputfile* as UTF-8.

    The template receives the current timestamp as 'now' together with
    *url* and *data*. The parent directory of *outputfile* is created
    first via ensure_dir().
    """
    ensure_dir(outputfile)
    template = get_file_overview_template()
    context = dict(now=time.time(), url=url, data=data)
    template.stream(context).dump(outputfile, encoding='utf-8')
103
104
def get_names(dir, needdirectories):
    """Yield (decoded name, full path) for the entries of directory *dir*.

    When *needdirectories* is true only subdirectories are yielded,
    otherwise only regular files. Names are passed through
    common.filename_decode(); paths are *dir* joined with the raw name.
    """
    # Choose the matching predicate once rather than re-testing the flag.
    wanted = os.path.isdir if needdirectories else os.path.isfile
    for entry in os.listdir(dir):
        full_path = os.path.join(dir, entry)
        if not wanted(full_path):
            continue
        yield common.filename_decode(entry), full_path
110
111
def get_default_count(field, fallback=30):
    """Return how many values of *field* are shown by default.

    Args:
        field: mapping describing a statistics field; may carry a
            'defaultcount' entry.
        fallback: count to use when the field has no 'defaultcount'
            entry. Defaults to 30.
    """
    return field.get('defaultcount', fallback)
114
115
def _has_filtered_data(value):
    """Return True if *value* has any key besides 'hits' and 'bandwidth'."""
    # any() gives a real boolean on Python 2 and 3 alike; a bare filter()
    # object is always truthy on Python 3.
    return any(key not in ('hits', 'bandwidth') for key in value)


def generate_pages(datadir, outputdir):
    """Generate all HTML statistics pages below *outputdir*.

    *datadir* is walked as <server type>/<month>/<file>.json. For every
    JSON file this writes:

    * one 'filtered-<field>-<value>.html' page for each of the first
      get_default_count(field) sorted values of a field that carry more
      than the plain 'hits'/'bandwidth' totals,
    * an 'index.html' page for the file in that month.

    For every server type it additionally writes one
    'overview-<file>.html' page per file and an 'index.html' main page
    covering the latest month found.

    Args:
        datadir: root directory holding the collected JSON data.
        outputdir: root directory the HTML pages are written to.
    """
    for server_type, server_type_dir in get_names(datadir, True):
        baseURL = get_config().get('stats', 'baseURL_' + server_type)
        server_output_dir = os.path.join(outputdir, common.filename_encode(server_type))
        filedata = {}
        current_month = None
        for month, month_dir in get_names(server_type_dir, True):
            # Track the greatest month name; the main page is generated
            # for it.
            if current_month is None or month > current_month:
                current_month = month

            for filename, path in get_names(month_dir, False):
                filename = re.sub(r'\.json$', '', filename)
                with codecs.open(path, 'rb', encoding='utf-8') as handle:
                    data = json.load(handle)

                overview_url = '../../overview-' + common.filename_encode(filename + '.html')
                file_output_dir = os.path.join(server_output_dir,
                                               common.filename_encode(month),
                                               common.filename_encode(filename))
                filtered_urls = {}
                for field in common.fields:
                    if field['name'] not in data:
                        continue
                    # Create filtered views for the first values of a field
                    # (get_default_count() of them, 30 unless the field says
                    # otherwise) if they have filtered data.
                    sorted_field = get_template_environment().filters['sortfield'](data[field['name']], field)
                    for name, value in sorted_field[0:get_default_count(field)]:
                        if not _has_filtered_data(value):
                            continue
                        outputfile = os.path.join(file_output_dir,
                                                  'filtered-%s-%s.html' % (
                                                      common.filename_encode(field['name']),
                                                      common.filename_encode(name),
                                                  ))
                        generate_file_stats(outputfile, month, baseURL + filename, overview_url,
                                            value, filter={'field': field, 'value': name})
                        # Remember the page's file name so the file's index
                        # page can link to it.
                        filtered_urls.setdefault(field['name'], {})[name] = os.path.basename(outputfile)

                outputfile = os.path.join(file_output_dir, 'index.html')
                generate_file_stats(outputfile, month, baseURL + filename, overview_url,
                                    data, filtered_urls=filtered_urls)

                month_url = '%s/%s/%s' % (common.filename_encode(month),
                                          common.filename_encode(filename),
                                          'index.html')
                filedata.setdefault(filename, {})[month] = {
                    'url': month_url,
                    'hits': data['hits'],
                    'bandwidth': data['bandwidth'],
                }

        monthdata = {}
        for filename, data in filedata.items():
            outputfile = os.path.join(server_output_dir,
                                      'overview-' + common.filename_encode(filename + '.html'))
            generate_file_overview(outputfile, baseURL + filename, data)

            # Only files with data for the latest month appear on the main
            # page.
            if current_month in data:
                monthdata[filename] = dict(data[current_month])

        outputfile = os.path.join(server_output_dir, 'index.html')
        generate_main_page(outputfile, current_month, baseURL, monthdata)
182
183
if __name__ == '__main__':
    # Script entry point: read the data and output directories from the
    # 'stats' configuration section and generate every page.
    setupStderr()

    generate_pages(
        get_config().get('stats', 'dataDirectory'),
        get_config().get('stats', 'outputDirectory'),
    )
OLDNEW
« no previous file with comments | « sitescripts/stats/bin/logprocessor.py ('k') | sitescripts/stats/common.py » ('j') | tox.ini » ('J')

Powered by Google App Engine
This is Rietveld