Left: | ||
Right: |
OLD | NEW |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import os, re, time | 18 import os, re, codecs, simplejson, time, itertools |
19 from datetime import date, timedelta | 19 from datetime import date |
20 from sitescripts.utils import get_config, setupStderr, get_template | 20 from sitescripts.utils import get_config, setupStderr, get_custom_template_envir onment, cached |
21 from sitescripts.logs.countryCodes import countryCodes | 21 import sitescripts.stats.common as common |
22 from ConfigParser import SafeConfigParser | 22 from sitescripts.stats.countrycodes import countrycodes |
23 | 23 |
24 def getSubscriptionFiles(data, month): | 24 @cached(()) |
Sebastian Noack
2013/08/26 16:05:22
You passed an empty tuple as timeout. Obviously yo
Wladimir Palant
2013/08/27 07:34:28
This behavior is somewhat specified as described h
| |
25 result = {} | 25 def get_template_environment(): |
26 if data.has_section(month): | 26 return get_custom_template_environment({ |
27 for option in data.options(month): | 27 "monthname": lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime ("%b %Y"), |
28 result[option[0:option.index(' ')]] = True | 28 "weekday": lambda value: ["Monday", "Tuesday", "Wednesday", "Thursday", "Fri day", "Saturday", "Sunday"][int(value)], |
29 return result | 29 "countryname": lambda value: countrycodes.get(value, "Unknown"), |
30 "sortfield": lambda value, field: (field["sort"] if "sort" in field else def ault_sort)(value), | |
31 "maxhits": lambda items: max(itertools.chain((value["hits"] for key, value i n items), [1])), | |
32 "maxbandwidth": lambda items: max(itertools.chain((value["bandwidth"] for ke y, value in items), [1])), | |
33 "sumhits": lambda items: max(sum(value["hits"] for key, value in items), 1), | |
34 "sumbandwidth": lambda items: max(sum(value["bandwidth"] for key, value in i tems), 1), | |
35 "isspecial": lambda name, field: field["isspecial"](name) if "isspecial" in field else False, | |
36 }) | |
30 | 37 |
31 def generateMainPage(data, outputDir): | 38 @cached(()) |
32 def getDataInt(month, key): | 39 def get_main_page_template(): |
33 if data.has_option(month, key): | 40 return get_template_environment().get_template(get_config().get("stats", "main PageTemplate")) |
34 return data.getint(month, key) | |
35 else: | |
36 return 0 | |
37 | 41 |
38 month = date.today().strftime('%Y%m') | 42 @cached(()) |
39 subscriptions = [] | 43 def get_file_stats_template(): |
40 for fileName in getSubscriptionFiles(data, month).iterkeys(): | 44 return get_template_environment().get_template(get_config().get("stats", "file PageTemplate")) |
41 subscriptions.append({ | |
42 'fileName': fileName, | |
43 'url': 'subscription_%s_%s.html' % (re.sub(r'\W', '_', fileName), month), | |
44 'hits': getDataInt(month, '%s hits' % fileName), | |
45 'bandwidth': getDataInt(month, '%s bandwidth' % fileName) | |
46 }) | |
47 subscriptions = sorted(subscriptions, key=lambda s: s['hits'], reverse=True) | |
48 | 45 |
49 file = os.path.join(outputDir, 'index.html') | 46 @cached(()) |
50 template = get_template(get_config().get('subscriptionStats', 'mainPageTemplat e')) | 47 def get_file_overview_template(): |
51 template.stream({'now': time.time(), 'month': month, 'subscriptions': subscrip tions}).dump(file) | 48 return get_template_environment().get_template(get_config().get("stats", "file OverviewTemplate")) |
52 | 49 |
53 def generateSubscriptionPages(data, outputDir): | 50 def default_sort(obj): |
54 existingSubscriptions = {} | 51 return sorted(obj.items(), key=lambda (k,v): v["hits"], reverse=True) |
55 template = get_template(get_config().get('subscriptionStats', 'subscriptionPag eTemplate')) | |
56 for month in data.sections(): | |
57 subscriptions = {} | |
58 for option in data.options(month): | |
59 spaceIndex = option.index(' ') | |
60 if spaceIndex < 0: | |
61 continue | |
62 fileName, key = option[0:spaceIndex], option[spaceIndex+1:] | |
63 existingSubscriptions[fileName] = True | |
64 if not fileName in subscriptions: | |
65 subscriptions[fileName] = { | |
66 'now': time.time(), | |
67 'month': month, | |
68 'daysInMonth': (date(int(month[0:4]), int(month[4:]), 1) - timedelta(d ays=1)).day, | |
69 'currentMonth': month == date.today().strftime('%Y%m'), | |
70 'fileName': fileName, | |
71 'overviewURL': 'overview_%s.html' % re.sub(r'\W', '_', fileName), | |
72 'hits': 0, | |
73 'bandwidth': 0, | |
74 'day': {}, | |
75 'weekday': [{'id': i, 'hits': 0, 'bandwidth': 0, 'count': 0}for i in r ange(7)], | |
76 'hour': {}, | |
77 'country': {}, | |
78 'app': {}, | |
79 'mirror': {}, | |
80 } | |
81 if key == 'hits' or key == 'bandwidth': | |
82 subscriptions[fileName][key] = data.getint(month, option) | |
83 else: | |
84 match = re.search(r'^(hits|bandwidth) (day|hour|country|app|mirror) (.*) $', key) | |
85 if match: | |
86 if not match.group(3) in subscriptions[fileName][match.group(2)]: | |
87 subscriptions[fileName][match.group(2)][match.group(3)] = { | |
88 'id': match.group(3), | |
89 'hits': 0, | |
90 'bandwidth': 0, | |
91 } | |
92 if match.group(2) == 'day': | |
93 subscriptions[fileName][match.group(2)][match.group(3)]['weekday'] = date(int(month[0:4]), int(month[4:]), int(match.group(3))).weekday() | |
94 if match.group(2) == 'country': | |
95 if match.group(3) in countryCodes: | |
96 subscriptions[fileName][match.group(2)][match.group(3)]['name'] = countryCodes[match.group(3)] | |
97 subscriptions[fileName][match.group(2)][match.group(3)]['image'] = match.group(3) | |
98 else: | |
99 subscriptions[fileName][match.group(2)][match.group(3)]['name'] = 'Unknown' | |
100 subscriptions[fileName][match.group(2)][match.group(3)]['image'] = 'ip' | |
101 subscriptions[fileName][match.group(2)][match.group(3)][match.group(1) ] = data.getint(month, option) | |
102 | 52 |
103 for subscription in subscriptions.itervalues(): | 53 def ensure_dir(path): |
Sebastian Noack
2013/08/26 16:05:22
Instead of checking if the dir exists and creating
| |
104 for key in ('day', 'hour'): | 54 dir = os.path.dirname(path) |
105 subscription[key] = sorted(subscription[key].itervalues(), key=lambda s: int(s['id'])) | 55 if not os.path.exists(dir): |
106 for key in ('country', 'app', 'mirror'): | 56 os.makedirs(dir) |
Wladimir Palant
2013/08/27 07:34:28
Fixed. However, this is a common pattern in our co
| |
107 subscription[key] = sorted(subscription[key].itervalues(), key=lambda s: s['hits'], reverse=True) | |
108 for dayInfo in subscription['day']: | |
109 weekdayInfo = subscription['weekday'][dayInfo['weekday']] | |
110 weekdayInfo['hits'] = (weekdayInfo['hits'] * weekdayInfo['count'] + dayI nfo['hits']) / (weekdayInfo['count'] + 1) | |
111 weekdayInfo['bandwidth'] = (weekdayInfo['bandwidth'] * weekdayInfo['coun t'] + dayInfo['bandwidth']) / (weekdayInfo['count'] + 1) | |
112 weekdayInfo['count'] += 1 | |
113 fileName = 'subscription_%s_%s.html' % (re.sub(r'\W', '_', subscription['f ileName']), month) | |
114 template.stream(subscription).dump(os.path.join(outputDir, fileName)) | |
115 return existingSubscriptions | |
116 | 57 |
117 def generateOverviewPage(data, outputDir, fileName): | 58 def generate_main_page(outputfile, month, url, data): |
118 months = [] | 59 ensure_dir(outputfile) |
119 for month in data.sections(): | 60 get_main_page_template().stream({ |
120 if data.has_option(month, '%s hits' % fileName) and data.has_option(month, ' %s bandwidth' % fileName): | 61 "now": time.time(), |
121 months.append({ | 62 "month": month, |
122 'id': month, | 63 "url": url, |
123 'url': 'subscription_%s_%s.html' % (re.sub(r'\W', '_', fileName), month) , | 64 "data": data, |
124 'hits': data.getint(month, '%s hits' % fileName), | 65 }).dump(outputfile) |
125 'bandwidth': data.getint(month, '%s bandwidth' % fileName), | |
126 }) | |
127 months = sorted(months, key=lambda m: m['id']) | |
128 | 66 |
129 file = os.path.join(outputDir, 'overview_%s.html' % re.sub(r'\W', '_', fileNam e)) | 67 def generate_file_stats(outputfile, month, url, overview_url, data, filter=None, filtered_urls={}): |
130 template = get_template(get_config().get('subscriptionStats', 'subscriptionOve rviewTemplate')) | 68 ensure_dir(outputfile) |
131 template.stream({'now': time.time(), 'fileName': fileName, 'month': months}).d ump(file) | 69 get_file_stats_template().stream({ |
70 "now": time.time(), | |
71 "month": month, | |
72 "url": url, | |
73 "overview_url": overview_url, | |
74 "data": data, | |
75 "fields": common.fields, | |
76 "filter": filter, | |
77 "filtered_urls": filtered_urls | |
78 }).dump(outputfile) | |
79 | |
80 def generate_file_overview(outputfile, url, data): | |
81 ensure_dir(outputfile) | |
82 get_file_overview_template().stream({ | |
83 "now": time.time(), | |
84 "url": url, | |
85 "data": data | |
86 }).dump(outputfile) | |
87 | |
88 def get_names(dir, needdirectories): | |
89 for file in os.listdir(dir): | |
90 path = os.path.join(dir, file) | |
91 if (needdirectories and os.path.isdir(path)) or (not needdirectories and os. path.isfile(path)): | |
92 yield common.filename_decode(file), path | |
93 | |
94 def generate_pages(datadir, outputdir): | |
95 for server_type, server_type_dir in get_names(datadir, True): | |
96 baseURL = get_config().get("stats", "baseURL_" + server_type) | |
97 filedata = {} | |
98 current_month = None | |
99 for month, month_dir in get_names(server_type_dir, True): | |
100 if current_month == None or month > current_month: | |
101 current_month = month | |
102 | |
103 for filename, path in get_names(month_dir, False): | |
104 filename = re.sub(r"\.json$", "", filename) | |
105 with codecs.open(path, "rb", encoding="utf-8") as file: | |
106 data = simplejson.load(file) | |
107 | |
108 overview_url = "../../overview-" + common.filename_encode(filename + ".h tml") | |
109 filtered_urls = {} | |
110 for field in common.fields: | |
111 if field["name"] not in data: | |
112 continue | |
113 # Create filtered views for the first thirty values of a field if they | |
114 # have filtered data. | |
115 for name, value in get_template_environment().filters["sortfield"](dat a[field["name"]], field)[0:30]: | |
116 if filter(lambda k: k not in ("hits", "bandwidth"), value.keys()): | |
Sebastian Noack
2013/08/26 16:05:22
No need to create a new list with the keys first.
Wladimir Palant
2013/08/27 07:34:28
I'd rather use value.iterkeys() here - it's obvious
Wladimir Palant
2013/08/27 11:59:47
Reply by Sebastian:
It does the same, but isn't a
Wladimir Palant
2013/08/27 12:42:01
See my reply - I already know that. But I prefer i
| |
117 outputfile = os.path.join(outputdir, | |
118 common.filename_encode(server_type), | |
119 common.filename_encode(month), | |
120 common.filename_encode(filename), | |
121 "filtered-%s-%s.html" % ( | |
122 common.filename_encode(field["name"]), | |
123 common.filename_encode(name), | |
124 )) | |
125 generate_file_stats(outputfile, month, baseURL + filename, overvie w_url, | |
126 value, filter={"field": field, "value": name}) | |
127 | |
128 if not field["name"] in filtered_urls: | |
129 filtered_urls[field["name"]] = {} | |
130 filtered_urls[field["name"]][name] = outputfile | |
131 | |
132 outputfile = os.path.join(outputdir, | |
133 common.filename_encode(server_type), | |
134 common.filename_encode(month), | |
135 common.filename_encode(filename), | |
136 "index.html") | |
137 generate_file_stats(outputfile, month, baseURL + filename, overview_url, | |
138 data, filtered_urls=filtered_urls) | |
139 | |
140 if filename not in filedata: | |
141 filedata[filename] = {} | |
142 month_url = (common.filename_encode(month) + "/" + | |
Sebastian Noack
2013/08/26 16:05:22
You should use os.path.join() here as well.
Wladimir Palant
2013/08/27 07:34:28
No, definitely not going to use os.path.join() for
Wladimir Palant
2013/08/27 11:59:47
Reply by Sebastian:
Oh, I didn't notice it was an u
| |
143 common.filename_encode(filename) + "/" + | |
144 "index.html") | |
145 filedata[filename][month] = {"url": month_url, "hits": data["hits"], "ba ndwidth": data["bandwidth"]} | |
146 | |
147 monthdata = {} | |
148 for filename, data in filedata.iteritems(): | |
149 outputfile = os.path.join(outputdir, | |
150 common.filename_encode(server_type), | |
151 "overview-" + common.filename_encode(filename + ".html")) | |
152 generate_file_overview(outputfile, baseURL + filename, data) | |
153 | |
154 if current_month in data: | |
155 monthdata[filename] = dict(data[current_month]) | |
156 | |
157 outputfile = os.path.join(outputdir, common.filename_encode(server_type), "i ndex.html") | |
158 generate_main_page(outputfile, current_month, baseURL, monthdata) | |
132 | 159 |
133 if __name__ == '__main__': | 160 if __name__ == '__main__': |
134 setupStderr() | 161 setupStderr() |
135 | 162 |
136 data = SafeConfigParser() | 163 datadir = get_config().get("stats", "dataDirectory") |
137 data.read(get_config().get('subscriptionStats', 'mainFile')) | 164 outputdir = get_config().get("stats", "outputDirectory") |
138 | 165 generate_pages(datadir, outputdir) |
139 outputDir = get_config().get('subscriptionStats', 'outputDirectory') | |
140 if not os.path.exists(outputDir): | |
141 os.makedirs(outputDir) | |
142 generateMainPage(data, outputDir) | |
143 subscriptions = generateSubscriptionPages(data, outputDir) | |
144 for fileName in subscriptions.iterkeys(): | |
145 generateOverviewPage(data, outputDir, fileName) | |
OLD | NEW |