Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/stats/bin/pagegenerator.py

Issue 11481051: Update stats processing (Closed)
Left Patch Set: Fixed two presentation issues Created Aug. 24, 2013, 1:11 p.m.
Right Patch Set: Improved performance using memoization Created Aug. 29, 2013, 1:39 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/stats/bin/logprocessor.py ('k') | sitescripts/stats/common.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import os, re, codecs, simplejson, time, itertools 18 import os, re, codecs, simplejson, time, itertools
19 from datetime import date 19 from datetime import date
20 from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached 20 from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached
21 import sitescripts.stats.common as common 21 import sitescripts.stats.common as common
22 from sitescripts.stats.countrycodes import countrycodes 22 from sitescripts.stats.countrycodes import countrycodes
23 23
24 @cached(()) 24 @cached(float("inf"))
Sebastian Noack 2013/08/26 16:05:22 You passed an empty tuple as timeout. Obviously yo
Wladimir Palant 2013/08/27 07:34:28 This behavior is somewhat specified as described h
25 def get_template_environment(): 25 def get_template_environment():
26 return get_custom_template_environment({ 26 return get_custom_template_environment({
27 "monthname": lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime("%b %Y"), 27 "monthname": lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime("%b %Y"),
28 "weekday": lambda value: ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"][int(value)], 28 "weekday": lambda value: ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"][int(value)],
29 "countryname": lambda value: countrycodes.get(value, "Unknown"), 29 "countryname": lambda value: countrycodes.get(value, "Unknown"),
30 "sortfield": lambda value, field: (field["sort"] if "sort" in field else default_sort)(value), 30 "sortfield": lambda value, field: (field["sort"] if "sort" in field else default_sort)(value),
31 "maxhits": lambda items: max(itertools.chain((value["hits"] for key, value in items), [1])), 31 "maxhits": lambda items: max(itertools.chain((value["hits"] for key, value in items), [1])),
32 "maxbandwidth": lambda items: max(itertools.chain((value["bandwidth"] for key, value in items), [1])), 32 "maxbandwidth": lambda items: max(itertools.chain((value["bandwidth"] for key, value in items), [1])),
33 "sumhits": lambda items: max(sum(value["hits"] for key, value in items), 1), 33 "sumhits": lambda items: max(sum(value["hits"] for key, value in items), 1),
34 "sumbandwidth": lambda items: max(sum(value["bandwidth"] for key, value in items), 1), 34 "sumbandwidth": lambda items: max(sum(value["bandwidth"] for key, value in items), 1),
35 "isspecial": lambda name, field: field["isspecial"](name) if "isspecial" in field else False, 35 "isspecial": lambda name, field: field["isspecial"](name) if "isspecial" in field else False,
36 }) 36 })
37 37
38 @cached(()) 38 @cached(float("inf"))
39 def get_main_page_template(): 39 def get_main_page_template():
40 return get_template_environment().get_template(get_config().get("stats", "mainPageTemplate")) 40 return get_template_environment().get_template(get_config().get("stats", "mainPageTemplate"))
41 41
42 @cached(()) 42 @cached(float("inf"))
43 def get_file_stats_template(): 43 def get_file_stats_template():
44 return get_template_environment().get_template(get_config().get("stats", "filePageTemplate")) 44 return get_template_environment().get_template(get_config().get("stats", "filePageTemplate"))
45 45
46 @cached(()) 46 @cached(float("inf"))
47 def get_file_overview_template(): 47 def get_file_overview_template():
48 return get_template_environment().get_template(get_config().get("stats", "fileOverviewTemplate")) 48 return get_template_environment().get_template(get_config().get("stats", "fileOverviewTemplate"))
49 49
50 def default_sort(obj): 50 def default_sort(obj):
51 return sorted(obj.items(), key=lambda (k,v): v["hits"], reverse=True) 51 return sorted(obj.items(), key=lambda (k,v): v["hits"], reverse=True)
52 52
53 def ensure_dir(path): 53 def ensure_dir(path):
Sebastian Noack 2013/08/26 16:05:22 Instead of checking if the dir exist and creating
54 dir = os.path.dirname(path) 54 dir = os.path.dirname(path)
55 if not os.path.exists(dir): 55 try:
56 os.makedirs(dir) 56 os.makedirs(dir)
Wladimir Palant 2013/08/27 07:34:28 Fixed. However, this is a common pattern in our co
57 except OSError:
58 pass
57 59
58 def generate_main_page(outputfile, month, url, data): 60 def generate_main_page(outputfile, month, url, data):
59 ensure_dir(outputfile) 61 ensure_dir(outputfile)
60 get_main_page_template().stream({ 62 get_main_page_template().stream({
61 "now": time.time(), 63 "now": time.time(),
62 "month": month, 64 "month": month,
63 "url": url, 65 "url": url,
64 "data": data, 66 "data": data,
65 }).dump(outputfile) 67 }).dump(outputfile)
66 68
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 data = simplejson.load(file) 108 data = simplejson.load(file)
107 109
108 overview_url = "../../overview-" + common.filename_encode(filename + ".html") 110 overview_url = "../../overview-" + common.filename_encode(filename + ".html")
109 filtered_urls = {} 111 filtered_urls = {}
110 for field in common.fields: 112 for field in common.fields:
111 if field["name"] not in data: 113 if field["name"] not in data:
112 continue 114 continue
113 # Create filtered views for the first thirty values of a field if they 115 # Create filtered views for the first thirty values of a field if they
114 # have filtered data. 116 # have filtered data.
115 for name, value in get_template_environment().filters["sortfield"](data[field["name"]], field)[0:30]: 117 for name, value in get_template_environment().filters["sortfield"](data[field["name"]], field)[0:30]:
116 if filter(lambda k: k not in ("hits", "bandwidth"), value.keys()): 118 if filter(lambda k: k not in ("hits", "bandwidth"), value.iterkeys()):
Sebastian Noack 2013/08/26 16:05:22 No need to create a new list with the keys first.
Wladimir Palant 2013/08/27 07:34:28 I rather use value.iterkeys() here - it's obvious
Wladimir Palant 2013/08/27 11:59:47 Reply by Sebastian: It does the same, but isn't a
Wladimir Palant 2013/08/27 12:42:01 See my reply - I already know that. But I prefer i
117 outputfile = os.path.join(outputdir, 119 outputfile = os.path.join(outputdir,
118 common.filename_encode(server_type), 120 common.filename_encode(server_type),
119 common.filename_encode(month), 121 common.filename_encode(month),
120 common.filename_encode(filename), 122 common.filename_encode(filename),
121 "filtered-%s-%s.html" % ( 123 "filtered-%s-%s.html" % (
122 common.filename_encode(field["name"]), 124 common.filename_encode(field["name"]),
123 common.filename_encode(name), 125 common.filename_encode(name),
124 )) 126 ))
125 generate_file_stats(outputfile, month, baseURL + filename, overview_url, 127 generate_file_stats(outputfile, month, baseURL + filename, overview_url,
126 value, filter={"field": field, "value": name}) 128 value, filter={"field": field, "value": name})
127 129
128 if not field["name"] in filtered_urls: 130 if not field["name"] in filtered_urls:
129 filtered_urls[field["name"]] = {} 131 filtered_urls[field["name"]] = {}
130 filtered_urls[field["name"]][name] = outputfile 132 filtered_urls[field["name"]][name] = os.path.basename(outputfile)
131 133
132 outputfile = os.path.join(outputdir, 134 outputfile = os.path.join(outputdir,
133 common.filename_encode(server_type), 135 common.filename_encode(server_type),
134 common.filename_encode(month), 136 common.filename_encode(month),
135 common.filename_encode(filename), 137 common.filename_encode(filename),
136 "index.html") 138 "index.html")
137 generate_file_stats(outputfile, month, baseURL + filename, overview_url, 139 generate_file_stats(outputfile, month, baseURL + filename, overview_url,
138 data, filtered_urls=filtered_urls) 140 data, filtered_urls=filtered_urls)
139 141
140 if filename not in filedata: 142 if filename not in filedata:
141 filedata[filename] = {} 143 filedata[filename] = {}
142 month_url = (common.filename_encode(month) + "/" + 144 month_url = "%s/%s/%s" % (common.filename_encode(month),
Sebastian Noack 2013/08/26 16:05:22 You should use os.path.join() here as well.
Wladimir Palant 2013/08/27 07:34:28 No, definitely not going to use os.path.join() for
Wladimir Palant 2013/08/27 11:59:47 Reply by Sebastian: Oh, didn't noted, it was an u
143 common.filename_encode(filename) + "/" + 145 common.filename_encode(filename),
144 "index.html") 146 "index.html")
145 filedata[filename][month] = {"url": month_url, "hits": data["hits"], "bandwidth": data["bandwidth"]} 147 filedata[filename][month] = {"url": month_url, "hits": data["hits"], "bandwidth": data["bandwidth"]}
146 148
147 monthdata = {} 149 monthdata = {}
148 for filename, data in filedata.iteritems(): 150 for filename, data in filedata.iteritems():
149 outputfile = os.path.join(outputdir, 151 outputfile = os.path.join(outputdir,
150 common.filename_encode(server_type), 152 common.filename_encode(server_type),
151 "overview-" + common.filename_encode(filename + ".html")) 153 "overview-" + common.filename_encode(filename + ".html"))
152 generate_file_overview(outputfile, baseURL + filename, data) 154 generate_file_overview(outputfile, baseURL + filename, data)
153 155
154 if current_month in data: 156 if current_month in data:
155 monthdata[filename] = dict(data[current_month]) 157 monthdata[filename] = dict(data[current_month])
156 158
157 outputfile = os.path.join(outputdir, common.filename_encode(server_type), "index.html") 159 outputfile = os.path.join(outputdir, common.filename_encode(server_type), "index.html")
158 generate_main_page(outputfile, current_month, baseURL, monthdata) 160 generate_main_page(outputfile, current_month, baseURL, monthdata)
159 161
160 if __name__ == '__main__': 162 if __name__ == '__main__':
161 setupStderr() 163 setupStderr()
162 164
163 datadir = get_config().get("stats", "dataDirectory") 165 datadir = get_config().get("stats", "dataDirectory")
164 outputdir = get_config().get("stats", "outputDirectory") 166 outputdir = get_config().get("stats", "outputDirectory")
165 generate_pages(datadir, outputdir) 167 generate_pages(datadir, outputdir)
LEFTRIGHT

Powered by Google App Engine
This is Rietveld