Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/stats/bin/pagegenerator.py

Issue 11481051: Update stats processing (Closed)
Left Patch Set: Created Aug. 23, 2013, 3:53 p.m.
Right Patch Set: Improved performance using memoization Created Aug. 29, 2013, 1:39 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/stats/bin/logprocessor.py ('k') | sitescripts/stats/common.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import os, re, codecs, simplejson, time, itertools 18 import os, re, codecs, simplejson, time, itertools
19 from datetime import date 19 from datetime import date
20 from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached 20 from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached
21 import sitescripts.stats.common as common 21 import sitescripts.stats.common as common
22 from sitescripts.stats.countrycodes import countrycodes 22 from sitescripts.stats.countrycodes import countrycodes
23 23
24 @cached(()) 24 @cached(float("inf"))
25 def get_template_environment(): 25 def get_template_environment():
26 return get_custom_template_environment({ 26 return get_custom_template_environment({
27 "monthname": lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime("%b %Y"), 27 "monthname": lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime("%b %Y"),
28 "weekday": lambda value: ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"][int(value)],
28 "countryname": lambda value: countrycodes.get(value, "Unknown"), 29 "countryname": lambda value: countrycodes.get(value, "Unknown"),
29 "sortfield": lambda value, field: (field["sort"] if "sort" in field else default_sort)(value), 30 "sortfield": lambda value, field: (field["sort"] if "sort" in field else default_sort)(value),
30 "maxhits": lambda items: max(itertools.chain((value["hits"] for key, value in items), [1])), 31 "maxhits": lambda items: max(itertools.chain((value["hits"] for key, value in items), [1])),
31 "maxbandwidth": lambda items: max(itertools.chain((value["bandwidth"] for key, value in items), [1])), 32 "maxbandwidth": lambda items: max(itertools.chain((value["bandwidth"] for key, value in items), [1])),
32 "sumhits": lambda items: max(sum(value["hits"] for key, value in items), 1), 33 "sumhits": lambda items: max(sum(value["hits"] for key, value in items), 1),
33 "sumbandwidth": lambda items: max(sum(value["bandwidth"] for key, value in items), 1), 34 "sumbandwidth": lambda items: max(sum(value["bandwidth"] for key, value in items), 1),
34 "isspecial": lambda name, field: field["isspecial"](name) if "isspecial" in field else False, 35 "isspecial": lambda name, field: field["isspecial"](name) if "isspecial" in field else False,
35 }) 36 })
36 37
37 @cached(()) 38 @cached(float("inf"))
38 def get_main_page_template(): 39 def get_main_page_template():
39 return get_template_environment().get_template(get_config().get("stats", "mainPageTemplate")) 40 return get_template_environment().get_template(get_config().get("stats", "mainPageTemplate"))
40 41
41 @cached(()) 42 @cached(float("inf"))
42 def get_file_stats_template(): 43 def get_file_stats_template():
43 return get_template_environment().get_template(get_config().get("stats", "filePageTemplate")) 44 return get_template_environment().get_template(get_config().get("stats", "filePageTemplate"))
44 45
45 @cached(()) 46 @cached(float("inf"))
46 def get_file_overview_template(): 47 def get_file_overview_template():
47 return get_template_environment().get_template(get_config().get("stats", "fileOverviewTemplate")) 48 return get_template_environment().get_template(get_config().get("stats", "fileOverviewTemplate"))
48 49
49 def default_sort(obj): 50 def default_sort(obj):
50 return sorted(obj.items(), key=lambda (k,v): v["hits"], reverse=True) 51 return sorted(obj.items(), key=lambda (k,v): v["hits"], reverse=True)
51 52
52 def ensure_dir(path): 53 def ensure_dir(path):
53 dir = os.path.dirname(path) 54 dir = os.path.dirname(path)
54 if not os.path.exists(dir): 55 try:
55 os.makedirs(dir) 56 os.makedirs(dir)
57 except OSError:
58 pass
56 59
57 def generate_main_page(outputfile, month, url, data): 60 def generate_main_page(outputfile, month, url, data):
58 ensure_dir(outputfile) 61 ensure_dir(outputfile)
59 get_main_page_template().stream({ 62 get_main_page_template().stream({
60 "now": time.time(), 63 "now": time.time(),
61 "month": month, 64 "month": month,
62 "url": url, 65 "url": url,
63 "data": data, 66 "data": data,
64 }).dump(outputfile) 67 }).dump(outputfile)
65 68
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
105 data = simplejson.load(file) 108 data = simplejson.load(file)
106 109
107 overview_url = "../../overview-" + common.filename_encode(filename + ".html") 110 overview_url = "../../overview-" + common.filename_encode(filename + ".html")
108 filtered_urls = {} 111 filtered_urls = {}
109 for field in common.fields: 112 for field in common.fields:
110 if field["name"] not in data: 113 if field["name"] not in data:
111 continue 114 continue
112 # Create filtered views for the first thirty values of a field if they 115 # Create filtered views for the first thirty values of a field if they
113 # have filtered data. 116 # have filtered data.
114 for name, value in get_template_environment().filters["sortfield"](data[field["name"]], field)[0:30]: 117 for name, value in get_template_environment().filters["sortfield"](data[field["name"]], field)[0:30]:
115 if filter(lambda k: k not in ("hits", "bandwidth"), value.keys()): 118 if filter(lambda k: k not in ("hits", "bandwidth"), value.iterkeys()):
116 outputfile = os.path.join(outputdir, 119 outputfile = os.path.join(outputdir,
117 common.filename_encode(server_type), 120 common.filename_encode(server_type),
118 common.filename_encode(month), 121 common.filename_encode(month),
119 common.filename_encode(filename), 122 common.filename_encode(filename),
120 "filtered-%s-%s.html" % ( 123 "filtered-%s-%s.html" % (
121 common.filename_encode(field["name"]), 124 common.filename_encode(field["name"]),
122 common.filename_encode(name), 125 common.filename_encode(name),
123 )) 126 ))
124 generate_file_stats(outputfile, month, baseURL + filename, overview_url, 127 generate_file_stats(outputfile, month, baseURL + filename, overview_url,
125 value, filter={"field": field, "value": name}) 128 value, filter={"field": field, "value": name})
126 129
127 if not field["name"] in filtered_urls: 130 if not field["name"] in filtered_urls:
128 filtered_urls[field["name"]] = {} 131 filtered_urls[field["name"]] = {}
129 filtered_urls[field["name"]][name] = outputfile 132 filtered_urls[field["name"]][name] = os.path.basename(outputfile)
130 133
131 outputfile = os.path.join(outputdir, 134 outputfile = os.path.join(outputdir,
132 common.filename_encode(server_type), 135 common.filename_encode(server_type),
133 common.filename_encode(month), 136 common.filename_encode(month),
134 common.filename_encode(filename), 137 common.filename_encode(filename),
135 "index.html") 138 "index.html")
136 generate_file_stats(outputfile, month, baseURL + filename, overview_url, 139 generate_file_stats(outputfile, month, baseURL + filename, overview_url,
137 data, filtered_urls=filtered_urls) 140 data, filtered_urls=filtered_urls)
138 141
139 if filename not in filedata: 142 if filename not in filedata:
140 filedata[filename] = {} 143 filedata[filename] = {}
141 month_url = (common.filename_encode(month) + "/" + 144 month_url = "%s/%s/%s" % (common.filename_encode(month),
142 common.filename_encode(filename) + "/" + 145 common.filename_encode(filename),
143 "index.html") 146 "index.html")
144 filedata[filename][month] = {"url": month_url, "hits": data["hits"], "bandwidth": data["bandwidth"]} 147 filedata[filename][month] = {"url": month_url, "hits": data["hits"], "bandwidth": data["bandwidth"]}
145 148
146 monthdata = {} 149 monthdata = {}
147 for filename, data in filedata.iteritems(): 150 for filename, data in filedata.iteritems():
148 outputfile = os.path.join(outputdir, 151 outputfile = os.path.join(outputdir,
149 common.filename_encode(server_type), 152 common.filename_encode(server_type),
150 "overview-" + common.filename_encode(filename + ".html")) 153 "overview-" + common.filename_encode(filename + ".html"))
151 generate_file_overview(outputfile, baseURL + filename, data) 154 generate_file_overview(outputfile, baseURL + filename, data)
152 155
153 if current_month in data: 156 if current_month in data:
154 monthdata[filename] = dict(data[current_month]) 157 monthdata[filename] = dict(data[current_month])
155 158
156 outputfile = os.path.join(outputdir, common.filename_encode(server_type), "index.html") 159 outputfile = os.path.join(outputdir, common.filename_encode(server_type), "index.html")
157 generate_main_page(outputfile, current_month, baseURL, monthdata) 160 generate_main_page(outputfile, current_month, baseURL, monthdata)
158 161
159 if __name__ == '__main__': 162 if __name__ == '__main__':
160 setupStderr() 163 setupStderr()
161 164
162 datadir = get_config().get("stats", "dataDirectory") 165 datadir = get_config().get("stats", "dataDirectory")
163 outputdir = get_config().get("stats", "outputDirectory") 166 outputdir = get_config().get("stats", "outputDirectory")
164 generate_pages(datadir, outputdir) 167 generate_pages(datadir, outputdir)
LEFTRIGHT

Powered by Google App Engine
This is Rietveld