Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/stats/bin/datamerger.py

Issue 11481051: Update stats processing (Closed)
Left Patch Set: Fixed two presentation issues Created Aug. 24, 2013, 1:11 p.m.
Right Patch Set: Improved performance using memoization Created Aug. 29, 2013, 1:39 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/stats/bin/__init__.py ('k') | sitescripts/stats/bin/logprocessor.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import os, sys, re, codecs, subprocess, urllib, simplejson, traceback 18 import os, sys, re, codecs, subprocess, urllib, simplejson, traceback
Sebastian Noack 2013/08/26 16:05:22 Since Python 2.6 you can use the built-in "json" m
Wladimir Palant 2013/08/27 07:34:28 I am aware of that of course. However, simplejson
Wladimir Palant 2013/08/27 11:59:47 Reply by Sebastian: That is strange since the ori
Wladimir Palant 2013/08/27 12:42:01 I did. However, looking at http://www.tablix.org/~
19 import sitescripts.stats.common as common 19 import sitescripts.stats.common as common
20 from sitescripts.utils import get_config, setupStderr 20 from sitescripts.utils import get_config, setupStderr
21 21
22 def read_stats_file(path): 22 def read_stats_file(path):
23 match = re.search(r"^ssh://(\w+)@([^/:]+)(?::(\d+))?", path) 23 match = re.search(r"^ssh://(\w+)@([^/:]+)(?::(\d+))?", path)
24 if match: 24 if match:
25 command = ["ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", "-l", match.group(1), match.group(2)] 25 command = ["ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", "-l", match.group(1), match.group(2)]
26 if match.group(3): 26 if match.group(3):
27 command[1:1] = ["-P", match.group(3)] 27 command[1:1] = ["-P", match.group(3)]
28 data = subprocess.check_output(command) 28 data = subprocess.check_output(command)
(...skipping 10 matching lines...) Expand all
39 config = get_config() 39 config = get_config()
40 40
41 if len(mirrors) > 0: 41 if len(mirrors) > 0:
42 options = map(lambda m: "mirror_" + m, mirrors) 42 options = map(lambda m: "mirror_" + m, mirrors)
43 else: 43 else:
44 options = filter(lambda o: o.startswith("mirror_"), config.options("stats")) 44 options = filter(lambda o: o.startswith("mirror_"), config.options("stats"))
45 for option in options: 45 for option in options:
46 if config.has_option("stats", option): 46 if config.has_option("stats", option):
47 value = config.get("stats", option) 47 value = config.get("stats", option)
48 if " " in value: 48 if " " in value:
49 yield re.split(r"\s+", value, 1) 49 yield value.split(None, 1)
Sebastian Noack 2013/08/26 16:05:22 Did you know that value.split(None, 1) would do th
Wladimir Palant 2013/08/27 07:34:28 No, I didn't. Fixed.
50 else: 50 else:
51 print >>sys.stderr, "Option '%s' has invalid value: '%s'" % (option, value) 51 print >>sys.stderr, "Option '%s' has invalid value: '%s'" % (option, value)
52 else: 52 else:
53 print >>sys.stderr, "Option '%s' not found in the configuration" % option 53 print >>sys.stderr, "Option '%s' not found in the configuration" % option
54 54
55 def merge_objects(object1, object2): 55 def merge_objects(object1, object2):
56 for key, value in object2.iteritems(): 56 for key, value in object2.iteritems():
57 if key in object1: 57 if key in object1:
58 if isinstance(value, int): 58 if isinstance(value, int):
59 object1[key] += value 59 object1[key] += value
60 else: 60 else:
61 merge_objects(object1[key], object2[key]) 61 merge_objects(object1[key], object2[key])
62 else: 62 else:
63 object1[key] = value 63 object1[key] = value
64 64
65 def merge_stats_file(server_type, data): 65 def merge_stats_file(server_type, data):
66 base_dir = os.path.join(get_config().get("stats", "dataDirectory"), common.filename_encode(server_type)) 66 base_dir = os.path.join(get_config().get("stats", "dataDirectory"), common.filename_encode(server_type))
67 for month, month_data in data.iteritems(): 67 for month, month_data in data.iteritems():
68 for name, file_data in month_data.iteritems(): 68 for name, file_data in month_data.iteritems():
69 path = os.path.join(base_dir, common.filename_encode(month), common.filename_encode(name + ".json")) 69 path = os.path.join(base_dir, common.filename_encode(month), common.filename_encode(name + ".json"))
70 if os.path.exists(path): 70 if os.path.exists(path):
71 with codecs.open(path, "rb", encoding="utf-8") as file: 71 with codecs.open(path, "rb", encoding="utf-8") as file:
72 existing = simplejson.load(file) 72 existing = simplejson.load(file)
73 else: 73 else:
74 existing = {} 74 existing = {}
75 75
76 merge_objects(existing, file_data) 76 merge_objects(existing, file_data)
77 77
78 dir = os.path.dirname(path) 78 dir = os.path.dirname(path)
79 if not os.path.exists(dir): 79 try:
80 os.makedirs(dir) 80 os.makedirs(dir)
81 except OSError:
82 pass
81 83
82 with codecs.open(path, "wb", encoding="utf-8") as file: 84 with codecs.open(path, "wb", encoding="utf-8") as file:
83 simplejson.dump(existing, file, indent=2, sort_keys=True) 85 simplejson.dump(existing, file, indent=2, sort_keys=True)
84 86
85 def merge_mirror_stats(mirrors): 87 def merge_mirror_stats(mirrors):
86 for server_type, path in get_stats_files(mirrors): 88 for server_type, path in get_stats_files(mirrors):
87 try: 89 try:
88 merge_stats_file(server_type, read_stats_file(path)) 90 merge_stats_file(server_type, read_stats_file(path))
89 except: 91 except:
90 print >>sys.stderr, "Unable to merge stats for '%s'" % path 92 print >>sys.stderr, "Unable to merge stats for '%s'" % path
91 traceback.print_exc() 93 traceback.print_exc()
92 94
93 if __name__ == "__main__": 95 if __name__ == "__main__":
94 setupStderr() 96 setupStderr()
95 merge_mirror_stats(sys.argv[1:]) 97 merge_mirror_stats(sys.argv[1:])
LEFT RIGHT

Powered by Google App Engine
This is Rietveld