Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/stats/bin/datamerger.py

Issue 11481051: Update stats processing (Closed)
Patch Set: Improved performance using memoization Created Aug. 29, 2013, 1:39 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « sitescripts/stats/bin/__init__.py ('k') | sitescripts/stats/bin/logprocessor.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import os, re, subprocess, urllib 18 import os, sys, re, codecs, subprocess, urllib, simplejson, traceback
19 import sitescripts.stats.common as common
19 from sitescripts.utils import get_config, setupStderr 20 from sitescripts.utils import get_config, setupStderr
20 from ConfigParser import SafeConfigParser, NoOptionError
21 from StringIO import StringIO
22 21
23 def readStatsFile(path): 22 def read_stats_file(path):
24 result = SafeConfigParser() 23 match = re.search(r"^ssh://(\w+)@([^/:]+)(?::(\d+))?", path)
25 match = re.search(r'^ssh://(\w+)@([^/:]+)(?::(\d+))?', path)
26 if match: 24 if match:
27 command = ['ssh', '-q', '-o' 'NumberOfPasswordPrompts 0', '-T', '-k', '-l', match.group(1), match.group(2)] 25 command = ["ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", "-l", match.group(1), match.group(2)]
28 if match.group(3): 26 if match.group(3):
29 command[1:1] = ['-P', match.group(3)] 27 command[1:1] = ["-P", match.group(3)]
30 data = subprocess.check_output(command) 28 data = subprocess.check_output(command)
31 result.readfp(StringIO(data)) 29 return simplejson.loads(data.decode("utf-8"))
32 elif path.startswith('http://') or path.startswith('https://'): 30 elif path.startswith("http://") or path.startswith("https://"):
33 result.readfp(urllib.urlopen(path)) 31 return simplejson.load(urllib.urlopen(path).read().decode("utf-8"))
34 elif os.path.exists(path): 32 elif os.path.exists(path):
35 result.read(path) 33 with codecs.open(path, "rb", encoding="utf-8") as file:
36 return result 34 return simplejson.load(file)
37 35
38 def getStatsFiles(): 36 raise IOError("Path '%s' not recognized" % path)
37
38 def get_stats_files(mirrors):
39 config = get_config() 39 config = get_config()
40 40
41 for option in config.options('subscriptionStats'): 41 if len(mirrors) > 0:
42 match = re.search(r'^mirror_(.*)', option, re.I) 42 options = map(lambda m: "mirror_" + m, mirrors)
43 if match: 43 else:
44 yield (match.group(1), config.get('subscriptionStats', option)) 44 options = filter(lambda o: o.startswith("mirror_"), config.options("stats"))
45 for option in options:
46 if config.has_option("stats", option):
47 value = config.get("stats", option)
48 if " " in value:
49 yield value.split(None, 1)
50 else:
51 print >>sys.stderr, "Option '%s' has invalid value: '%s'" % (option, value)
52 else:
53 print >>sys.stderr, "Option '%s' not found in the configuration" % option
45 54
46 def mergeStatsFile(mirrorName, config1, config2): 55 def merge_objects(object1, object2):
47 def increaseOption(section, option, increase): 56 for key, value in object2.iteritems():
48 if config1.has_option(section, option): 57 if key in object1:
49 oldval = config1.getint(section, option) 58 if isinstance(value, int):
50 config1.set(section, option, str(oldval + increase)) 59 object1[key] += value
60 else:
61 merge_objects(object1[key], object2[key])
51 else: 62 else:
52 config1.set(section, option, str(increase)) 63 object1[key] = value
53 64
54 for section in config2.sections(): 65 def merge_stats_file(server_type, data):
55 if not config1.has_section(section): 66 base_dir = os.path.join(get_config().get("stats", "dataDirectory"), common.filename_encode(server_type))
56 config1.add_section(section) 67 for month, month_data in data.iteritems():
57 for option in config2.options(section): 68 for name, file_data in month_data.iteritems():
58 increase = config2.getint(section, option) 69 path = os.path.join(base_dir, common.filename_encode(month), common.filename_encode(name + ".json"))
59 increaseOption(section, option, increase) 70 if os.path.exists(path):
71 with codecs.open(path, "rb", encoding="utf-8") as file:
72 existing = simplejson.load(file)
73 else:
74 existing = {}
60 75
61 match = re.search(r'^(\S+) (hits|bandwidth)$', option, re.I) 76 merge_objects(existing, file_data)
62 if match:
63 increaseOption(section, '%s %s mirror %s' % (match.group(1), match.group(2), mirrorName), increase)
64 77
65 if __name__ == '__main__': 78 dir = os.path.dirname(path)
79 try:
80 os.makedirs(dir)
81 except OSError:
82 pass
83
84 with codecs.open(path, "wb", encoding="utf-8") as file:
85 simplejson.dump(existing, file, indent=2, sort_keys=True)
86
87 def merge_mirror_stats(mirrors):
88 for server_type, path in get_stats_files(mirrors):
89 try:
90 merge_stats_file(server_type, read_stats_file(path))
91 except:
92 print >>sys.stderr, "Unable to merge stats for '%s'" % path
93 traceback.print_exc()
94
95 if __name__ == "__main__":
66 setupStderr() 96 setupStderr()
67 97 merge_mirror_stats(sys.argv[1:])
68 result = readStatsFile(get_config().get('subscriptionStats', 'mainFile'))
69 for (mirrorName, statsFile) in getStatsFiles():
70 mergeStatsFile(mirrorName, result, readStatsFile(statsFile))
71 file = open(get_config().get('subscriptionStats', 'mainFile'), 'wb')
72 result.write(file)
OLDNEW
« no previous file with comments | « sitescripts/stats/bin/__init__.py ('k') | sitescripts/stats/bin/logprocessor.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld