Index: sitescripts/stats/bin/datamerger.py |
=================================================================== |
rename from sitescripts/logs/bin/mergeSubscriptionStats.py |
rename to sitescripts/stats/bin/datamerger.py |
--- a/sitescripts/logs/bin/mergeSubscriptionStats.py |
+++ b/sitescripts/stats/bin/datamerger.py |
@@ -10,63 +10,86 @@ |
# Adblock Plus is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# GNU General Public License for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
-import os, re, subprocess, urllib |
+import os, sys, re, codecs, subprocess, urllib, simplejson, traceback |
Sebastian Noack
2013/08/26 16:05:22
Since Python 2.6, you can use the built-in json module instead of simplejson.
|
+import sitescripts.stats.common as common |
from sitescripts.utils import get_config, setupStderr |
-from ConfigParser import SafeConfigParser, NoOptionError |
-from StringIO import StringIO |
def read_stats_file(path):
    """Load a JSON stats document from an SSH host, an HTTP(S) URL or a file.

    ``path`` is interpreted as follows, in this order:

    * ``ssh://user@host[:port]`` - ``ssh`` is run against that host and its
      standard output is parsed as UTF-8 encoded JSON. No remote command is
      passed, so the output is whatever the remote side sends back
      (presumably a forced command configured on the server - confirm).
    * ``http://`` or ``https://`` URL - the response body is parsed as UTF-8
      encoded JSON.
    * anything else that exists on the local file system - the file is
      parsed as UTF-8 encoded JSON.

    Returns the decoded JSON data.

    Raises ``IOError`` if ``path`` matches none of the above, and
    ``subprocess.CalledProcessError`` if ``ssh`` exits with a non-zero status.
    """
    match = re.search(r"^ssh://(\w+)@([^/:]+)(?::(\d+))?", path)
    if match:
        user, host, port = match.groups()
        command = ["ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", "-l", user, host]
        if port:
            # ssh(1) takes the port through lowercase -p; uppercase -P is the
            # spelling used by scp and is not a port option for ssh.
            command[1:1] = ["-p", port]
        data = subprocess.check_output(command)
        return simplejson.loads(data.decode("utf-8"))

    if path.startswith("http://") or path.startswith("https://"):
        response = urllib.urlopen(path)
        try:
            # The body has already been read and decoded into a string, so it
            # has to go through loads(); load() expects a file-like object.
            return simplejson.loads(response.read().decode("utf-8"))
        finally:
            response.close()

    if os.path.exists(path):
        with codecs.open(path, "rb", encoding="utf-8") as handle:
            return simplejson.load(handle)

    raise IOError("Path '%s' not recognized" % path)
+ |
def get_stats_files(mirrors):
    """Yield ``[server_type, path]`` pairs read from the "stats" config section.

    ``mirrors`` is a sequence of mirror names; each name is looked up as the
    option ``mirror_<name>``. When ``mirrors`` is empty, every option of the
    section whose name starts with ``mirror_`` is used instead.

    An option value must consist of two whitespace-separated parts; the value
    is split at the first run of whitespace only, so the second part may
    itself contain whitespace. Options that are missing or have a value that
    cannot be split are reported on stderr and skipped.
    """
    config = get_config()
    if mirrors:
        options = ["mirror_" + mirror for mirror in mirrors]
    else:
        options = [name for name in config.options("stats") if name.startswith("mirror_")]

    for option in options:
        if not config.has_option("stats", option):
            print >>sys.stderr, "Option '%s' not found in the configuration" % option
            continue

        value = config.get("stats", option)
        # Validate with the same rule that does the splitting, so that any
        # whitespace separator (not only a plain space) is accepted.
        parts = value.split(None, 1)
        if len(parts) == 2:
            yield parts
        else:
            print >>sys.stderr, "Option '%s' has invalid value: '%s'" % (option, value)
def merge_objects(object1, object2):
    """Recursively add the contents of ``object2`` into ``object1`` in place.

    For every key of ``object2``:

    * if the key is missing from ``object1``, the value is stored as is
      (by reference, not copied);
    * if the value is a number, it is added to the existing value;
    * otherwise both values are treated as nested mappings and merged
      recursively.

    Returns ``None``; ``object1`` is modified, ``object2`` is only read.
    """
    import numbers

    for key, value in object2.items():
        if key not in object1:
            object1[key] = value
        elif isinstance(value, numbers.Number):
            # numbers.Number also covers float and (on Python 2) long, which
            # a plain ``int`` check would send into the recursive branch.
            object1[key] += value
        else:
            merge_objects(object1[key], value)
def merge_stats_file(server_type, data):
    """Merge freshly collected stats into the JSON files of the data directory.

    ``data`` maps a month to a mapping of names to stats objects. Each stats
    object is merged (via ``merge_objects``) into the file
    ``<dataDirectory>/<server_type>/<month>/<name>.json``, where every path
    component is passed through ``common.filename_encode``. A file that does
    not exist yet is treated as an empty object, and its directory is created
    when missing. The result is written back as indented JSON with sorted
    keys.
    """
    data_root = get_config().get("stats", "dataDirectory")
    base_dir = os.path.join(data_root, common.filename_encode(server_type))

    for month, month_data in data.iteritems():
        for name, file_data in month_data.iteritems():
            path = os.path.join(
                base_dir,
                common.filename_encode(month),
                common.filename_encode(name + ".json"),
            )

            # Start from what is already on disk, if anything.
            merged = {}
            if os.path.exists(path):
                with codecs.open(path, "rb", encoding="utf-8") as source:
                    merged = simplejson.load(source)

            merge_objects(merged, file_data)

            target_dir = os.path.dirname(path)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

            with codecs.open(path, "wb", encoding="utf-8") as target:
                simplejson.dump(merged, target, indent=2, sort_keys=True)
+ |
def merge_mirror_stats(mirrors):
    """Fetch the stats of each configured mirror and merge them into the data files.

    ``mirrors`` is passed on to ``get_stats_files`` to select the mirrors to
    process. A failure while reading or merging one mirror is reported on
    stderr together with its traceback, and processing continues with the
    next mirror.
    """
    for server_type, path in get_stats_files(mirrors):
        try:
            merge_stats_file(server_type, read_stats_file(path))
        except Exception:
            # Best effort per mirror, but let KeyboardInterrupt and SystemExit
            # propagate so the script can still be stopped.
            print >>sys.stderr, "Unable to merge stats for '%s'" % path
            traceback.print_exc()
+ |
if __name__ == "__main__":
    # Script entry point: the command line arguments are the names of the
    # mirrors to merge; with no arguments every configured mirror is used.
    setupStderr()
    mirror_names = sys.argv[1:]
    merge_mirror_stats(mirror_names)