| Index: sitescripts/stats/bin/datamerger.py | 
| =================================================================== | 
| rename from sitescripts/logs/bin/mergeSubscriptionStats.py | 
| rename to sitescripts/stats/bin/datamerger.py | 
| --- a/sitescripts/logs/bin/mergeSubscriptionStats.py | 
| +++ b/sitescripts/stats/bin/datamerger.py | 
| @@ -10,63 +10,88 @@ | 
| # Adblock Plus is distributed in the hope that it will be useful, | 
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| # GNU General Public License for more details. | 
| # | 
| # You should have received a copy of the GNU General Public License | 
| # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| -import os, re, subprocess, urllib | 
| +import os, sys, re, codecs, subprocess, urllib, simplejson, traceback | 
| +import sitescripts.stats.common as common | 
| from sitescripts.utils import get_config, setupStderr | 
| -from ConfigParser import SafeConfigParser, NoOptionError | 
| -from StringIO import StringIO | 
| -def readStatsFile(path): | 
| - result = SafeConfigParser() | 
| - match = re.search(r'^ssh://(\w+)@([^/:]+)(?::(\d+))?', path) | 
def read_stats_file(path):
    """Load a JSON stats document from an SSH host, an HTTP(S) URL or a local file.

    path takes one of three forms:
      * "ssh://user@host[:port]" - ssh is run against the host and whatever it
        prints is parsed. No remote command is passed here, so the server
        presumably relies on a forced command - TODO confirm.
      * an "http://" or "https://" URL - the response body is parsed.
      * the path of an existing local file.

    Input is decoded as UTF-8 in all three cases.

    Returns the decoded JSON value.

    Raises IOError if path matches none of the forms above. Errors from the
    ssh subprocess, the download or the JSON parser propagate unchanged.
    """
    match = re.search(r"^ssh://(\w+)@([^/:]+)(?::(\d+))?", path)
    if match:
        user, host, port = match.groups()
        command = ["ssh", "-q", "-o", "NumberOfPasswordPrompts 0", "-T", "-k", "-l", user, host]
        if port:
            # ssh expects the port after lowercase -p; uppercase -P is the
            # scp spelling and does not select a port for ssh.
            command[1:1] = ["-p", port]
        data = subprocess.check_output(command)
        return simplejson.loads(data.decode("utf-8"))

    if path.startswith(("http://", "https://")):
        response = urllib.urlopen(path)
        try:
            # The body has already been read into a string, so it must go
            # through loads(); load() expects an object with a read() method.
            return simplejson.loads(response.read().decode("utf-8"))
        finally:
            response.close()

    if os.path.exists(path):
        with codecs.open(path, "rb", encoding="utf-8") as handle:
            return simplejson.load(handle)

    raise IOError("Path '%s' not recognized" % path)
| + | 
def get_stats_files(mirrors):
    """Yield a two-item [server_type, path] list for each selected mirror.

    Mirrors are configured as "mirror_<name>" options in the "stats" section,
    each holding "<server_type> <path>". If mirrors is non-empty only those
    names are looked up; otherwise every "mirror_" option is used.

    Missing options and values without a space are reported on stderr and
    skipped rather than raised.
    """
    config = get_config()

    if len(mirrors) == 0:
        option_names = [name for name in config.options("stats") if name.startswith("mirror_")]
    else:
        option_names = ["mirror_" + mirror for mirror in mirrors]

    for option_name in option_names:
        if not config.has_option("stats", option_name):
            print >>sys.stderr, "Option '%s' not found in the configuration" % option_name
            continue

        raw_value = config.get("stats", option_name)
        if " " not in raw_value:
            print >>sys.stderr, "Option '%s' has invalid value: '%s'" % (option_name, raw_value)
            continue

        # Split once only, so the path part may itself contain whitespace.
        yield raw_value.split(None, 1)
| -def mergeStatsFile(mirrorName, config1, config2): | 
| - def increaseOption(section, option, increase): | 
| - if config1.has_option(section, option): | 
| - oldval = config1.getint(section, option) | 
| - config1.set(section, option, str(oldval + increase)) | 
def merge_objects(object1, object2):
    """Recursively add the contents of object2 into object1, in place.

    For each key of object2:
      * missing from object1 - the value is stored as is (not copied);
      * value is a dict - merged recursively into object1[key];
      * anything else - added to object1[key] with +=.

    Recursion is decided by "is it a dict" rather than "is it not an int", so
    float values (and Python 2 long values) are summed instead of being
    treated as nested objects. Values are presumably numeric counters; other
    types that support += would be combined by their own rules - TODO confirm.

    Returns None; object1 is modified.
    """
    # items() is used rather than iteritems() so the function behaves the same
    # on Python 2 and Python 3 dictionaries.
    for key, value in object2.items():
        if key not in object1:
            object1[key] = value
        elif isinstance(value, dict):
            merge_objects(object1[key], value)
        else:
            object1[key] += value
| - for section in config2.sections(): | 
| - if not config1.has_section(section): | 
| - config1.add_section(section) | 
| - for option in config2.options(section): | 
| - increase = config2.getint(section, option) | 
| - increaseOption(section, option, increase) | 
def merge_stats_file(server_type, data):
    """Merge downloaded stats into the JSON files under the data directory.

    data maps month -> name -> stats object. Each (month, name) pair is stored
    at <dataDirectory>/<server_type>/<month>/<name>.json, with every path
    component passed through common.filename_encode(). An existing file is
    loaded, merged with the new data via merge_objects() and written back;
    a missing file starts from an empty object.

    Raises OSError if the target directory cannot be created. Errors from
    reading, parsing or writing the files propagate unchanged.
    """
    base_dir = os.path.join(get_config().get("stats", "dataDirectory"), common.filename_encode(server_type))
    for month, month_data in data.iteritems():
        for name, file_data in month_data.iteritems():
            path = os.path.join(base_dir, common.filename_encode(month), common.filename_encode(name + ".json"))
            if os.path.exists(path):
                with codecs.open(path, "rb", encoding="utf-8") as handle:
                    existing = simplejson.load(handle)
            else:
                existing = {}

            merge_objects(existing, file_data)

            target_dir = os.path.dirname(path)
            try:
                os.makedirs(target_dir)
            except OSError:
                # The only expected failure is that the directory is already
                # there; anything else (permissions, a file in the way) is a
                # real error and must not be hidden.
                if not os.path.isdir(target_dir):
                    raise

            with codecs.open(path, "wb", encoding="utf-8") as handle:
                simplejson.dump(existing, handle, indent=2, sort_keys=True)
| + | 
def merge_mirror_stats(mirrors):
    """Fetch and merge the stats of every selected mirror.

    mirrors is the list of mirror names handed to get_stats_files(). A failure
    on one mirror is reported on stderr with a traceback and the remaining
    mirrors are still processed.
    """
    for server_type, path in get_stats_files(mirrors):
        try:
            merge_stats_file(server_type, read_stats_file(path))
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit still stop the run.
            print >>sys.stderr, "Unable to merge stats for '%s'" % path
            traceback.print_exc()
| + | 
if __name__ == "__main__":
    # setupStderr() comes from sitescripts.utils; presumably it configures how
    # stderr output is handled for scripts - see that module.
    setupStderr()

    # Command-line arguments name the mirrors to merge; with none given,
    # every configured "mirror_" option is processed.
    requested_mirrors = sys.argv[1:]
    merge_mirror_stats(requested_mirrors)