| Index: sitescripts/stats/bin/pagegenerator.py |
| =================================================================== |
| deleted file mode 100644 |
| --- a/sitescripts/stats/bin/pagegenerator.py |
| +++ /dev/null |
| @@ -1,189 +0,0 @@ |
| -# This file is part of the Adblock Plus web scripts, |
| -# Copyright (C) 2006-present eyeo GmbH |
| -# |
| -# Adblock Plus is free software: you can redistribute it and/or modify |
| -# it under the terms of the GNU General Public License version 3 as |
| -# published by the Free Software Foundation. |
| -# |
| -# Adblock Plus is distributed in the hope that it will be useful, |
| -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| -# GNU General Public License for more details. |
| -# |
| -# You should have received a copy of the GNU General Public License |
| -# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| - |
| -import os |
| -import re |
| -import codecs |
| -import json |
| -import time |
| -import itertools |
| -from datetime import date |
| -from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached |
| -import sitescripts.stats.common as common |
| -from sitescripts.stats.countrycodes import countrycodes |
| - |
| - |
| -@cached(float('inf')) |
| -def get_template_environment(): |
| - return get_custom_template_environment({ |
| - 'monthname': lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime('%b %Y'), |
| - 'weekday': lambda value: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'][int(value)], |
| - 'countryname': lambda value: countrycodes.get(value, 'Unknown'), |
| - 'sortfield': lambda value, field: (field['sort'] if 'sort' in field else default_sort)(value), |
| - 'maxhits': lambda items: max(value['hits'] for key, value in items), |
| - 'maxbandwidth': lambda items: max(value['bandwidth'] for key, value in items), |
| - 'sumhits': lambda items: sum(value['hits'] for key, value in items), |
| - 'sumbandwidth': lambda items: sum(value['bandwidth'] for key, value in items), |
| - 'percentage': lambda value, total: float(value) / total * 100 if total != 0 else 0, |
| - 'isspecial': lambda name, field: field['isspecial'](name) if 'isspecial' in field else False, |
| - 'defaultcount': get_default_count, |
| - }) |
| - |
| - |
| -@cached(float('inf')) |
| -def get_main_page_template(): |
| - return get_template_environment().get_template(get_config().get('stats', 'mainPageTemplate')) |
| - |
| - |
| -@cached(float('inf')) |
| -def get_file_stats_template(): |
| - return get_template_environment().get_template(get_config().get('stats', 'filePageTemplate')) |
| - |
| - |
| -@cached(float('inf')) |
| -def get_file_overview_template(): |
| - return get_template_environment().get_template(get_config().get('stats', 'fileOverviewTemplate')) |
| - |
| - |
| -def default_sort(obj): |
| - return sorted(obj.items(), key=lambda (k, v): v['hits'], reverse=True) |
| - |
| - |
| -def ensure_dir(path): |
| - dir = os.path.dirname(path) |
| - try: |
| - os.makedirs(dir) |
| - except OSError: |
| - pass |
| - |
| - |
| -def generate_main_page(outputfile, month, url, data): |
| - ensure_dir(outputfile) |
| - get_main_page_template().stream({ |
| - 'now': time.time(), |
| - 'month': month, |
| - 'url': url, |
| - 'data': data, |
| - }).dump(outputfile, encoding='utf-8') |
| - |
| - |
| -def generate_file_stats(outputfile, month, url, overview_url, data, filter=None, filtered_urls={}): |
| - ensure_dir(outputfile) |
| - get_file_stats_template().stream({ |
| - 'now': time.time(), |
| - 'month': month, |
| - 'url': url, |
| - 'overview_url': overview_url, |
| - 'data': data, |
| - 'fields': common.fields, |
| - 'filter': filter, |
| - 'filtered_urls': filtered_urls, |
| - }).dump(outputfile, encoding='utf-8') |
| - |
| - |
| -def generate_file_overview(outputfile, url, data): |
| - ensure_dir(outputfile) |
| - get_file_overview_template().stream({ |
| - 'now': time.time(), |
| - 'url': url, |
| - 'data': data, |
| - }).dump(outputfile, encoding='utf-8') |
| - |
| - |
| -def get_names(dir, needdirectories): |
| - for file in os.listdir(dir): |
| - path = os.path.join(dir, file) |
| - if (needdirectories and os.path.isdir(path)) or (not needdirectories and os.path.isfile(path)): |
| - yield common.filename_decode(file), path |
| - |
| - |
| -def get_default_count(field): |
| - return field.get('defaultcount', 30) |
| - |
| - |
| -def generate_pages(datadir, outputdir): |
| - for server_type, server_type_dir in get_names(datadir, True): |
| - baseURL = get_config().get('stats', 'baseURL_' + server_type) |
| - filedata = {} |
| - current_month = None |
| - for month, month_dir in get_names(server_type_dir, True): |
| - if current_month == None or month > current_month: |
| - current_month = month |
| - |
| - for filename, path in get_names(month_dir, False): |
| - filename = re.sub(r'\.json$', '', filename) |
| - with codecs.open(path, 'rb', encoding='utf-8') as file: |
| - data = json.load(file) |
| - |
| - overview_url = '../../overview-' + common.filename_encode(filename + '.html') |
| - filtered_urls = {} |
| - for field in common.fields: |
| - if field['name'] not in data: |
| - continue |
| - # Create filtered views for the first thirty values of a field if they |
| - # have filtered data. |
| - sorted_field = get_template_environment().filters['sortfield'](data[field['name']], field) |
| - for name, value in sorted_field[0:get_default_count(field)]: |
| - if filter(lambda k: k not in ('hits', 'bandwidth'), value.iterkeys()): |
| - outputfile = os.path.join(outputdir, |
| - common.filename_encode(server_type), |
| - common.filename_encode(month), |
| - common.filename_encode(filename), |
| - 'filtered-%s-%s.html' % ( |
| - common.filename_encode(field['name']), |
| - common.filename_encode(name), |
| - )) |
| - generate_file_stats(outputfile, month, baseURL + filename, overview_url, |
| - value, filter={'field': field, 'value': name}) |
| - |
| - if not field['name'] in filtered_urls: |
| - filtered_urls[field['name']] = {} |
| - filtered_urls[field['name']][name] = os.path.basename(outputfile) |
| - |
| - outputfile = os.path.join(outputdir, |
| - common.filename_encode(server_type), |
| - common.filename_encode(month), |
| - common.filename_encode(filename), |
| - 'index.html') |
| - generate_file_stats(outputfile, month, baseURL + filename, overview_url, |
| - data, filtered_urls=filtered_urls) |
| - |
| - if filename not in filedata: |
| - filedata[filename] = {} |
| - month_url = '%s/%s/%s' % (common.filename_encode(month), |
| - common.filename_encode(filename), |
| - 'index.html') |
| - filedata[filename][month] = {'url': month_url, 'hits': data['hits'], 'bandwidth': data['bandwidth']} |
| - |
| - monthdata = {} |
| - for filename, data in filedata.iteritems(): |
| - outputfile = os.path.join(outputdir, |
| - common.filename_encode(server_type), |
| - 'overview-' + common.filename_encode(filename + '.html')) |
| - generate_file_overview(outputfile, baseURL + filename, data) |
| - |
| - if current_month in data: |
| - monthdata[filename] = dict(data[current_month]) |
| - |
| - outputfile = os.path.join(outputdir, common.filename_encode(server_type), 'index.html') |
| - generate_main_page(outputfile, current_month, baseURL, monthdata) |
| - |
| - |
| -if __name__ == '__main__': |
| - setupStderr() |
| - |
| - datadir = get_config().get('stats', 'dataDirectory') |
| - outputdir = get_config().get('stats', 'outputDirectory') |
| - generate_pages(datadir, outputdir) |