sitescripts/stats/bin/pagegenerator.py - Issue 29934561: #1537 - Remove stats processing from sitescripts

Unified Diff: sitescripts/stats/bin/pagegenerator.py

Issue 29934561: #1537 - Remove stats processing from sitescripts (Closed) | Base URL: https://hg.adblockplus.org/sitescripts

Patch Set: Created Nov. 2, 2018, 12:42 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: sitescripts/stats/bin/pagegenerator.py

===================================================================

deleted file mode 100644

--- a/sitescripts/stats/bin/pagegenerator.py

+++ /dev/null

@@ -1,189 +0,0 @@

-# This file is part of the Adblock Plus web scripts,

-# Adblock Plus is free software: you can redistribute it and/or modify

-# it under the terms of the GNU General Public License version 3 as

-# published by the Free Software Foundation.

-# Adblock Plus is distributed in the hope that it will be useful,

-# but WITHOUT ANY WARRANTY; without even the implied warranty of

-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

-# GNU General Public License for more details.

-# You should have received a copy of the GNU General Public License

-# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

-import os

-import re

-import codecs

-import json

-import time

-import itertools

-from datetime import date

-from sitescripts.utils import get_config, setupStderr, get_custom_template_environment, cached

-import sitescripts.stats.common as common

-from sitescripts.stats.countrycodes import countrycodes

-@cached(float('inf'))

-def get_template_environment():

- return get_custom_template_environment({

- 'monthname': lambda value: date(int(value[0:4]), int(value[4:]), 1).strftime('%b %Y'),

- 'weekday': lambda value: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'][int(value)],

- 'countryname': lambda value: countrycodes.get(value, 'Unknown'),

- 'sortfield': lambda value, field: (field['sort'] if 'sort' in field else default_sort)(value),

- 'maxhits': lambda items: max(value['hits'] for key, value in items),

- 'maxbandwidth': lambda items: max(value['bandwidth'] for key, value in items),

- 'sumhits': lambda items: sum(value['hits'] for key, value in items),

- 'sumbandwidth': lambda items: sum(value['bandwidth'] for key, value in items),

- 'percentage': lambda value, total: float(value) / total * 100 if total != 0 else 0,

- 'isspecial': lambda name, field: field['isspecial'](name) if 'isspecial' in field else False,

- 'defaultcount': get_default_count,

- })

-@cached(float('inf'))

-def get_main_page_template():

- return get_template_environment().get_template(get_config().get('stats', 'mainPageTemplate'))

-@cached(float('inf'))

-def get_file_stats_template():

- return get_template_environment().get_template(get_config().get('stats', 'filePageTemplate'))

-@cached(float('inf'))

-def get_file_overview_template():

- return get_template_environment().get_template(get_config().get('stats', 'fileOverviewTemplate'))

-def default_sort(obj):

- return sorted(obj.items(), key=lambda (k, v): v['hits'], reverse=True)

-def ensure_dir(path):

- dir = os.path.dirname(path)

- try:

- os.makedirs(dir)

- except OSError:

- pass

-def generate_main_page(outputfile, month, url, data):

- ensure_dir(outputfile)

- get_main_page_template().stream({

- 'now': time.time(),

- 'month': month,

- 'url': url,

- 'data': data,

- }).dump(outputfile, encoding='utf-8')

-def generate_file_stats(outputfile, month, url, overview_url, data, filter=None, filtered_urls={}):

- ensure_dir(outputfile)

- get_file_stats_template().stream({

- 'now': time.time(),

- 'month': month,

- 'url': url,

- 'overview_url': overview_url,

- 'data': data,

- 'fields': common.fields,

- 'filter': filter,

- 'filtered_urls': filtered_urls,

- }).dump(outputfile, encoding='utf-8')

-def generate_file_overview(outputfile, url, data):

- ensure_dir(outputfile)

- get_file_overview_template().stream({

- 'now': time.time(),

- 'url': url,

- 'data': data,

- }).dump(outputfile, encoding='utf-8')

-def get_names(dir, needdirectories):

- for file in os.listdir(dir):

- path = os.path.join(dir, file)

- if (needdirectories and os.path.isdir(path)) or (not needdirectories and os.path.isfile(path)):

- yield common.filename_decode(file), path

-def get_default_count(field):

- return field.get('defaultcount', 30)

-def generate_pages(datadir, outputdir):

- for server_type, server_type_dir in get_names(datadir, True):

- baseURL = get_config().get('stats', 'baseURL_' + server_type)

- filedata = {}

- current_month = None

- for month, month_dir in get_names(server_type_dir, True):

- if current_month == None or month > current_month:

- current_month = month

- for filename, path in get_names(month_dir, False):

- filename = re.sub(r'\.json$', '', filename)

- with codecs.open(path, 'rb', encoding='utf-8') as file:

- data = json.load(file)

- overview_url = '../../overview-' + common.filename_encode(filename + '.html')

- filtered_urls = {}

- for field in common.fields:

- if field['name'] not in data:

- continue

- # Create filtered views for the first thirty values of a field if they

- # have filtered data.

- sorted_field = get_template_environment().filters['sortfield'](data[field['name']], field)

- for name, value in sorted_field[0:get_default_count(field)]:

- if filter(lambda k: k not in ('hits', 'bandwidth'), value.iterkeys()):

- outputfile = os.path.join(outputdir,

- common.filename_encode(server_type),

- common.filename_encode(month),

- common.filename_encode(filename),

- 'filtered-%s-%s.html' % (

- common.filename_encode(field['name']),

- common.filename_encode(name),

- ))

- generate_file_stats(outputfile, month, baseURL + filename, overview_url,

- value, filter={'field': field, 'value': name})

- if not field['name'] in filtered_urls:

- filtered_urls[field['name']] = {}

- filtered_urls[field['name']][name] = os.path.basename(outputfile)

- outputfile = os.path.join(outputdir,

- common.filename_encode(server_type),

- common.filename_encode(month),

- common.filename_encode(filename),

- 'index.html')

- generate_file_stats(outputfile, month, baseURL + filename, overview_url,

- data, filtered_urls=filtered_urls)

- if filename not in filedata:

- filedata[filename] = {}

- month_url = '%s/%s/%s' % (common.filename_encode(month),

- common.filename_encode(filename),

- 'index.html')

- filedata[filename][month] = {'url': month_url, 'hits': data['hits'], 'bandwidth': data['bandwidth']}

- monthdata = {}

- for filename, data in filedata.iteritems():

- outputfile = os.path.join(outputdir,

- common.filename_encode(server_type),

- 'overview-' + common.filename_encode(filename + '.html'))

- generate_file_overview(outputfile, baseURL + filename, data)

- if current_month in data:

- monthdata[filename] = dict(data[current_month])

- outputfile = os.path.join(outputdir, common.filename_encode(server_type), 'index.html')

- generate_main_page(outputfile, current_month, baseURL, monthdata)

-if __name__ == '__main__':

- setupStderr()

- datadir = get_config().get('stats', 'dataDirectory')

- outputdir = get_config().get('stats', 'outputDirectory')

- generate_pages(datadir, outputdir)

« no previous file with comments | « previous file: sitescripts/stats/bin/logprocessor.py ('k') | next file: sitescripts/stats/common.py » ('j') | next file with comments: tox.ini » ('J')