| Index: sitescripts/filterhits/geometrical_mean.py |
| diff --git a/sitescripts/filterhits/geometrical_mean.py b/sitescripts/filterhits/geometrical_mean.py |
| index 00bd3e43cc83a91ae7b8be72f485e910fbb98285..f23b231e52770d7b2c022019fb5a1dff17e4a2bd 100644 |
| --- a/sitescripts/filterhits/geometrical_mean.py |
| +++ b/sitescripts/filterhits/geometrical_mean.py |
| @@ -1,7 +1,7 @@ |
| # coding: utf-8 |
| # This file is part of the Adblock Plus web scripts, |
| -# Copyright (C) 2006-2014 Eyeo GmbH |
| +# Copyright (C) 2006-2015 Eyeo GmbH |
| # |
| # Adblock Plus is free software: you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License version 3 as |
| @@ -19,34 +19,10 @@ import itertools |
| import sitescripts.filterhits.db as db |
| from functools import partial |
| -def update_sql(interval, filter, domain, hits, timestamp): |
| - """ |
| - Returns the SQL required to insert / update the hits for a |
| - given domain + filter combo and to insert the filter if required. |
| - """ |
| - filter = db.escape(filter) |
| - |
| - return ( |
| - ("INSERT IGNORE INTO `filters` " + |
| - "(filter, md5) VALUES ('%s', UNHEX(MD5(filter)));" + |
| - "INSERT INTO `geometrical_mean` " + |
| - "(filter_md5, domain, hits, timestamp) " + |
| - "VALUES (UNHEX(MD5('%s')), '%s', %d, FROM_UNIXTIME(%d)) " + |
| - "ON DUPLICATE KEY UPDATE " + |
| - "hits = (" + |
| - " POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) - " + |
| - " UNIX_TIMESTAMP(timestamp)) / %d) * " + |
| - " POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) - " + |
| - " UNIX_TIMESTAMP(timestamp)) / %d)), " + |
| - "timestamp = VALUES(timestamp);") % ( |
| - filter, |
| - filter, db.escape(domain), int(hits), int(timestamp), |
| - int(interval), int(interval))) |
| - |
| def filter_hits(data): |
| """ |
| - Generator that provides all filter hits for the given data, |
| - in tuples like (filter, domain, hits, latest). |
| + Generator that provides all filter hits for the given data, |
| + in tuples like (filter, domain, hits, latest). |
| """ |
| for filter, filter_data in data['filters'].iteritems(): |
| domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(), |
| @@ -54,10 +30,31 @@ def filter_hits(data): |
| for domain, domain_data in domains: |
| yield (filter, domain, domain_data["hits"], domain_data["latest"] / 1000) |
| +def update_query(interval, filter, domain, hits, latest): |
| + """ |
| + Function that takes the fields for a filter hit and returns them arranged |
| + as the update SQL requires along with the SQL itself. |
| + """ |
| + return (("""INSERT IGNORE INTO `filters` |
| + (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))""", |
| + filter), |
| + ("""INSERT INTO `geometrical_mean` |
| + (filter_sha1, domain, hits, timestamp) |
| + VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s)) |
| + ON DUPLICATE KEY UPDATE |
| + hits = ( |
| + POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) - |
| + UNIX_TIMESTAMP(timestamp)) / %s) * |
| + POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) - |
| + UNIX_TIMESTAMP(timestamp)) / %s)), |
| + timestamp = VALUES(timestamp)""", |
| + filter, domain, hits, int(latest), interval, interval)) |
| + |
| def update(interval, data): |
| """ |
| - Returns an iterator of all the SQL statements needed to |
| - update the aggregations for the given data + interval. |
| + Returns an iterator of all the SQL and parameters needed to |
| + update the aggregations for the given data + interval in the database. |
| """ |
| - return itertools.imap(lambda fields: apply(partial(update_sql, interval), fields), |
| - filter_hits(data)) |
| + flatten = itertools.chain.from_iterable |
| + return flatten(itertools.imap(lambda fields: update_query(interval, *fields), |
|
Sebastian Noack
2015/02/17 14:59:17
I feel that you are kinda overusing itertools. A g
kzar
2015/02/24 18:05:11
Done.
|
| + filter_hits(data))) |