Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/filterhits/geometrical_mean.py

Issue 4615801646612480: Issue 395 - Filter hits statistics backend (Closed)
Patch Set: Added API tests, addressed comments and some other improvements. Created Feb. 28, 2015, 7:36 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # coding: utf-8
2
3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH
5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation.
9 #
10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17
18 import itertools
19 import sitescripts.filterhits.db as db
20 from functools import partial
21
def filter_hits(data):
    """
    Generator that provides all filter hits for the given data,
    in tuples like (filter, domain, hits, latest).

    data must contain a "filters" mapping of filter text to per-filter data.
    Each per-filter entry may contain "thirdParty" and/or "firstParty"
    mappings of domain to a dict with "hits" and "latest" keys; third party
    domains are yielded before first party ones. A missing "filters" key
    raises KeyError once iteration starts.
    """
    # .items() rather than the Python 2 only .iteritems(), so that the
    # generator also works on Python 3.
    for filter_text, filter_data in data["filters"].items():
        domains = itertools.chain(filter_data.get("thirdParty", {}).items(),
                                  filter_data.get("firstParty", {}).items())
        for domain, domain_data in domains:
            # Floor division keeps the result an integer on both Python 2
            # and Python 3 (presumably converting milliseconds to seconds -
            # confirm against the data format).
            yield (filter_text, domain, domain_data["hits"],
                   domain_data["latest"] // 1000)
32
def update_query(interval, filter, domain, hits, latest):
    """
    Build the SQL statements needed to record a single filter hit.

    Returns a pair of queries. Each query is a tuple whose first item is the
    SQL text and whose remaining items are the values for its %s
    placeholders, in order. The first query registers the filter text in the
    `filters` table, the second inserts or updates the matching row in the
    `geometrical_mean` table.
    """
    # INSERT IGNORE leaves an already registered filter untouched.
    register_filter = """INSERT IGNORE INTO `filters`
                         (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))"""

    # On a duplicate key the stored and the new hits are combined as
    # POW(stored, 1 - w) * POW(new, w), where w is the time between the
    # stored and the new timestamp divided by interval.
    upsert_mean = """INSERT INTO `geometrical_mean`
                     (filter_sha1, domain, hits, timestamp)
                     VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s))
                     ON DUPLICATE KEY UPDATE
                     hits = (
                     POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) -
                     UNIX_TIMESTAMP(timestamp)) / %s) *
                     POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) -
                     UNIX_TIMESTAMP(timestamp)) / %s)),
                     timestamp = VALUES(timestamp)"""

    # The timestamp is passed as a whole number; interval is needed once
    # for each of the two POW() exponents.
    timestamp = int(latest)
    return ((register_filter, filter),
            (upsert_mean, filter, domain, hits, timestamp, interval, interval))
52
def update(interval, data):
    """
    Lazily yields every query produced by update_query for each filter hit
    that filter_hits finds in the given data, using the given interval.
    Each yielded item is one query as returned by update_query.
    """
    # One group of queries per filter hit, built only as the caller iterates.
    query_groups = (update_query(interval, *hit) for hit in filter_hits(data))
    for statement in itertools.chain.from_iterable(query_groups):
        yield statement
OLDNEW

Powered by Google App Engine
This is Rietveld