Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/filterhits/geometrical_mean.py

Issue 4615801646612480: Issue 395 - Filter hits statistics backend (Closed)
Patch Set: Improvements regarding comments Created Feb. 17, 2015, 10:50 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2014 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import itertools 18 import itertools
19 import sitescripts.filterhits.db as db 19 import sitescripts.filterhits.db as db
20 from functools import partial 20 from functools import partial
21 21
22 def update_sql(interval, filter, domain, hits, timestamp):
23 """
24 Returns the SQL required to insert / update the hits for a
25 given domain + filter combo and to insert the filter if required.
26 """
27 filter = db.escape(filter)
28
29 return (
30 ("INSERT IGNORE INTO `filters` " +
31 "(filter, md5) VALUES ('%s', UNHEX(MD5(filter)));" +
32 "INSERT INTO `geometrical_mean` " +
33 "(filter_md5, domain, hits, timestamp) " +
34 "VALUES (UNHEX(MD5('%s')), '%s', %d, FROM_UNIXTIME(%d)) " +
35 "ON DUPLICATE KEY UPDATE " +
36 "hits = (" +
37 " POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) - " +
38 " UNIX_TIMESTAMP(timestamp)) / %d) * " +
39 " POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) - " +
40 " UNIX_TIMESTAMP(timestamp)) / %d)), " +
41 "timestamp = VALUES(timestamp);") % (
42 filter,
43 filter, db.escape(domain), int(hits), int(timestamp),
44 int(interval), int(interval)))
45
46 def filter_hits(data): 22 def filter_hits(data):
47 """ 23 """
48 Generator that provides all filter hits for the given data, 24 Generator that provides all filter hits for the given data,
49 in tuples like (filter, domain, hits, latest). 25 in tuples like (filter, domain, hits, latest).
50 """ 26 """
51 for filter, filter_data in data['filters'].iteritems(): 27 for filter, filter_data in data['filters'].iteritems():
52 domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(), 28 domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(),
53 filter_data.get("firstParty", {}).iteritems()) 29 filter_data.get("firstParty", {}).iteritems())
54 for domain, domain_data in domains: 30 for domain, domain_data in domains:
55 yield (filter, domain, domain_data["hits"], domain_data["latest"] / 1000) 31 yield (filter, domain, domain_data["hits"], domain_data["latest"] / 1000)
56 32
33 def update_query(interval, filter, domain, hits, latest):
34 """
35 Function that takes the fields for a filter hit and returns them arranged
36 as the update SQL requires along with the SQL itself.
37 """
38 return (("""INSERT IGNORE INTO `filters`
39 (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))""",
40 filter),
41 ("""INSERT INTO `geometrical_mean`
42 (filter_sha1, domain, hits, timestamp)
43 VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s))
44 ON DUPLICATE KEY UPDATE
45 hits = (
46 POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) -
47 UNIX_TIMESTAMP(timestamp)) / %s) *
48 POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) -
49 UNIX_TIMESTAMP(timestamp)) / %s)),
50 timestamp = VALUES(timestamp)""",
51 filter, domain, hits, int(latest), interval, interval))
52
57 def update(interval, data): 53 def update(interval, data):
58 """ 54 """
59 Returns an iterator of all the SQL statements needed to 55 Returns an iterator of all the SQL and parameters needed to
60 update the aggregations for the given data + interval. 56 update the aggregations for the given data + interval in the database.
61 """ 57 """
62 return itertools.imap(lambda fields: apply(partial(update_sql, interval), fields), 58 flatten = itertools.chain.from_iterable
63 filter_hits(data)) 59 return flatten(itertools.imap(lambda fields: update_query(interval, *fields),
Sebastian Noack 2015/02/17 14:59:17 I feel that you are kinda overusing itertools. A g
kzar 2015/02/24 18:05:11 Done.
60 filter_hits(data)))
OLD | NEW

Powered by Google App Engine
This is Rietveld