Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/filterhits/geometrical_mean.py

Issue 4615801646612480: Issue 395 - Filter hits statistics backend (Closed)
Left Patch Set: Simplified geometrical_mean code and reduced filter inserts. Created March 16, 2015, 4:24 p.m.
Right Patch Set: Addressed further comments from Sebastian. Created April 2, 2015, 10:13 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/filterhits/db.py ('k') | sitescripts/filterhits/schema.sql » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import itertools 18 import itertools
19
19 from sitescripts.filterhits import db 20 from sitescripts.filterhits import db
20 21
21 def update(interval, data): 22 def update(interval, data):
22 """ 23 """
23 Generator that provides all the SQL and parameters needed to update the 24 Generator that provides all the SQL and parameters needed to update the
24 aggregations for the given data + interval in the database. 25 aggregations for the given data + interval in the database.
25 """ 26 """
26 for filter, filter_data in data['filters'].iteritems(): 27 for filter, filter_data in data["filters"].iteritems():
27 yield ("""INSERT IGNORE INTO `filters` 28 yield ("""INSERT IGNORE INTO `filters`
28 (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))""", filter) 29 (filter, sha1) VALUES (%s, UNHEX(SHA1(filter)))""", filter)
29 30
30 domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(), 31 domains = itertools.chain(filter_data.get("thirdParty", {}).iteritems(),
31 filter_data.get("firstParty", {}).iteritems()) 32 filter_data.get("firstParty", {}).iteritems())
32 for domain, domain_data in domains: 33 for domain, domain_data in domains:
33 yield ("""INSERT INTO `geometrical_mean` 34 yield ("""INSERT INTO `frequencies`
Wladimir Palant 2015/03/26 22:56:50 geometrical_mean is merely the aggregation approac…
kzar 2015/03/27 11:59:57 (I chose the plural to keep consistent with the ot…
34 (filter_sha1, domain, hits, timestamp) 35 (filter_sha1, domain, frequency, timestamp)
35 VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s)) 36 VALUES (UNHEX(SHA1(%s)), %s, %s, FROM_UNIXTIME(%s))
36 ON DUPLICATE KEY UPDATE 37 ON DUPLICATE KEY UPDATE
37 hits = ( 38 frequency = (
38 POW(hits, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) - 39 POW(frequency, 1 - (UNIX_TIMESTAMP(VALUES(timestamp)) -
39 UNIX_TIMESTAMP(timestamp)) / %s) * 40 UNIX_TIMESTAMP(timestamp)) / %s) *
40 POW(VALUES(hits), (UNIX_TIMESTAMP(VALUES(timestamp)) - 41 POW(VALUES(frequency), (UNIX_TIMESTAMP(VALUES(timestamp)) -
41 UNIX_TIMESTAMP(timestamp)) / %s)), 42 UNIX_TIMESTAMP(timestamp)) / %s)),
42 timestamp = VALUES(timestamp)""", 43 timestamp = VALUES(timestamp)""",
43 filter, domain, domain_data["hits"], 44 filter, domain, domain_data["hits"],
44 int(domain_data["latest"] / 1000), interval, interval) 45 int(domain_data["latest"] / 1000), interval, interval)
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld