Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/filterhits/bin/process_logs.py

Issue 4615801646612480: Issue 395 - Filter hits statistics backend (Closed)
Patch Set: Improvements regarding comments Created Feb. 17, 2015, 10:50 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2014 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import MySQLdb, itertools, json, os, sys 18 import MySQLdb, itertools, json, os, sys
19 from sitescripts.utils import get_config 19 from sitescripts.utils import get_config
20 import sitescripts.filterhits.common as common 20 import sitescripts.filterhits.common as common
21 import sitescripts.filterhits.db as db 21 import sitescripts.filterhits.db as db
22 import sitescripts.filterhits.geometrical_mean as geometrical_mean 22 import sitescripts.filterhits.geometrical_mean as geometrical_mean
23 23
24 last_log_file = None 24 last_log_file = None
25 25
26 def log_files(dir): 26 def log_files(dir):
27 """ 27 """
28 Provides a generator of filter hits log files for the given directory. 28 Provides a generator of filter hits log files for the given directory.
29 Works recursively, relative path of log file is returned. 29 Works recursively, relative path of log file is returned.
30 """ 30 """
31 for root, subdirs, files in os.walk(dir): 31 for root, subdirs, files in os.walk(dir):
32 for f in files: 32 for f in files:
33 if f.endswith(".log") and f[0].isdigit(): 33 if os.path.splitext(f)[1] == ".log" and f[0].isdigit():
34 yield os.path.join(root, f) 34 yield os.path.join(root, f)
35 35
36 def read_data(log_file): 36 def read_data(log_file):
37 """ 37 """
38 Read, parse and return the JSON data for the given log file name. 38 Read, parse and return the JSON data for the given log file name.
39 (As a side effect sets the global last_log_file to the log file name.) 39 (As a side effect sets the global last_log_file to the log file name.)
40 """ 40 """
41 global last_log_file 41 global last_log_file
42 try: 42 try:
43 with open(log_file, "r") as f: 43 with open(log_file, "r") as f:
44 # Skip past the date and GET parameters 44 # Skip past the date and GET parameters
45 s = "" 45 current = last = None
46 while s != "\" ": 46 while not (last == '"' and current == " "):
Sebastian Noack 2015/02/17 14:59:17 If you use the != instead the == operator in the f
kzar 2015/02/24 18:05:11 I'm aware of Demorgan's law but I think the intent
Sebastian Noack 2015/02/26 16:39:25 I'd rather say, as more logical operations involve
kzar 2015/02/28 19:39:56 Well what we're saying is something like "While th
Sebastian Noack 2015/03/02 10:04:01 Not sure whether I like it, but fair enough.
47 s = s[-1:] + f.read(1) 47 last, current = current, f.read(1)
48 if not current:
49 sys.exit("Unexpected EOF in log file %s" % log_file)
50
48 # Read the JSON 51 # Read the JSON
49 data = json.load(f) 52 data = json.load(f)
50 # Keep track of the current log file in global variable in case we need to 53 # Keep track of the current log file in global variable in case we need to
51 # identify it later if there's a problem. (This works because the files are 54 # identify it later if there's a problem. (This works because the files are
52 # processed lazily.) 55 # processed lazily.)
53 last_log_file = log_file 56 last_log_file = log_file
54 except IOError: 57 except IOError:
55 sys.exit("Could not read log file %s" % log_file) 58 sys.exit("Could not read log file %s" % log_file)
56 if not common.valid_log_data(data):
57 sys.exit("Invalid data in log file %s." % log_file)
58 return data 59 return data
59 60
60 if __name__ == "__main__": 61 if __name__ == "__main__":
61 if not len(sys.argv) == 2: 62 if not len(sys.argv) == 2:
62 print "Usage: python -m sitescripts.filterhits.bin.process_logs /path/to/logs" 63 print "Usage: python -m sitescripts.filterhits.bin.process_logs /path/to/logs"
63 sys.exit(1) 64 sys.exit(1)
64 65
65 config = get_config() 66 config = get_config()
66 interval = config.get("filterhitstats", "interval") 67 interval = config.get("filterhitstats", "interval")
67 68
68 def read_update(f): 69 def read_update(f):
69 return geometrical_mean.update(interval, read_data(f)) 70 return geometrical_mean.update(interval, read_data(f))
70 71
71 if sys.argv[1].endswith(".log"): 72 if sys.argv[1].endswith(".log"):
72 sql = read_update(sys.argv[1]) 73 sql = read_update(sys.argv[1])
73 else: 74 else:
74 sql = itertools.chain.from_iterable(itertools.imap(read_update, 75 sql = itertools.chain.from_iterable(itertools.imap(read_update,
75 log_files(sys.argv[1]))) 76 log_files(sys.argv[1])))
76 77
77 try: 78 try:
78 db.connect(config.get("filterhitstats", "dbuser"), 79 db_connection = db.connect(config.get("filterhitstats", "dbuser"),
79 config.get("filterhitstats", "dbpassword"), 80 config.get("filterhitstats", "dbpassword"),
80 config.get("filterhitstats", "database")) 81 config.get("filterhitstats", "database"))
81 db.write(sql) 82 db.write(db_connection, sql)
82 except MySQLdb.Error, e: 83 except MySQLdb.Error, e:
83 sys.exit("Failed to process file %s, all changes rolled back. MySQl error (%d): \"%s\"\n" % ( 84 sys.exit("Failed to process file %s, all changes rolled back. MySQl error (%d): \"%s\"\n" % (
84 last_log_file, e.args[0], e.args[1] 85 last_log_file, e.args[0], e.args[1]
85 )) 86 ))
86 finally: 87 finally:
87 db.disconnect() 88 if db_connection:
89 db_connection.close()
OLD | NEW

Powered by Google App Engine
This is Rietveld