Left: | ||
Right: |
OLD | NEW |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2014 Eyeo GmbH | 4 # Copyright (C) 2006-2015 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import MySQLdb, itertools, json, os, sys | 18 import MySQLdb, itertools, json, os, sys |
19 from sitescripts.utils import get_config | 19 from sitescripts.utils import get_config |
20 import sitescripts.filterhits.common as common | 20 import sitescripts.filterhits.common as common |
21 import sitescripts.filterhits.db as db | 21 import sitescripts.filterhits.db as db |
22 import sitescripts.filterhits.geometrical_mean as geometrical_mean | 22 import sitescripts.filterhits.geometrical_mean as geometrical_mean |
23 | 23 |
24 last_log_file = None | 24 last_log_file = None |
25 | 25 |
26 def log_files(dir): | 26 def log_files(dir): |
27 """ | 27 """ |
28 Provides a generator of filter hits log files for the given directory. | 28 Provides a generator of filter hits log files for the given directory. |
29 Works recursively, relative path of log file is returned. | 29 Works recursively, relative path of log file is returned. |
30 """ | 30 """ |
31 for root, subdirs, files in os.walk(dir): | 31 for root, subdirs, files in os.walk(dir): |
32 for f in files: | 32 for f in files: |
33 if f.endswith(".log") and f[0].isdigit(): | 33 if os.path.splitext(f)[1] == ".log" and f[0].isdigit(): |
34 yield os.path.join(root, f) | 34 yield os.path.join(root, f) |
35 | 35 |
36 def read_data(log_file): | 36 def read_data(log_file): |
37 """ | 37 """ |
38 Read, parse and return the JSON data for the given log file name. | 38 Read, parse and return the JSON data for the given log file name. |
39 (As a side effect sets the global last_log_file to the log file name.) | 39 (As a side effect sets the global last_log_file to the log file name.) |
40 """ | 40 """ |
41 global last_log_file | 41 global last_log_file |
42 try: | 42 try: |
43 with open(log_file, "r") as f: | 43 with open(log_file, "r") as f: |
44 # Skip past the date and GET parameters | 44 # Skip past the date and GET parameters |
45 s = "" | 45 current = last = None |
46 while s != "\" ": | 46 while not (last == '"' and current == " "): |
Sebastian Noack
2015/02/17 14:59:17
If you use the != instead the == operator in the f
kzar
2015/02/24 18:05:11
I'm aware of Demorgan's law but I think the intent
Sebastian Noack
2015/02/26 16:39:25
I'd rather say, as more logical operations involve
kzar
2015/02/28 19:39:56
Well what we're saying is something like "While th
Sebastian Noack
2015/03/02 10:04:01
Not sure whether I like it, but fair enough.
| |
47 s = s[-1:] + f.read(1) | 47 last, current = current, f.read(1) |
48 if not current: | |
49 sys.exit("Unexpected EOF in log file %s" % log_file) | |
50 | |
48 # Read the JSON | 51 # Read the JSON |
49 data = json.load(f) | 52 data = json.load(f) |
50 # Keep track of the current log file in global variable in case we need to | 53 # Keep track of the current log file in global variable in case we need to |
51 # identify it later if there's a problem. (This works because the files are | 54 # identify it later if there's a problem. (This works because the files are |
52 # processed lazily.) | 55 # processed lazily.) |
53 last_log_file = log_file | 56 last_log_file = log_file |
54 except IOError: | 57 except IOError: |
55 sys.exit("Could not read log file %s" % log_file) | 58 sys.exit("Could not read log file %s" % log_file) |
56 if not common.valid_log_data(data): | |
57 sys.exit("Invalid data in log file %s." % log_file) | |
58 return data | 59 return data |
59 | 60 |
60 if __name__ == "__main__": | 61 if __name__ == "__main__": |
61 if not len(sys.argv) == 2: | 62 if not len(sys.argv) == 2: |
62 print "Usage: python -m sitescripts.filterhits.bin.process_logs /path/to/logs" | 63 print "Usage: python -m sitescripts.filterhits.bin.process_logs /path/to/logs" |
63 sys.exit(1) | 64 sys.exit(1) |
64 | 65 |
65 config = get_config() | 66 config = get_config() |
66 interval = config.get("filterhitstats", "interval") | 67 interval = config.get("filterhitstats", "interval") |
67 | 68 |
68 def read_update(f): | 69 def read_update(f): |
69 return geometrical_mean.update(interval, read_data(f)) | 70 return geometrical_mean.update(interval, read_data(f)) |
70 | 71 |
71 if sys.argv[1].endswith(".log"): | 72 if sys.argv[1].endswith(".log"): |
72 sql = read_update(sys.argv[1]) | 73 sql = read_update(sys.argv[1]) |
73 else: | 74 else: |
74 sql = itertools.chain.from_iterable(itertools.imap(read_update, | 75 sql = itertools.chain.from_iterable(itertools.imap(read_update, |
75 log_files(sys.argv[1]))) | 76 log_files(sys.argv[1]))) |
76 | 77 |
77 try: | 78 try: |
78 db.connect(config.get("filterhitstats", "dbuser"), | 79 db_connection = db.connect(config.get("filterhitstats", "dbuser"), |
79 config.get("filterhitstats", "dbpassword"), | 80 config.get("filterhitstats", "dbpassword"), |
80 config.get("filterhitstats", "database")) | 81 config.get("filterhitstats", "database")) |
81 db.write(sql) | 82 db.write(db_connection, sql) |
82 except MySQLdb.Error, e: | 83 except MySQLdb.Error, e: |
83 sys.exit("Failed to process file %s, all changes rolled back. MySQl error (%d): \"%s\"\n" % ( | 84 sys.exit("Failed to process file %s, all changes rolled back. MySQl error (%d): \"%s\"\n" % ( |
84 last_log_file, e.args[0], e.args[1] | 85 last_log_file, e.args[0], e.args[1] |
85 )) | 86 )) |
86 finally: | 87 finally: |
87 db.disconnect() | 88 if db_connection: |
89 db_connection.close() | |
OLD | NEW |