Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: modules/nagios/files/check_bandwidth

Issue 12375002: Implement more detailed bandwidth monitoring (Closed)
Patch Set: Increased socket timeout and offloaded time calculations to tcpdump Created Oct. 9, 2013, 7:36 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import os, re, subprocess, sys 3 import os, re, subprocess, sys, socket, struct, fcntl
4
5 INTERVAL = 5
4 6
5 def format_bandwidth(bits): 7 def format_bandwidth(bits):
6 if bits >= 1000000: 8 if bits >= 1000000:
7 return "%.2f Mbit/s" % (bits / 1000000) 9 return "%.2f Mbit/s" % (bits / 1000000)
8 elif bits >= 1000: 10 elif bits >= 1000:
9 return "%.2f kbit/s" % (bits / 1000) 11 return "%.2f kbit/s" % (bits / 1000)
10 else: 12 else:
11 return "%.2f bit/s" % bits 13 return "%.2f bit/s" % bits
12 14
15 def getmacaddress():
16 # We are calling SIOCGIFHWADDR (0x8927 according to man ioctl_list) here. See
17 # man netdevice for the request structure: it has to start with 16 bytes
18 # containing the interface name, the OS will write 8 bytes after that (2 bytes
19 # family name and 6 bytes actual MAC address).
20 s = socket.socket()
21 return fcntl.ioctl(s.fileno(), 0x8927, struct.pack("24s", "eth0"))[18:24]
Felix Dahlke 2013/10/10 08:29:08 I'm pretty sure this will only work on Linux like
Wladimir Palant 2013/10/10 09:37:46 Yes, I've seen that. This function looks very much
22
13 if __name__ == "__main__": 23 if __name__ == "__main__":
14 if len(sys.argv) != 3: 24 if len(sys.argv) != 3:
15 script_name = os.path.basename(sys.argv[0]) 25 script_name = os.path.basename(sys.argv[0])
16 print "Usage: %s WARN CRIT" % script_name 26 print "Usage: %s WARN CRIT" % script_name
17 sys.exit(0) 27 sys.exit(0)
18 28
19 (warn, crit) = sys.argv[1:3] 29 (warn, crit) = sys.argv[1:3]
20 warn = int(sys.argv[1]) 30 warn = int(sys.argv[1])
21 crit = int(sys.argv[2]) 31 crit = int(sys.argv[2])
22 32
23 process_output = subprocess.check_output(["bwm-ng", "-I", "eth0", "-t", "5000", "-c", "1", "-o", "csv"]) 33 process = subprocess.Popen(
24 data = process_output.splitlines()[0].split(";") 34 ["sudo", "tcpdump", "-q", "-s", "64", "-G", str(INTERVAL), "-W", "1", "-w", "-"],
25 tx = float(data[2]) * 8 35 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
26 rx = float(data[3]) * 8 36 mac = getmacaddress()
27 status = "rx %s tx %s" % (format_bandwidth(rx), format_bandwidth(tx))
28 37
29 perfdata = "rx=%i;%i;%i tx=%i;%i;%i" % (rx, warn, crit, tx, warn, crit) 38 total = {"rx": 0, "tx": 0}
39 http = {"rx": 0, "tx": 0}
40 https = {"rx": 0, "tx": 0}
41 ssh = {"rx": 0, "tx": 0}
42 dns = {"rx": 0, "tx": 0}
43 other = {"rx": 0, "tx": 0}
44 other_detailed = {}
30 45
31 output = "%s|%s" % (status, perfdata) 46 # See http://wiki.wireshark.org/Development/LibpcapFileFormat for libpcap format description
47 magic_number, _, _, _, _, _, _ = struct.unpack("IHHiIII", process.stdout.read(24))
Felix Dahlke 2013/10/10 08:29:08 I'd find this more readable if the result of proce
48 if magic_number != 0xa1b2c3d4:
49 raise Exception("Unexpected format")
Felix Dahlke 2013/10/10 08:29:08 "Unsupported byte order" or something along those
Wladimir Palant 2013/10/10 09:37:46 No, there can be no other byte order - this script
50 while True:
51 header = process.stdout.read(16)
Felix Dahlke 2013/10/10 08:29:08 header -> record_header?
52 if header == "":
53 break;
54 _, _, incl_len, orig_len = struct.unpack("IIII", header)
32 55
33 if rx >= crit or tx >= crit: 56 # Convert bytes to bits and normalize to seconds
57 length = float(orig_len * 8) / INTERVAL
Felix Dahlke 2013/10/10 08:29:08 length -> bits_per_second?
Wladimir Palant 2013/10/10 09:37:46 bps?
Felix Dahlke 2013/10/10 09:44:34 Sure.
58
59 def add_other(description):
60 other[direction] += length
61 other_detailed[description] = other_detailed.get(description, 0) + length
62
63 payload = process.stdout.read(incl_len)
64
65 # Unpack Ethernet frame, http://en.wikipedia.org/wiki/Ethernet_frame#Structure
66 destination, source, protocol = struct.unpack("!6s6sH", payload[:14])
Felix Dahlke 2013/10/10 08:29:08 1. Shouldn't the offset be 8 instead of 14? What a
Wladimir Palant 2013/10/10 09:37:46 14 isn't offset but length here. The Ethernet fram
67 payload = payload[14:]
Felix Dahlke 2013/10/10 08:29:08 The payload should be the field after EtherType ac
Wladimir Palant 2013/10/10 09:37:46 As with the previous comment, preamble isn't being
68 direction = "rx" if destination == mac else "tx"
69 total[direction] += length
70
71 # Check Level 3 protocol
72 if protocol == 0x0800: # IPv4, http://en.wikipedia.org/wiki/Internet_Protocol_version_4#Header
73 ihl = ord(payload[0]) & 0xF
Felix Dahlke 2013/10/10 08:29:08 Shouldn't it be 0x4? 0xF would get us both version
Wladimir Palant 2013/10/10 09:37:46 No, that's correct - "& 0xF0" gives you the first
Felix Dahlke 2013/10/10 09:44:34 Um, yes, I confused something there, it's the numb
74 protocol = ord(payload[9])
75 payload = payload[ihl * 4:]
76 elif protocol == 0x86DD: # IPv6, http://en.wikipedia.org/wiki/IPv6_packet#Fixed_header
77 protocol = ord(payload[6])
78 payload = payload[40:]
79 else:
80 add_other("L3 0x%04X" % protocol)
81 continue
82
83 # Check Level 4 protocol
84 if protocol in (0x06, 0x11): # TCP, UDP
85 # The lower port number should be the real port, the other one will be
Felix Dahlke 2013/10/10 08:29:08 I think this comment should move down a bit, on to
86 # the ephemeral port.
87 source_port, destination_port = struct.unpack('!HH', payload[:4])
88 protocol = "TCP" if protocol == 0x06 else "UDP"
89 port = min(source_port, destination_port)
Felix Dahlke 2013/10/10 08:29:08 Why not do this based on the direction? port = so
Wladimir Palant 2013/10/10 09:37:46 Because our servers can open connections as well -
90 else:
91 add_other("L4 0x%02X" % protocol)
92 continue
93
94 if protocol == "TCP" and port == 80:
95 http[direction] += length
96 elif protocol == "TCP" and port == 443:
97 https[direction] += length
98 elif protocol == "TCP" and port == 22:
99 ssh[direction] += length
100 elif port == 53:
101 dns[direction] += length
102 else:
103 add_other("Port %i" % port)
104 continue
105
106 status = []
107 perfdata = []
108 def add_status(id, values):
109 rx = values["rx"]
110 tx = values["tx"]
111 status.append("%srx %s %stx %s" % (id, format_bandwidth(rx), id, format_bandwidth(tx)))
112 if id == "":
113 perfdata.append("rx=%i;%i;%i tx=%i;%i;%i" % (rx, warn, crit, tx, warn, crit))
114 else:
115 perfdata.append("%srx=%i %stx=%i" % (id, rx, id, tx))
116
117 add_status("", total)
118 add_status("http_", http)
119 add_status("https_", https)
120 add_status("ssh_", ssh)
121 add_status("dns_", dns)
122 add_status("other_", other)
123 for key in sorted(other_detailed.iterkeys(), key=lambda k: other_detailed[k], reverse=True):
124 status.append("%s %s" % (key, format_bandwidth(float(other_detailed[key]) / INTERVAL)))
125
126 output = "%s|%s" % (", ".join(status), " ".join(perfdata))
127
128 if total["rx"] >= crit or total["tx"] >= crit:
34 print "CRITICAL - " + output 129 print "CRITICAL - " + output
35 sys.exit(2) 130 sys.exit(2)
36 131
37 if rx >= warn or tx >= warn: 132 if total["rx"] >= warn or total["tx"] >= warn:
38 print "WARNING - " + output 133 print "WARNING - " + output
39 sys.exit(1) 134 sys.exit(1)
40 135
41 print "OK - " + output 136 print "OK - " + output
OLDNEW

Powered by Google App Engine
This is Rietveld