| Index: modules/nagios/files/check_bandwidth |
| =================================================================== |
| --- a/modules/nagios/files/check_bandwidth |
| +++ b/modules/nagios/files/check_bandwidth |
| @@ -1,41 +1,136 @@ |
| #!/usr/bin/env python |
| -import os, re, subprocess, sys |
| +import os, re, subprocess, sys, socket, struct, fcntl |
| + |
| +INTERVAL = 5 |
def format_bandwidth(bits):
    """Format a bit rate as a human-readable string.

    bits is a number of bits per second (int or float). The result uses
    Mbit/s from 1,000,000 upwards, kbit/s from 1,000 upwards and plain bit/s
    below that, always with two decimal places.
    """
    # Force float arithmetic: under Python 2 an integer argument would
    # otherwise be floor-divided and e.g. 1500 would show as "1.00 kbit/s".
    bits = float(bits)
    if bits >= 1000000:
        return "%.2f Mbit/s" % (bits / 1000000)
    elif bits >= 1000:
        return "%.2f kbit/s" % (bits / 1000)
    else:
        return "%.2f bit/s" % bits
def getmacaddress(interface="eth0"):
    """Return the hardware (MAC) address of a network interface as 6 raw bytes.

    interface is the interface name and defaults to "eth0". The lookup is
    done with the SIOCGIFHWADDR ioctl, so it only works on systems that
    provide that request.
    """
    if not isinstance(interface, bytes):
        interface = interface.encode("ascii")

    # We are calling SIOCGIFHWADDR (0x8927 according to man ioctl_list) here.
    # See man netdevice for the request structure: it has to start with 16
    # bytes containing the interface name, the OS will write 8 bytes after
    # that (2 bytes family name and 6 bytes actual MAC address).
    request = struct.pack("24s", interface)
    s = socket.socket()
    try:
        response = fcntl.ioctl(s.fileno(), 0x8927, request)
    finally:
        # The socket is only needed as a handle for the ioctl; do not leak it.
        s.close()
    return response[18:24]
|
Felix Dahlke
2013/10/10 08:29:08
I'm pretty sure this will only work on Linux like
Wladimir Palant
2013/10/10 09:37:46
Yes, I've seen that. This function looks very much
|
| + |
# Magic number opening a libpcap capture written in the host's byte order, see
# http://wiki.wireshark.org/Development/LibpcapFileFormat
_PCAP_MAGIC = 0xa1b2c3d4

# Traffic classes in output order: (prefix used in status/perfdata, stats key).
_REPORTED = (
    ("", "total"),
    ("http_", "http"),
    ("https_", "https"),
    ("ssh_", "ssh"),
    ("dns_", "dns"),
    ("other_", "other"),
)

# Well-known TCP ports that get a traffic class of their own.
_TCP_SERVICES = {80: "http", 443: "https", 22: "ssh"}


def classify_frame(frame, mac):
    """Classify one captured Ethernet frame.

    frame holds the captured bytes of the frame (at least the 14 byte
    Ethernet header), mac is the 6 byte hardware address of the local
    interface. Returns a tuple (direction, category, detail): direction is
    "rx" if the frame is addressed to mac and "tx" otherwise, category is one
    of "http", "https", "ssh", "dns" or "other", and detail is a short
    description for "other" traffic (None for the named categories).
    """
    # Unpack Ethernet frame, http://en.wikipedia.org/wiki/Ethernet_frame#Structure
    # (the capture starts at the destination address, there is no preamble).
    destination, _, ethertype = struct.unpack("!6s6sH", frame[:14])
    direction = "rx" if destination == mac else "tx"
    packet = frame[14:]

    # Check Level 3 protocol
    if ethertype == 0x0800:
        # IPv4, http://en.wikipedia.org/wiki/Internet_Protocol_version_4#Header
        if len(packet) < 20:
            return direction, "other", "Truncated"
        # The low nibble of the first byte is the header length in 32 bit words.
        ihl = struct.unpack_from("!B", packet, 0)[0] & 0xF
        l4_protocol = struct.unpack_from("!B", packet, 9)[0]
        segment = packet[ihl * 4:]
    elif ethertype == 0x86DD:
        # IPv6, http://en.wikipedia.org/wiki/IPv6_packet#Fixed_header
        if len(packet) < 40:
            return direction, "other", "Truncated"
        l4_protocol = struct.unpack_from("!B", packet, 6)[0]
        segment = packet[40:]
    else:
        return direction, "other", "L3 0x%04X" % ethertype

    # Check Level 4 protocol
    if l4_protocol not in (0x06, 0x11):  # neither TCP nor UDP
        return direction, "other", "L4 0x%02X" % l4_protocol
    if len(segment) < 4:
        # The capture length cut the packet off before the port numbers.
        return direction, "other", "Truncated"
    source_port, destination_port = struct.unpack("!HH", segment[:4])
    # The lower port number should be the real port, the other one will be
    # the ephemeral port.
    port = min(source_port, destination_port)

    if l4_protocol == 0x06 and port in _TCP_SERVICES:
        return direction, _TCP_SERVICES[port], None
    if port == 53:
        return direction, "dns", None
    return direction, "other", "Port %i" % port


def parse_capture(stream, mac, interval):
    """Aggregate the bandwidth of a libpcap capture per traffic class.

    stream is a file-like object delivering the capture, mac the 6 byte
    hardware address of the local interface and interval the capture duration
    in seconds. Returns (stats, other_detailed): stats maps "total", "http",
    "https", "ssh", "dns" and "other" to {"rx": ..., "tx": ...} values in bits
    per second, other_detailed maps a description of each kind of "other"
    traffic to its bits per second.

    Raises ValueError if the stream does not start with a libpcap header in
    the host's byte order.
    """
    global_header = stream.read(24)
    if len(global_header) < 24:
        raise ValueError("No capture data received")
    if struct.unpack("I", global_header[:4])[0] != _PCAP_MAGIC:
        raise ValueError("Unexpected format")

    stats = dict((key, {"rx": 0, "tx": 0}) for _, key in _REPORTED)
    other_detailed = {}
    while True:
        record_header = stream.read(16)
        if len(record_header) < 16:
            break
        _, _, incl_len, orig_len = struct.unpack("IIII", record_header)
        frame = stream.read(incl_len)
        if len(frame) < incl_len:
            # The capture ended in the middle of a record.
            break
        if len(frame) < 14:
            # Not even a complete Ethernet header, direction is unknown.
            continue

        # Convert bytes to bits and normalize to seconds
        bps = float(orig_len * 8) / interval
        direction, category, detail = classify_frame(frame, mac)
        stats["total"][direction] += bps
        stats[category][direction] += bps
        if detail is not None:
            other_detailed[detail] = other_detailed.get(detail, 0) + bps
    return stats, other_detailed


def build_output(stats, other_detailed, warn, crit):
    """Build the "status|perfdata" text of the check from the parsed stats."""
    status = []
    perfdata = []
    for prefix, key in _REPORTED:
        rx = stats[key]["rx"]
        tx = stats[key]["tx"]
        status.append("%srx %s %stx %s" % (prefix, format_bandwidth(rx),
                                           prefix, format_bandwidth(tx)))
        if prefix == "":
            # Only the totals are compared against the thresholds.
            perfdata.append("rx=%i;%i;%i tx=%i;%i;%i" % (rx, warn, crit, tx, warn, crit))
        else:
            perfdata.append("%srx=%i %stx=%i" % (prefix, rx, prefix, tx))

    # Largest contributors first. The values are already bits per second, so
    # they must not be divided by the interval again.
    for key in sorted(other_detailed, key=other_detailed.get, reverse=True):
        status.append("%s %s" % (key, format_bandwidth(other_detailed[key])))

    return "%s|%s" % (", ".join(status), " ".join(perfdata))


if __name__ == "__main__":
    usage = "Usage: %s WARN CRIT" % os.path.basename(sys.argv[0])
    if len(sys.argv) != 3:
        print(usage)
        # Exit status 3 is UNKNOWN for Nagios, 0 would be reported as OK.
        sys.exit(3)
    try:
        warn = int(sys.argv[1])
        crit = int(sys.argv[2])
    except ValueError:
        print(usage)
        sys.exit(3)

    mac = getmacaddress()
    process = subprocess.Popen(
        ["sudo", "tcpdump", "-q", "-s", "64", "-G", str(INTERVAL), "-W", "1", "-w", "-"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    try:
        total_stats, other_detailed = parse_capture(process.stdout, mac, INTERVAL)
    except ValueError as error:
        # An uncaught exception would exit with status 1, i.e. WARNING.
        print("UNKNOWN - %s" % error)
        sys.exit(3)
    finally:
        # Drain the remaining pipe output and reap the tcpdump process.
        process.communicate()

    output = build_output(total_stats, other_detailed, warn, crit)
    total = total_stats["total"]

    if total["rx"] >= crit or total["tx"] >= crit:
        print("CRITICAL - " + output)
        sys.exit(2)
    if total["rx"] >= warn or total["tx"] >= warn:
        print("WARNING - " + output)
        sys.exit(1)
    print("OK - " + output)