| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 #!/usr/bin/env python | 
|  | 2 """Anonymize data in access log lines. | 
|  | 3 | 
|  | 4 Read a line from stdin, write it to stdout with the following changes: | 
|  | 5 1. IP (v4 or v6) replaced with a salted hash of the IP and the date | 
|  | 6 2. Country and city information (extracted from IP) added after the salted hash. | 
|  | 7 | 
|  | 8 If the country or city information is unavailable in the database, '-' is added 
     instead | 
|  | 9 of the ISO 3166-1 alpha-2 country code (like 'DE') or the city name. | 
|  | 10 | 
|  | 11 Salt and the country/city information database are taken as command line options
      and | 
|  | 12 default to environment variables. | 
|  | 13 | 
|  | 14 Malformed lines are passed on as is, based on the assumption that they don't | 
|  | 15 contain sensitive information. Malformed here means the line couldn't be split | 
|  | 16 on a space character. If it could be split, and an error occurs afterwards | 
|  | 17 (e.g. while trying to parse out the date), the script will fail and exit in | 
|  | 18 order to bring attention to the fact that something might not be getting | 
|  | 19 anonymized. | 
|  | 20 """ | 
|  | 21 | 
|  | 22 from __future__ import print_function | 
|  | 23 from __future__ import unicode_literals | 
|  | 24 | 
|  | 25 import argparse | 
|  | 26 import hashlib | 
|  | 27 import hmac | 
|  | 28 import os | 
|  | 29 import sys | 
|  | 30 | 
|  | 31 import geoip2.database | 
|  | 32 | 
def _lookup_country(reader, ip):
    """Return the ISO country code for ``ip``, or '-' when unavailable."""
    # http://geoip2.readthedocs.io/en/latest/#geoip2.database.Reader.country
    try:
        record = reader.country(ip)
    except geoip2.errors.AddressNotFoundError:
        return '-'
    # A record can be found while the code itself is missing (None); fall
    # back to '-' so the literal text 'None' never reaches the output.
    return record.country.iso_code or '-'


def _lookup_city(reader, ip):
    """Return the city name for ``ip``, or '-' when unavailable."""
    try:
        record = reader.city(ip)
    except geoip2.errors.AddressNotFoundError:
        return '-'
    # NOTE(review): city names may contain spaces, which would add extra
    # space-separated fields to the output line -- confirm consumers cope.
    return record.city.name or '-'


def _extract_date(line):
    """Return the date part (like '04/May/2018') of an access log line.

    Raises ValueError when the line has no '[' or no ':' after it.
    """
    # 218.215.212.209 - - [04/May/2018:05:20:48 +0000] "GET /...
    date_start = line.index('[') + 1
    # Look for ':' only after '[': an IPv6 address at the start of the line
    # contains colons of its own.
    date_end = line.index(':', date_start)
    return line[date_start:date_end]


def main(salt, country_db, city_db):
    """Anonymize access log lines read from stdin, writing them to stdout.

    For every line, the leading IP address is replaced with an HMAC-SHA1
    token (keyed by ``salt``) of the IP concatenated with the request date,
    followed by the country code and the city name looked up for that IP.
    Lines that contain no space are echoed unchanged.

    Args:
        salt: Text used as the HMAC key (encoded as UTF-8).
        country_db: Path to the MaxMind DB file used for country lookups.
        city_db: Path to the MaxMind DB file used for city lookups.

    Raises:
        ValueError: If a line could be split but its date cannot be located.
            This is deliberately not caught, so that a line which might not
            be getting anonymized stops the script instead of slipping by.
    """
    # Encode before opening anything so a bad salt cannot leave a reader open.
    key = salt.encode('utf-8')

    country_reader = geoip2.database.Reader(country_db)
    try:
        city_reader = geoip2.database.Reader(city_db)
        try:
            for line in sys.stdin:
                ip, separator, non_sensitive_info = line.partition(' ')
                if not separator:
                    # Malformed line (no space to split on): pass it on as is.
                    print(line, end='')
                    continue

                country = _lookup_country(country_reader, ip)
                city = _lookup_city(city_reader, ip)

                # https://docs.python.org/2/library/hmac.html
                to_hash = (ip + _extract_date(line)).encode('utf-8')
                token = hmac.new(key, to_hash, hashlib.sha1).hexdigest()

                # non_sensitive_info still ends with the original newline.
                print(token, country, city, non_sensitive_info, end='')
        finally:
            city_reader.close()
    finally:
        # Close the readers even when a line makes the script fail.
        country_reader.close()
|  | 74 | 
|  | 75 | 
def build_parser():
    """Build the command line parser for the anonymizer.

    Every option defaults to an environment variable.  The two database
    options each have a dedicated variable and fall back to the shared
    legacy $ANONYMIZE_GEOLITE2_DB, so existing setups keep working while
    the country and city databases can be configured independently.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description='Filter out sensitive data from access logs',
    )

    parser.add_argument(
        '--salt',
        dest='salt',
        default=os.getenv('ANONYMIZE_SALT'),
        help='Salt for hashing sensitive data, defaults to $ANONYMIZE_SALT'
    )

    # Shared fallback kept for backward compatibility.
    legacy_db = os.getenv('ANONYMIZE_GEOLITE2_DB')

    # https://dev.maxmind.com/geoip/geoip2/geolite2/
    parser.add_argument(
        '--geolite2-country-db',
        dest='country_db',
        default=os.getenv('ANONYMIZE_GEOLITE2_COUNTRY_DB', legacy_db),
        help='Path to MaxMind DB file with GeoLite2 Country data, defaults '
             'to $ANONYMIZE_GEOLITE2_COUNTRY_DB, then $ANONYMIZE_GEOLITE2_DB'
    )

    parser.add_argument(
        '--geolite2-city-db',
        dest='city_db',
        default=os.getenv('ANONYMIZE_GEOLITE2_CITY_DB', legacy_db),
        help='Path to MaxMind DB file with GeoLite2 City data, defaults '
             'to $ANONYMIZE_GEOLITE2_CITY_DB, then $ANONYMIZE_GEOLITE2_DB'
    )

    return parser


if __name__ == '__main__':
    parser = build_parser()
    args = parser.parse_args()

    required = (
        ('--salt', args.salt),
        ('--geolite2-country-db', args.country_db),
        ('--geolite2-city-db', args.city_db),
    )
    missing = [option for option, value in required if value is None]

    if missing:
        # stdout carries the anonymized log stream, so usage information
        # and the error go to stderr instead.
        parser.print_help(sys.stderr)
        sys.stderr.write(
            'error: missing required settings: ' + ', '.join(missing) + '\n'
        )
        sys.exit(1)

    main(args.salt, args.country_db, args.city_db)
|  | 113 | 
| OLD | NEW | 
|---|