| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 """Anonymize data in access log lines. | 2 """Anonymize data in access log lines. |
| 3 | 3 |
| 4 Read a line from stdin, write it to stdout with the following changes: | 4 Read a line from stdin, write it to stdout with the following changes: |
| 5 1. IP (v4 or v6) replaced with a salted hash of the IP and the date | 5 1. IP (v4 or v6) replaced with a salted hash of the IP and the date |
| 6 2. Country information (extracted from IP) added after the salted hash. | 6 2. Country information (extracted from IP) added after the salted hash. |
| 7 | 7 |
| 8 If the country information is unavailable in the database, '-' is added instead | 8 If the country information is unavailable in the database, '-' is added instead |
| 9 of ISO 3166-1 alpha-2 country code (like 'DE'). | 9 of ISO 3166-1 alpha-2 country code (like 'DE'). |
| 10 | 10 |
| (...skipping 49 matching lines...) | (...skipping 49 matching lines...) |
| 60 to_hash = (ip + date).encode('utf-8') | 60 to_hash = (ip + date).encode('utf-8') |
| 61 token = hmac.HMAC(salt, to_hash, hashlib.sha1).hexdigest() | 61 token = hmac.HMAC(salt, to_hash, hashlib.sha1).hexdigest() |
| 62 | 62 |
| 63 print(token, country, non_sensitive_info, end='') | 63 print(token, country, non_sensitive_info, end='') |
| 64 | 64 |
| 65 reader.close() | 65 reader.close() |
| 66 | 66 |
| 67 | 67 |
| 68 if __name__ == '__main__': | 68 if __name__ == '__main__': |
| 69 parser = argparse.ArgumentParser( | 69 parser = argparse.ArgumentParser( |
| 70 description='Filter out sensitive data from access logs', | 70 description='Filter out sensitive data from access logs', |
|
tlucas
2018/05/08 16:08:21
What do you think about
...
description=__
l.kryvonos
2018/05/08 16:26:19
I like doing `description=__doc__`, but I think th
tlucas
2018/05/08 16:40:55
Fair enough - let's see if those encountering the
| |
| 71 ) | 71 ) |
| 72 | 72 |
| 73 parser.add_argument( | 73 parser.add_argument( |
| 74 '--salt', | 74 '--salt', |
| 75 dest='salt', | 75 dest='salt', |
| 76 default=os.getenv('ANONYMIZE_SALT'), | 76 default=os.getenv('ANONYMIZE_SALT'), |
| 77 help='Salt for hashing sensitive data, defaults to $ANONYMIZE_SALT' | 77 help='Salt for hashing sensitive data, defaults to $ANONYMIZE_SALT' |
| 78 ) | 78 ) |
| 79 | 79 |
| 80 # https://dev.maxmind.com/geoip/geoip2/geolite2/ | 80 # https://dev.maxmind.com/geoip/geoip2/geolite2/ |
| 81 parser.add_argument( | 81 parser.add_argument( |
| 82 '--geolite2-db', | 82 '--geolite2-db', |
| 83 dest='country_db', | 83 dest='country_db', |
| 84 default=os.getenv('ANONYMIZE_GEOLITE2_DB'), | 84 default=os.getenv('ANONYMIZE_GEOLITE2_DB'), |
| 85 help=('Path to MaxMind DB file with GeoLite2 Country data, defaults ' | 85 help='Path to MaxMind DB file with GeoLite2 Country data, defaults ' |
|
tlucas
2018/05/08 16:08:21
Those parentheses are redundant (and trigger our c
l.kryvonos
2018/05/08 16:26:19
Acknowledged.
| |
| 86 'to $ANONYMIZE_GEOLITE2_DB') | 86 'to $ANONYMIZE_GEOLITE2_DB' |
| 87 ) | 87 ) |
| 88 | 88 |
| 89 args = parser.parse_args() | 89 args = parser.parse_args() |
| 90 | 90 |
| 91 if args.salt is None or args.country_db is None: | 91 if args.salt is None or args.country_db is None: |
| 92 parser.print_help() | 92 parser.print_help() |
| 93 sys.exit(1) | 93 sys.exit(1) |
| 94 | 94 |
| 95 main(args.salt, args.country_db) | 95 main(args.salt, args.country_db) |
| 96 | |
|
tlucas
2018/05/08 16:08:21
Nit: this blank line is redundant.
l.kryvonos
2018/05/08 16:26:19
Do all of our repositories follow the 'no blank li
tlucas
2018/05/08 16:40:55
Yes - or at least they should (this is also a buil
| |
| LEFT | RIGHT |