Index: pygeoip/__init__.py |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/pygeoip/__init__.py |
@@ -0,0 +1,668 @@ |
+# -*- coding: utf-8 -*- |
+""" |
+Pure Python GeoIP API |
+ |
+The API is based on MaxMind's C-based Python API, but the code itself is |
+ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid. |
+ |
+@author: Jennifer Ennis <zaylea@gmail.com> |
+ |
+@license: Copyright(C) 2004 MaxMind LLC |
+ |
+This program is free software: you can redistribute it and/or modify |
+it under the terms of the GNU Lesser General Public License as published by |
+the Free Software Foundation, either version 3 of the License, or |
+(at your option) any later version. |
+ |
+This program is distributed in the hope that it will be useful, |
+but WITHOUT ANY WARRANTY; without even the implied warranty of |
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+GNU General Public License for more details. |
+ |
+You should have received a copy of the GNU Lesser General Public License |
+along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>. |
+""" |
+ |
+import os |
+import math |
+import socket |
+import mmap |
+import codecs |
+from threading import Lock |
+ |
+try: |
+ from StringIO import StringIO |
+except ImportError: |
+ from io import StringIO, BytesIO |
+ |
+from pygeoip import util, const |
+from pygeoip.const import PY2, PY3 |
+from pygeoip.timezone import time_zone_by_country_and_region |
+ |
+ |
+# Module-level aliases of the cache-mode flags that GeoIP.__init__ tests |
+# in its ``flags`` argument. |
+STANDARD = const.STANDARD |
+MMAP_CACHE = const.MMAP_CACHE |
+MEMORY_CACHE = const.MEMORY_CACHE |
+ |
+# Codec name used whenever data read from the database file is decoded. |
+ENCODING = const.ENCODING |
+ |
+ |
+class GeoIPError(Exception): |
+    """Error raised for corrupt databases and failed or unsupported lookups.""" |
+ |
+ |
+class GeoIPMetaclass(type): |
+    """ |
+    Metaclass that hands out one shared instance per class and filename. |
+    """ |
+    # (class, filename) -> instance created for that pair |
+    _instances = {} |
+ |
+    def __call__(cls, *args, **kwargs): |
+        """ |
+        Singleton method to get an instance without reparsing the db. Unique |
+        instances are instantiated based on the filename of the db. Flags are |
+        ignored for this, i.e. if you initialize one with STANDARD |
+        flag (default) and then try later to initialize with MEMORY_CACHE, it |
+        will still return the STANDARD one. |
+ |
+        The cache has to live in __call__ (run on every instantiation); a |
+        metaclass __new__ only runs when a class is created, where the first |
+        argument is the class name rather than a database filename. |
+ |
+        @param args: positional constructor arguments, filename first |
+        @param kwargs: keyword constructor arguments, may hold 'filename' |
+        @return: the cached instance for (cls, filename), created on demand |
+        """ |
+        if args: |
+            filename = args[0] |
+        elif 'filename' in kwargs: |
+            filename = kwargs['filename'] |
+        else: |
+            # No filename given: nothing to key the cache on, so let the |
+            # normal constructor run (and report the missing argument). |
+            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs) |
+ |
+        # Key on the class as well, so subclasses never receive an |
+        # instance of a different class for the same filename. |
+        key = (cls, filename) |
+        instances = GeoIPMetaclass._instances |
+        if key not in instances: |
+            instance = super(GeoIPMetaclass, cls).__call__(*args, **kwargs) |
+            instances[key] = instance |
+ |
+        return instances[key] |
+ |
+ |
+# Base class built by calling the metaclass directly, so that every class |
+# deriving from it (such as GeoIP) is created through GeoIPMetaclass. |
+GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {}) |
+ |
+ |
+class GeoIP(GeoIPBase): |
+    def __init__(self, filename, flags=0): |
+        """ |
+        Open the database and detect its edition. |
+ |
+        @param filename: Path to a geoip database. |
+        @type filename: str |
+        @param flags: Flags that affect how the database is processed. |
+            Currently supported flags are STANDARD (the default), |
+            MEMORY_CACHE (preload the whole file into memory) and |
+            MMAP_CACHE (access the file via mmap). |
+        @type flags: int |
+        """ |
+        self._filename = filename |
+        self._flags = flags |
+ |
+        if self._flags & const.MMAP_CACHE: |
+            # The mapping is used after the file object is closed; the |
+            # with-block also closes the file when mmap() itself fails. |
+            with open(filename, 'rb') as f: |
+                self._filehandle = mmap.mmap(f.fileno(), 0, |
+                                             access=mmap.ACCESS_READ) |
+ |
+        elif self._flags & const.MEMORY_CACHE: |
+            with open(filename, 'rb') as f: |
+                self._memoryBuffer = f.read() |
+            # Wrap the preloaded data in a file-like object so the same |
+            # seek()/read() calls work as for the other modes. |
+            iohandle = BytesIO if PY3 else StringIO |
+            self._filehandle = iohandle(self._memoryBuffer) |
+ |
+        else: |
+            # Default mode: keep the file open and decode reads on the fly. |
+            self._filehandle = codecs.open(filename, 'rb', ENCODING) |
+ |
+        # Guards every seek()+read() pair on the shared file handle. |
+        self._lock = Lock() |
+        self._setup_segments() |
+ |
+    def _setup_segments(self): |
+        """ |
+        Parses the database file to determine what kind of database is |
+        being used and setup segment sizes and start points that will |
+        be used by the seek*() methods later. |
+ |
+        The file is scanned backwards from its end for a marker of three |
+        0xFF bytes; the byte after the marker is the database type. If no |
+        marker is found within const.STRUCTURE_INFO_MAX_SIZE steps, the |
+        COUNTRY_EDITION defaults set at the top are kept. The original |
+        file position is restored before returning. |
+ |
+        Supported databases: |
+ |
+        * COUNTRY_EDITION |
+        * COUNTRY_EDITION_V6 |
+        * REGION_EDITION_REV0 |
+        * REGION_EDITION_REV1 |
+        * CITY_EDITION_REV0 |
+        * CITY_EDITION_REV1 |
+        * CITY_EDITION_REV1_V6 |
+        * ORG_EDITION |
+        * ISP_EDITION |
+        * ASNUM_EDITION |
+        * ASNUM_EDITION_V6 |
+ |
+        """ |
+        self._databaseType = const.COUNTRY_EDITION |
+        self._recordLength = const.STANDARD_RECORD_LENGTH |
+        self._databaseSegments = const.COUNTRY_BEGIN |
+ |
+        # Build the marker once, in the same text type that the reads |
+        # below are converted to. |
+        marker = chr(255) * 3 |
+        if PY2: |
+            marker = marker.decode(ENCODING) |
+ |
+        # Editions that store their segment count right after the type byte. |
+        segment_editions = (const.CITY_EDITION_REV0, |
+                            const.CITY_EDITION_REV1, |
+                            const.CITY_EDITION_REV1_V6, |
+                            const.ORG_EDITION, |
+                            const.ISP_EDITION, |
+                            const.ASNUM_EDITION, |
+                            const.ASNUM_EDITION_V6) |
+        long_records = (const.ORG_EDITION, const.ISP_EDITION) |
+ |
+        # "with" releases the lock even when a seek or read raises; a bare |
+        # acquire()/release() pair would leave it held in that case. |
+        with self._lock: |
+            filepos = self._filehandle.tell() |
+            self._filehandle.seek(-3, os.SEEK_END) |
+ |
+            for _ in range(const.STRUCTURE_INFO_MAX_SIZE): |
+                delim = self._filehandle.read(3) |
+ |
+                if PY3 and isinstance(delim, bytes): |
+                    delim = delim.decode(ENCODING) |
+                if PY2 and isinstance(delim, str): |
+                    delim = delim.decode(ENCODING) |
+ |
+                if delim != marker: |
+                    # 3 bytes were consumed; stepping back 4 moves the |
+                    # window one byte towards the start of the file. |
+                    self._filehandle.seek(-4, os.SEEK_CUR) |
+                    continue |
+ |
+                byte = self._filehandle.read(1) |
+                self._databaseType = ord(byte) |
+ |
+                # Compatibility with databases from April 2003 and earlier |
+                if self._databaseType >= 106: |
+                    self._databaseType -= 105 |
+ |
+                if self._databaseType == const.REGION_EDITION_REV0: |
+                    self._databaseSegments = const.STATE_BEGIN_REV0 |
+ |
+                elif self._databaseType == const.REGION_EDITION_REV1: |
+                    self._databaseSegments = const.STATE_BEGIN_REV1 |
+ |
+                elif self._databaseType in segment_editions: |
+                    self._databaseSegments = 0 |
+                    buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH) |
+ |
+                    if PY3 and isinstance(buf, bytes): |
+                        buf = buf.decode(ENCODING) |
+ |
+                    # Least significant byte comes first. |
+                    for j in range(const.SEGMENT_RECORD_LENGTH): |
+                        self._databaseSegments += (ord(buf[j]) << (j * 8)) |
+ |
+                    if self._databaseType in long_records: |
+                        self._recordLength = const.ORG_RECORD_LENGTH |
+                break |
+ |
+            self._filehandle.seek(filepos, os.SEEK_SET) |
+ |
+    def _seek_country(self, ipnum): |
+        """ |
+        Using the record length and appropriate start points, seek to the |
+        country that corresponds to the converted IP address integer. |
+ |
+        Each step reads one node of two records (_recordLength bytes each, |
+        least significant byte first). Bit ``depth`` of ipnum selects the |
+        second record when set, otherwise the first. A record value that |
+        is >= _databaseSegments ends the walk and is returned; any other |
+        value is the index of the next node. |
+ |
+        @param ipnum: result of ip2long conversion |
+        @type ipnum: int |
+        @return: offset of start of record |
+        @rtype: int |
+        @raise GeoIPError: if the walk ends without a result or an error |
+            occurs while reading the nodes |
+        """ |
+        try: |
+            offset = 0 |
+            # More than 10 decimal digits cannot be a 32-bit number, so |
+            # such values are walked with 128 bits. |
+            seek_depth = 127 if len(str(ipnum)) > 10 else 31 |
+            node_size = 2 * self._recordLength |
+ |
+            for depth in range(seek_depth, -1, -1): |
+                start = node_size * offset |
+                if self._flags & const.MEMORY_CACHE: |
+                    buf = self._memoryBuffer[start:start + node_size] |
+                else: |
+                    # "with" releases the lock even if seek/read raises. |
+                    with self._lock: |
+                        self._filehandle.seek(start, os.SEEK_SET) |
+                        buf = self._filehandle.read(node_size) |
+ |
+                if PY3 and isinstance(buf, bytes): |
+                    buf = buf.decode(ENCODING) |
+ |
+                x = [0, 0] |
+                for i in range(2): |
+                    for j in range(self._recordLength): |
+                        byte = buf[self._recordLength * i + j] |
+                        x[i] += ord(byte) << (j * 8) |
+ |
+                branch = x[1] if ipnum & (1 << depth) else x[0] |
+                if branch >= self._databaseSegments: |
+                    return branch |
+                offset = branch |
+        except Exception: |
+            # Any read/decode failure is reported as a corrupt database |
+            # below; KeyboardInterrupt and SystemExit still propagate. |
+            pass |
+ |
+        raise GeoIPError('Corrupt database') |
+ |
+    def _get_org(self, ipnum): |
+        """ |
+        Seek and return organization or ISP name for ipnum. |
+ |
+        @param ipnum: Converted IP address |
+        @type ipnum: int |
+        @return: org/isp name, or None when the tree lookup result equals |
+            the segment count |
+        @rtype: str |
+        @raise ValueError: if the data read contains no NUL terminator |
+        """ |
+        seek_org = self._seek_country(ipnum) |
+        if seek_org == self._databaseSegments: |
+            return None |
+ |
+        # Fixed part of the file position; it depends only on the record |
+        # length and the segment count. |
+        base = (2 * self._recordLength - 1) * self._databaseSegments |
+ |
+        # "with" releases the lock even if seek() or read() raises. |
+        with self._lock: |
+            self._filehandle.seek(seek_org + base, os.SEEK_SET) |
+            buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH) |
+ |
+        if PY3 and isinstance(buf, bytes): |
+            buf = buf.decode(ENCODING) |
+ |
+        # The name ends at the first NUL character. |
+        return buf[:buf.index(chr(0))] |
+ |
+    def _get_region(self, ipnum): |
+        """ |
+        Seek and return the region info (dict containing country_code |
+        and region_name). |
+ |
+        Either value is '' when it cannot be determined. |
+ |
+        @param ipnum: Converted IP address |
+        @type ipnum: int |
+        @return: dict containing country_code and region_name |
+        @rtype: dict |
+        """ |
+        region = '' |
+        country_code = '' |
+ |
+        def get_region_name(offset): |
+            # Two upper-case letters: offset 0 -> 'AA', 27 -> 'BB'. |
+            return chr(offset // 26 + 65) + chr(offset % 26 + 65) |
+ |
+        # The tree is walked only in the Region branches; City editions |
+        # do their own walk inside _get_record(). |
+        if self._databaseType == const.REGION_EDITION_REV0: |
+            seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV0 |
+            if seek_region >= 1000: |
+                country_code = 'US' |
+                region = get_region_name(seek_region - 1000) |
+            else: |
+                country_code = const.COUNTRY_CODES[seek_region] |
+        elif self._databaseType == const.REGION_EDITION_REV1: |
+            seek_region = self._seek_country(ipnum) - const.STATE_BEGIN_REV1 |
+            if seek_region < const.US_OFFSET: |
+                pass |
+            elif seek_region < const.CANADA_OFFSET: |
+                country_code = 'US' |
+                region = get_region_name(seek_region - const.US_OFFSET) |
+            elif seek_region < const.WORLD_OFFSET: |
+                country_code = 'CA' |
+                region = get_region_name(seek_region - const.CANADA_OFFSET) |
+            else: |
+                index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE |
+                # COUNTRY_CODES is indexed by integer everywhere else in |
+                # this class. If it is a sequence, ``index in`` would |
+                # compare against the stored codes and never match, so |
+                # look the index up and ignore one that is out of range. |
+                try: |
+                    country_code = const.COUNTRY_CODES[index] |
+                except (IndexError, KeyError): |
+                    pass |
+        elif self._databaseType in const.CITY_EDITIONS: |
+            rec = self._get_record(ipnum) |
+            region = rec.get('region_name', '') |
+            country_code = rec.get('country_code', '') |
+ |
+        return {'country_code': country_code, 'region_name': region} |
+ |
+    def _get_record(self, ipnum): |
+        """ |
+        Populate location dict for converted IP. |
+ |
+        @param ipnum: Converted IP address |
+        @type ipnum: int |
+        @return: dict with country_code, country_code3, country_name, |
+            continent, region_name, city, postal_code, latitude, |
+            longitude, dma_code, metro_code, area_code and time_zone; |
+            an empty dict when the tree lookup result equals the |
+            segment count |
+        @rtype: dict |
+        """ |
+        seek_country = self._seek_country(ipnum) |
+        if seek_country == self._databaseSegments: |
+            return {} |
+ |
+        # Fixed part of the file position; it depends only on the record |
+        # length and the segment count. |
+        base = (2 * self._recordLength - 1) * self._databaseSegments |
+ |
+        # "with" releases the lock even if seek() or read() raises. |
+        with self._lock: |
+            self._filehandle.seek(seek_country + base, os.SEEK_SET) |
+            buf = self._filehandle.read(const.FULL_RECORD_LENGTH) |
+ |
+        if PY3 and isinstance(buf, bytes): |
+            buf = buf.decode(ENCODING) |
+ |
+        def read_text(start): |
+            # Returns (index of the terminating NUL, text in front of it). |
+            end = start |
+            while ord(buf[end]) != 0: |
+                end += 1 |
+            return end, (buf[start:end] if end > start else '') |
+ |
+        def read_uint24(start): |
+            # Three bytes, least significant first. |
+            return sum(ord(buf[start + j]) << (j * 8) for j in range(3)) |
+ |
+        record = { |
+            'dma_code': 0, |
+            'area_code': 0, |
+            'metro_code': '', |
+            'postal_code': '' |
+        } |
+ |
+        # Get country: the first byte indexes the const lookup tables. |
+        country = ord(buf[0]) |
+        record['country_code'] = const.COUNTRY_CODES[country] |
+        record['country_code3'] = const.COUNTRY_CODES3[country] |
+        record['country_name'] = const.COUNTRY_NAMES[country] |
+        record['continent'] = const.CONTINENT_NAMES[country] |
+ |
+        # Three NUL-terminated strings follow the country byte. |
+        end, record['region_name'] = read_text(1) |
+        end, record['city'] = read_text(end + 1) |
+        end, record['postal_code'] = read_text(end + 1) |
+        pos = end + 1 |
+ |
+        record['latitude'] = (read_uint24(pos) / 10000.0) - 180.0 |
+        record['longitude'] = (read_uint24(pos + 3) / 10000.0) - 180.0 |
+        pos += 6 |
+ |
+        rev1 = (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6) |
+        if self._databaseType in rev1 and record['country_code'] == 'US': |
+            # One combined number: thousands are the DMA code, the |
+            # remainder is the area code. |
+            dmaarea_combo = read_uint24(pos) |
+            record['dma_code'] = dmaarea_combo // 1000 |
+            record['area_code'] = dmaarea_combo % 1000 |
+ |
+        # Overwrites the '' default; .get() gives None for unknown codes. |
+        record['metro_code'] = const.DMA_MAP.get(record['dma_code']) |
+        params = (record['country_code'], record['region_name']) |
+        record['time_zone'] = time_zone_by_country_and_region(*params) |
+ |
+        return record |
+ |
+    def _gethostbyname(self, hostname): |
+        """ |
+        Resolve hostname to an address string for the loaded database. |
+ |
+        IPv6 editions resolve through getaddrinfo() with AF_INET6 and |
+        give '' when that raises gaierror; all other editions use |
+        gethostbyname(), whose errors propagate. |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: textual address of the first resolver result |
+        @rtype: str |
+        """ |
+        if self._databaseType not in const.IPV6_EDITIONS: |
+            return socket.gethostbyname(hostname) |
+ |
+        try: |
+            results = socket.getaddrinfo(hostname, 0, socket.AF_INET6) |
+        except socket.gaierror: |
+            return '' |
+ |
+        # Only the first result is used. |
+        _family, _socktype, _proto, _canonname, sockaddr = results[0] |
+        address, _port, _flow, _scope = sockaddr |
+        return address |
+ |
+    def id_by_addr(self, addr): |
+        """ |
+        Get the country index. |
+        Looks up the index for the country which is the key for |
+        the code and name. |
+ |
+        @param addr: The IP address |
+        @type addr: str |
+        @return: country index (tree lookup result minus const.COUNTRY_BEGIN) |
+        @rtype: int |
+        @raise ValueError: if util.ip2long() gives a false value for addr |
+        @raise GeoIPError: if the database is not a Country edition |
+        """ |
+        ipnum = util.ip2long(addr) |
+        if not ipnum: |
+            raise ValueError("Invalid IP address: %s" % addr) |
+ |
+        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) |
+        if self._databaseType in country_editions: |
+            return self._seek_country(ipnum) - const.COUNTRY_BEGIN |
+ |
+        raise GeoIPError('Invalid database type, expected Country') |
+ |
+    def country_code_by_addr(self, addr): |
+        """ |
+        Returns 2-letter country code (e.g. 'US') for specified IP address. |
+        Use this method if you have a Country, Region, or City database. |
+ |
+        An address containing ':' is treated as IPv6. For Country |
+        databases the address family has to match the edition. |
+ |
+        @param addr: IP address |
+        @type addr: str |
+        @return: 2-letter country code |
+        @rtype: str |
+        @raise GeoIPError: for an unsupported database type, or when a |
+            ValueError occurs during the lookup |
+        """ |
+        edition = self._databaseType |
+        try: |
+            if edition in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6): |
+                is_ipv6 = addr.find(':') != -1 |
+ |
+                if not is_ipv6 and edition != const.COUNTRY_EDITION: |
+                    message = 'Invalid database type; expected IPv6 address' |
+                    raise ValueError(message) |
+                if is_ipv6 and edition != const.COUNTRY_EDITION_V6: |
+                    message = 'Invalid database type; expected IPv4 address' |
+                    raise ValueError(message) |
+ |
+                return const.COUNTRY_CODES[self.id_by_addr(addr)] |
+ |
+            if edition in const.REGION_CITY_EDITIONS: |
+                return self.region_by_addr(addr).get('country_code') |
+ |
+            message = 'Invalid database type, expected Country, City or Region' |
+            raise GeoIPError(message) |
+        except ValueError: |
+            # Covers the family mismatches above and invalid addresses. |
+            raise GeoIPError('Failed to lookup address %s' % addr) |
+ |
+    def country_code_by_name(self, hostname): |
+        """ |
+        Resolve hostname and return its 2-letter country code (e.g. 'US'). |
+        Use this method if you have a Country, Region, or City database. |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: 2-letter country code |
+        @rtype: str |
+        """ |
+        return self.country_code_by_addr(self._gethostbyname(hostname)) |
+ |
+    def country_name_by_addr(self, addr): |
+        """ |
+        Returns full country name for specified IP address. |
+        Use this method if you have a Country or City database. |
+ |
+        @param addr: IP address |
+        @type addr: str |
+        @return: country name, or None when a City database holds no |
+            record for the address |
+        @rtype: str |
+        @raise GeoIPError: for an unsupported database type, or when a |
+            ValueError occurs during the lookup |
+        """ |
+        try: |
+            VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6) |
+            if self._databaseType in VALID_EDITIONS: |
+                country_id = self.id_by_addr(addr) |
+                return const.COUNTRY_NAMES[country_id] |
+            elif self._databaseType in const.CITY_EDITIONS: |
+                # record_by_addr() returns None when there is no record; |
+                # calling .get() on that would raise AttributeError. |
+                record = self.record_by_addr(addr) |
+                if record is None: |
+                    return None |
+                return record.get('country_name') |
+            else: |
+                message = 'Invalid database type, expected Country or City' |
+                raise GeoIPError(message) |
+        except ValueError: |
+            raise GeoIPError('Failed to lookup address %s' % addr) |
+ |
+    def country_name_by_name(self, hostname): |
+        """ |
+        Resolve hostname and return its full country name. |
+        Accepts the same database types as country_name_by_addr(). |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: country name |
+        @rtype: str |
+        """ |
+        return self.country_name_by_addr(self._gethostbyname(hostname)) |
+ |
+    def org_by_addr(self, addr): |
+        """ |
+        Lookup Organization, ISP or ASNum for given IP address. |
+        Use this method if you have an Organization, ISP or ASNum database. |
+ |
+        @param addr: IP address |
+        @type addr: str |
+        @return: organization or ISP name |
+        @rtype: str |
+        @raise GeoIPError: for an unsupported database type, or when a |
+            ValueError occurs during the lookup |
+        """ |
+        supported = ( |
+            const.ORG_EDITION, |
+            const.ISP_EDITION, |
+            const.ASNUM_EDITION, |
+            const.ASNUM_EDITION_V6, |
+        ) |
+        try: |
+            ipnum = util.ip2long(addr) |
+            if not ipnum: |
+                raise ValueError('Invalid IP address') |
+ |
+            if self._databaseType in supported: |
+                return self._get_org(ipnum) |
+ |
+            raise GeoIPError('Invalid database type, expected Org, ISP or ASNum') |
+        except ValueError: |
+            raise GeoIPError('Failed to lookup address %s' % addr) |
+ |
+    def org_by_name(self, hostname): |
+        """ |
+        Resolve hostname and look up its organization (or ISP). |
+        Accepts the same database types as org_by_addr(). |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: Organization or ISP name |
+        @rtype: str |
+        """ |
+        return self.org_by_addr(self._gethostbyname(hostname)) |
+ |
+    def record_by_addr(self, addr): |
+        """ |
+        Look up the record for a given IP address. |
+        Use this method if you have a City database. |
+ |
+        @param addr: IP address |
+        @type addr: str |
+        @return: the dictionary built by _get_record(), or None when that |
+            dictionary is empty |
+        @rtype: dict |
+        @raise GeoIPError: for a non-City database, or when a ValueError |
+            occurs during the lookup |
+        """ |
+        try: |
+            ipnum = util.ip2long(addr) |
+            if not ipnum: |
+                raise ValueError('Invalid IP address') |
+ |
+            if self._databaseType in const.CITY_EDITIONS: |
+                # An empty dict is reported as None. |
+                return self._get_record(ipnum) or None |
+ |
+            raise GeoIPError('Invalid database type, expected City') |
+        except ValueError: |
+            raise GeoIPError('Failed to lookup address %s' % addr) |
+ |
+    def record_by_name(self, hostname): |
+        """ |
+        Resolve hostname and look up its record. |
+        Use this method if you have a City database. |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: whatever record_by_addr() returns for the resolved address |
+        @rtype: dict |
+        """ |
+        return self.record_by_addr(self._gethostbyname(hostname)) |
+ |
+    def region_by_addr(self, addr): |
+        """ |
+        Lookup the region for given IP address. |
+        Use this method if you have a Region or City database. |
+ |
+        @param addr: IP address |
+        @type addr: str |
+        @return: Dictionary containing country_code and region_name |
+        @rtype: dict |
+        @raise GeoIPError: for an unsupported database type, or when a |
+            ValueError occurs during the lookup |
+        """ |
+        try: |
+            ipnum = util.ip2long(addr) |
+            if not ipnum: |
+                raise ValueError('Invalid IP address') |
+ |
+            if self._databaseType in const.REGION_CITY_EDITIONS: |
+                return self._get_region(ipnum) |
+ |
+            raise GeoIPError('Invalid database type, expected Region or City') |
+        except ValueError: |
+            raise GeoIPError('Failed to lookup address %s' % addr) |
+ |
+    def region_by_name(self, hostname): |
+        """ |
+        Resolve hostname and look up its region. |
+        Accepts the same database types as region_by_addr(). |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: whatever region_by_addr() returns for the resolved address |
+        @rtype: dict |
+        """ |
+        return self.region_by_addr(self._gethostbyname(hostname)) |
+ |
+    def time_zone_by_addr(self, addr): |
+        """ |
+        Look up the time zone for a given IP address. |
+        Use this method if you have a City database. |
+ |
+        @param addr: IP address |
+        @type addr: str |
+        @return: Time zone, or None when the record has none |
+        @rtype: str |
+        @raise GeoIPError: for a non-City database, or when a ValueError |
+            occurs during the lookup |
+        """ |
+        try: |
+            ipnum = util.ip2long(addr) |
+            if not ipnum: |
+                raise ValueError('Invalid IP address') |
+ |
+            if self._databaseType in const.CITY_EDITIONS: |
+                record = self._get_record(ipnum) |
+                return record.get('time_zone') |
+ |
+            raise GeoIPError('Invalid database type, expected City') |
+        except ValueError: |
+            raise GeoIPError('Failed to lookup address %s' % addr) |
+ |
+    def time_zone_by_name(self, hostname): |
+        """ |
+        Resolve hostname and look up its time zone. |
+        Accepts the same database types as time_zone_by_addr(). |
+ |
+        @param hostname: Hostname |
+        @type hostname: str |
+        @return: Time zone |
+        @rtype: str |
+        """ |
+        return self.time_zone_by_addr(self._gethostbyname(hostname)) |