Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: pygeoip/__init__.py

Issue 29934561: #1537 - Remove stats processing from sitescripts (Closed) Base URL: https://hg.adblockplus.org/sitescripts
Patch Set: Created Nov. 2, 2018, 12:42 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « pygeoip/README.md ('k') | pygeoip/const.py » ('j') | tox.ini » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 """
3 Pure Python GeoIP API
4
5 The API is based on MaxMind's C-based Python API, but the code itself is
6 ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid.
7
8 @author: Jennifer Ennis <zaylea@gmail.com>
9
10 @license: Copyright(C) 2004 MaxMind LLC
11
12 This program is free software: you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU Lesser General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
24 """
25
26 import os
27 import math
28 import socket
29 import mmap
30 import codecs
31 from threading import Lock
32
33 try:
34 from StringIO import StringIO
35 except ImportError:
36 from io import StringIO, BytesIO
37
38 from pygeoip import util, const
39 from pygeoip.const import PY2, PY3
40 from pygeoip.timezone import time_zone_by_country_and_region
41
42
# Cache-mode flags, re-exported from pygeoip.const so that callers can pass
# them to GeoIP() without importing the const module themselves.
STANDARD = const.STANDARD
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE

# Codec name used by the decode() calls below to turn raw database bytes
# into text.
ENCODING = const.ENCODING
48
49
class GeoIPError(Exception):
    """Error raised by this module for failed lookups or unusable databases."""
52
53
class GeoIPMetaclass(type):
    """
    Metaclass that hands out one shared instance per database filename.

    The cache lives on the metaclass and is keyed by (class, filename), so
    different classes built with this metaclass never share instances.
    """

    # Maps (class, filename) -> already constructed instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Singleton method to get an instance without reparsing the db. Unique
        instances are instantiated based on the filename of the db. Flags are
        ignored for this, i.e. if you initialize one with STANDARD
        flag (default) and then try later to initialize with MEMORY_CACHE, it
        will still return the STANDARD one.

        The filename is taken from the first positional argument or from the
        'filename' keyword argument. When neither is supplied nothing is
        cached and construction proceeds normally, so the class's own
        __init__ reports the missing argument.
        """
        if args:
            filename = args[0]
        else:
            filename = kwargs.get('filename')

        if filename is None:
            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        key = (cls, filename)
        registry = GeoIPMetaclass._instances
        if key not in registry:
            # The hook must be __call__ (instantiation), not __new__: on a
            # metaclass __new__ runs once per class definition and never
            # sees the constructor arguments.
            registry[key] = super(GeoIPMetaclass, cls).__call__(
                *args, **kwargs)

        return registry[key]
75
76
# Create the base class by calling the metaclass directly. This form is valid
# on both Python 2 and Python 3, whose metaclass declaration syntaxes differ.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
78
79
class GeoIP(GeoIPBase):
    """
    Reader for GeoIP binary database files.

    The database type is detected when the object is created; each public
    ``*_by_addr`` / ``*_by_name`` method checks that the loaded database is
    of a type that can answer the query and raises GeoIPError otherwise.
    """

    def __init__(self, filename, flags=0):
        """
        Initialize the class.

        @param filename: Path to a geoip database.
        @type filename: str
        @param flags: Flags that affect how the database is processed.
            Currently supported flags are STANDARD (the default),
            MEMORY_CACHE (preload the whole file into memory) and
            MMAP_CACHE (access the file via mmap).
        @type flags: int
        """
        self._filename = filename
        self._flags = flags

        if self._flags & const.MMAP_CACHE:
            # As in the original code, the file object is closed as soon as
            # the mapping exists; `with` also closes it if mmap() fails.
            with open(filename, 'rb') as f:
                self._filehandle = mmap.mmap(f.fileno(), 0,
                                             access=mmap.ACCESS_READ)

        elif self._flags & const.MEMORY_CACHE:
            with open(filename, 'rb') as f:
                self._memoryBuffer = f.read()
            iohandle = BytesIO if PY3 else StringIO
            self._filehandle = iohandle(self._memoryBuffer)

        else:
            self._filehandle = codecs.open(filename, 'rb', ENCODING)

        # Serializes seek()+read() pairs on the shared file handle.
        self._lock = Lock()
        self._setup_segments()

    @staticmethod
    def _to_text(buf):
        """
        Decode raw bytes to text on Python 3; return anything else unchanged.
        """
        if PY3 and isinstance(buf, bytes):
            return buf.decode(ENCODING)
        return buf

    @staticmethod
    def _le_int(buf, start, length):
        """
        Decode `length` characters of `buf`, starting at `start`, as a
        little-endian unsigned integer.

        Characters are indexed one by one (not sliced) so that a buffer that
        is too short raises IndexError instead of yielding a wrong value.
        """
        value = 0
        for j in range(length):
            value += ord(buf[start + j]) << (j * 8)
        return value

    def _read_at(self, position, length):
        """
        Read `length` bytes at absolute file offset `position` and return
        them as text. The lock is held for the seek/read pair and is always
        released, even when the read fails.
        """
        with self._lock:
            self._filehandle.seek(position, os.SEEK_SET)
            buf = self._filehandle.read(length)
        return self._to_text(buf)

    def _setup_segments(self):
        """
        Parses the database file to determine what kind of database is
        being used and setup segment sizes and start points that will
        be used by the seek*() methods later.

        Supported databases:

        * COUNTRY_EDITION
        * COUNTRY_EDITION_V6
        * REGION_EDITION_REV0
        * REGION_EDITION_REV1
        * CITY_EDITION_REV0
        * CITY_EDITION_REV1
        * CITY_EDITION_REV1_V6
        * ORG_EDITION
        * ISP_EDITION
        * ASNUM_EDITION
        * ASNUM_EDITION_V6

        """
        # Defaults, used when no structure-info marker is found.
        self._databaseType = const.COUNTRY_EDITION
        self._recordLength = const.STANDARD_RECORD_LENGTH
        self._databaseSegments = const.COUNTRY_BEGIN

        # The structure info is introduced by three 0xFF bytes.
        marker = chr(255) * 3
        if PY2:
            marker = marker.decode(ENCODING)

        with self._lock:
            filepos = self._filehandle.tell()
            self._filehandle.seek(-3, os.SEEK_END)

            for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
                delim = self._filehandle.read(3)
                # On Python 2 `bytes` is an alias of `str`, so this single
                # check covers the byte-string case on both versions.
                if isinstance(delim, bytes):
                    delim = delim.decode(ENCODING)

                if delim != marker:
                    # Three bytes were consumed; move the window back by one.
                    self._filehandle.seek(-4, os.SEEK_CUR)
                    continue

                self._databaseType = ord(self._filehandle.read(1))

                # Compatibility with databases from April 2003 and earlier
                if self._databaseType >= 106:
                    self._databaseType -= 105

                if self._databaseType == const.REGION_EDITION_REV0:
                    self._databaseSegments = const.STATE_BEGIN_REV0

                elif self._databaseType == const.REGION_EDITION_REV1:
                    self._databaseSegments = const.STATE_BEGIN_REV1

                elif self._databaseType in (const.CITY_EDITION_REV0,
                                            const.CITY_EDITION_REV1,
                                            const.CITY_EDITION_REV1_V6,
                                            const.ORG_EDITION,
                                            const.ISP_EDITION,
                                            const.ASNUM_EDITION,
                                            const.ASNUM_EDITION_V6):
                    # These editions store their segment count right after
                    # the type byte, as a little-endian integer.
                    buf = self._to_text(
                        self._filehandle.read(const.SEGMENT_RECORD_LENGTH))
                    self._databaseSegments = self._le_int(
                        buf, 0, const.SEGMENT_RECORD_LENGTH)

                    long_records = (const.ORG_EDITION, const.ISP_EDITION)
                    if self._databaseType in long_records:
                        self._recordLength = const.ORG_RECORD_LENGTH
                break

            self._filehandle.seek(filepos, os.SEEK_SET)

    def _seek_country(self, ipnum):
        """
        Using the record length and appropriate start points, seek to the
        country that corresponds to the converted IP address integer.

        @param ipnum: result of ip2long conversion
        @type ipnum: int
        @return: first pointer found that is >= the database segment count;
            callers treat a value equal to the segment count as "no data"
        @rtype: int
        @raise GeoIPError: if no such pointer is found or reading fails
        """
        record_length = self._recordLength
        node_length = 2 * record_length

        try:
            offset = 0
            # NOTE(review): the address width is guessed from the number of
            # decimal digits in ipnum (more than 10 digits -> 128 bit walk).
            # Kept unchanged from the original.
            seek_depth = 127 if len(str(ipnum)) > 10 else 31

            for depth in range(seek_depth, -1, -1):
                start = node_length * offset
                if self._flags & const.MEMORY_CACHE:
                    buf = self._to_text(
                        self._memoryBuffer[start:start + node_length])
                else:
                    buf = self._read_at(start, node_length)

                # Each node holds two pointers; both are decoded so that a
                # truncated node is always detected.
                left = self._le_int(buf, 0, record_length)
                right = self._le_int(buf, record_length, record_length)

                # The current address bit selects the branch to follow.
                child = right if ipnum & (1 << depth) else left
                if child >= self._databaseSegments:
                    return child
                offset = child
        except Exception:
            # Any failure while walking the tree is reported as a corrupt
            # database below. KeyboardInterrupt and SystemExit are not
            # subclasses of Exception and therefore still propagate.
            pass

        raise GeoIPError('Corrupt database')

    def _get_org(self, ipnum):
        """
        Seek and return organization or ISP name for ipnum.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: org/isp name, or None if the database has no entry
        @rtype: str
        @raise ValueError: if the record read contains no NUL terminator
        """
        seek_org = self._seek_country(ipnum)
        if seek_org == self._databaseSegments:
            return None

        read_length = (2 * self._recordLength - 1) * self._databaseSegments
        buf = self._read_at(seek_org + read_length,
                            const.MAX_ORG_RECORD_LENGTH)

        # The name is NUL-terminated inside the fixed-size read.
        return buf[:buf.index(chr(0))]

    def _get_region(self, ipnum):
        """
        Seek and return the region info (dict containing country_code
        and region_name).

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: dict containing country_code and region_name
        @rtype: dict
        """
        region = ''
        country_code = ''
        seek_country = self._seek_country(ipnum)

        def get_region_name(offset):
            # Two uppercase letters: 0 -> 'AA', 1 -> 'AB', 26 -> 'BA', ...
            return chr(offset // 26 + 65) + chr(offset % 26 + 65)

        if self._databaseType == const.REGION_EDITION_REV0:
            seek_region = seek_country - const.STATE_BEGIN_REV0
            if seek_region >= 1000:
                country_code = 'US'
                region = get_region_name(seek_region - 1000)
            else:
                country_code = const.COUNTRY_CODES[seek_region]

        elif self._databaseType == const.REGION_EDITION_REV1:
            seek_region = seek_country - const.STATE_BEGIN_REV1
            if seek_region < const.US_OFFSET:
                pass
            elif seek_region < const.CANADA_OFFSET:
                country_code = 'US'
                region = get_region_name(seek_region - const.US_OFFSET)
            elif seek_region < const.WORLD_OFFSET:
                country_code = 'CA'
                region = get_region_name(seek_region - const.CANADA_OFFSET)
            else:
                index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
                # The original `index in const.COUNTRY_CODES` tested whether
                # the integer is one of the table's *values*; a guarded
                # lookup by position is what is needed here.
                try:
                    country_code = const.COUNTRY_CODES[index]
                except (IndexError, KeyError):
                    pass

        elif self._databaseType in const.CITY_EDITIONS:
            rec = self._get_record(ipnum)
            region = rec.get('region_name', '')
            country_code = rec.get('country_code', '')

        return {'country_code': country_code, 'region_name': region}

    def _get_record(self, ipnum):
        """
        Populate location dict for converted IP.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: dict with country_code, country_code3, country_name,
            continent, region_name, city, postal_code, latitude, longitude,
            dma_code, metro_code, area_code, time_zone; an empty dict if
            the database has no entry
        @rtype: dict
        """
        seek_country = self._seek_country(ipnum)
        if seek_country == self._databaseSegments:
            return {}

        read_length = (2 * self._recordLength - 1) * self._databaseSegments
        buf = self._read_at(seek_country + read_length,
                            const.FULL_RECORD_LENGTH)

        record = {
            'dma_code': 0,
            'area_code': 0,
            'metro_code': '',
            'postal_code': ''
        }

        # The first byte indexes the country tables.
        country = ord(buf[0])
        record['country_code'] = const.COUNTRY_CODES[country]
        record['country_code3'] = const.COUNTRY_CODES3[country]
        record['country_name'] = const.COUNTRY_NAMES[country]
        record['continent'] = const.CONTINENT_NAMES[country]

        def get_data(buf, buf_pos):
            # Return (position of the terminating NUL, text before it).
            offset = buf_pos
            while ord(buf[offset]) != 0:
                offset += 1
            if offset > buf_pos:
                return (offset, buf[buf_pos:offset])
            return (offset, '')

        # Three consecutive NUL-terminated strings follow the country byte.
        offset, record['region_name'] = get_data(buf, 1)
        offset, record['city'] = get_data(buf, offset + 1)
        offset, record['postal_code'] = get_data(buf, offset + 1)
        buf_pos = offset + 1

        # Latitude and longitude are 3-byte little-endian integers holding
        # (degrees + 180) * 10000.
        latitude = self._le_int(buf, buf_pos, 3)
        longitude = self._le_int(buf, buf_pos + 3, 3)
        buf_pos += 6

        record['latitude'] = (latitude / 10000.0) - 180.0
        record['longitude'] = (longitude / 10000.0) - 180.0

        rev1_editions = (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6)
        if (self._databaseType in rev1_editions
                and record['country_code'] == 'US'):
            # One more 3-byte integer encodes dma_code * 1000 + area_code.
            dmaarea_combo = self._le_int(buf, buf_pos, 3)
            record['dma_code'] = dmaarea_combo // 1000
            record['area_code'] = dmaarea_combo % 1000

        record['metro_code'] = const.DMA_MAP.get(record['dma_code'])
        record['time_zone'] = time_zone_by_country_and_region(
            record['country_code'], record['region_name'])

        return record

    def _gethostbyname(self, hostname):
        """
        Resolve hostname to an address string: IPv6 for IPv6 databases,
        IPv4 otherwise.

        @param hostname: Hostname
        @type hostname: str
        @return: address, or '' if IPv6 resolution fails
        @rtype: str
        """
        if self._databaseType in const.IPV6_EDITIONS:
            try:
                response = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
            except socket.gaierror:
                return ''
            # First result; item 4 is the sockaddr, whose first element is
            # the address string.
            return response[0][4][0]

        return socket.gethostbyname(hostname)

    def id_by_addr(self, addr):
        """
        Get the country index.
        Looks up the index for the country which is the key for
        the code and name.

        @param addr: The IP address
        @type addr: str
        @return: index into the country tables of the const module
        @rtype: int
        @raise ValueError: if the converted address is falsy
        @raise GeoIPError: if the database is not a Country database
        """
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError("Invalid IP address: %s" % addr)

        country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
        if self._databaseType not in country_editions:
            message = 'Invalid database type, expected Country'
            raise GeoIPError(message)

        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    def country_code_by_addr(self, addr):
        """
        Returns 2-letter country code (e.g. 'US') for specified IP address.
        Use this method if you have a Country, Region, or City database.

        @param addr: IP address
        @type addr: str
        @return: 2-letter country code
        @rtype: str
        @raise GeoIPError: on a wrong database type or a failed lookup
        """
        try:
            country_editions = (const.COUNTRY_EDITION,
                                const.COUNTRY_EDITION_V6)
            if self._databaseType in country_editions:
                is_ipv6 = addr.find(':') >= 0

                # The address family has to match the database family.
                if (not is_ipv6
                        and self._databaseType != const.COUNTRY_EDITION):
                    message = 'Invalid database type; expected IPv6 address'
                    raise ValueError(message)
                if (is_ipv6
                        and self._databaseType != const.COUNTRY_EDITION_V6):
                    message = 'Invalid database type; expected IPv4 address'
                    raise ValueError(message)

                country_id = self.id_by_addr(addr)
                return const.COUNTRY_CODES[country_id]
            elif self._databaseType in const.REGION_CITY_EDITIONS:
                return self.region_by_addr(addr).get('country_code')

            message = 'Invalid database type, expected Country, City or Region'
            raise GeoIPError(message)
        except ValueError:
            raise GeoIPError('Failed to lookup address %s' % addr)

    def country_code_by_name(self, hostname):
        """
        Returns 2-letter country code (e.g. 'US') for specified hostname.
        Use this method if you have a Country, Region, or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: 2-letter country code
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.country_code_by_addr(addr)

    def country_name_by_addr(self, addr):
        """
        Returns full country name for specified IP address.
        Use this method if you have a Country or City database.

        @param addr: IP address
        @type addr: str
        @return: country name
        @rtype: str
        @raise GeoIPError: on a wrong database type or a failed lookup
        """
        try:
            country_editions = (const.COUNTRY_EDITION,
                                const.COUNTRY_EDITION_V6)
            if self._databaseType in country_editions:
                country_id = self.id_by_addr(addr)
                return const.COUNTRY_NAMES[country_id]
            elif self._databaseType in const.CITY_EDITIONS:
                return self.record_by_addr(addr).get('country_name')
            else:
                message = 'Invalid database type, expected Country or City'
                raise GeoIPError(message)
        except ValueError:
            raise GeoIPError('Failed to lookup address %s' % addr)

    def country_name_by_name(self, hostname):
        """
        Returns full country name for specified hostname.
        Use this method if you have a Country or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: country name
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.country_name_by_addr(addr)

    def org_by_addr(self, addr):
        """
        Lookup Organization, ISP or ASNum for given IP address.
        Use this method if you have an Organization, ISP or ASNum database.

        @param addr: IP address
        @type addr: str
        @return: organization or ISP name, or None if there is no entry
        @rtype: str
        @raise GeoIPError: on a wrong database type or a failed lookup
        """
        try:
            ipnum = util.ip2long(addr)
            if not ipnum:
                raise ValueError('Invalid IP address')

            valid = (const.ORG_EDITION, const.ISP_EDITION,
                     const.ASNUM_EDITION, const.ASNUM_EDITION_V6)
            if self._databaseType not in valid:
                message = 'Invalid database type, expected Org, ISP or ASNum'
                raise GeoIPError(message)

            return self._get_org(ipnum)
        except ValueError:
            raise GeoIPError('Failed to lookup address %s' % addr)

    def org_by_name(self, hostname):
        """
        Lookup the organization (or ISP) for hostname.
        Use this method if you have an Organization/ISP database.

        @param hostname: Hostname
        @type hostname: str
        @return: Organization or ISP name
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.org_by_addr(addr)

    def record_by_addr(self, addr):
        """
        Look up the record for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: Dictionary with country_code, country_code3, country_name,
            continent, region_name, city, postal_code, latitude, longitude,
            dma_code, metro_code, area_code, time_zone; None if there is
            no entry
        @rtype: dict
        @raise GeoIPError: on a wrong database type or a failed lookup
        """
        try:
            ipnum = util.ip2long(addr)
            if not ipnum:
                raise ValueError('Invalid IP address')

            if self._databaseType not in const.CITY_EDITIONS:
                message = 'Invalid database type, expected City'
                raise GeoIPError(message)

            rec = self._get_record(ipnum)
            # An empty dict means "no entry"; report that as None.
            if not rec:
                return None

            return rec
        except ValueError:
            raise GeoIPError('Failed to lookup address %s' % addr)

    def record_by_name(self, hostname):
        """
        Look up the record for a given hostname.
        Use this method if you have a City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Dictionary with country_code, country_code3, country_name,
            continent, region_name, city, postal_code, latitude, longitude,
            dma_code, metro_code, area_code, time_zone; None if there is
            no entry
        @rtype: dict
        """
        addr = self._gethostbyname(hostname)
        return self.record_by_addr(addr)

    def region_by_addr(self, addr):
        """
        Lookup the region for given IP address.
        Use this method if you have a Region or City database.

        @param addr: IP address
        @type addr: str
        @return: Dictionary containing country_code and region_name
        @rtype: dict
        @raise GeoIPError: on a wrong database type or a failed lookup
        """
        try:
            ipnum = util.ip2long(addr)
            if not ipnum:
                raise ValueError('Invalid IP address')

            if self._databaseType not in const.REGION_CITY_EDITIONS:
                message = 'Invalid database type, expected Region or City'
                raise GeoIPError(message)

            return self._get_region(ipnum)
        except ValueError:
            raise GeoIPError('Failed to lookup address %s' % addr)

    def region_by_name(self, hostname):
        """
        Lookup the region for given hostname.
        Use this method if you have a Region or City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Dictionary containing country_code and region_name
        @rtype: dict
        """
        addr = self._gethostbyname(hostname)
        return self.region_by_addr(addr)

    def time_zone_by_addr(self, addr):
        """
        Look up the time zone for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: Time zone, or None if there is no entry
        @rtype: str
        @raise GeoIPError: on a wrong database type or a failed lookup
        """
        try:
            ipnum = util.ip2long(addr)
            if not ipnum:
                raise ValueError('Invalid IP address')

            if self._databaseType not in const.CITY_EDITIONS:
                message = 'Invalid database type, expected City'
                raise GeoIPError(message)

            return self._get_record(ipnum).get('time_zone')
        except ValueError:
            raise GeoIPError('Failed to lookup address %s' % addr)

    def time_zone_by_name(self, hostname):
        """
        Look up the time zone for a given hostname.
        Use this method if you have a City database.

        @param hostname: Hostname
        @type hostname: str
        @return: Time zone
        @rtype: str
        """
        addr = self._gethostbyname(hostname)
        return self.time_zone_by_addr(addr)
OLDNEW
« no previous file with comments | « pygeoip/README.md ('k') | pygeoip/const.py » ('j') | tox.ini » ('J')

Powered by Google App Engine
This is Rietveld