Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29338216: Issue 3774 - Support multiple mirrors for the Malware Domains List (Closed)
Left Patch Set: Addressed review comments Created March 14, 2016, 2:45 p.m.
Right Patch Set: Addressed review comments 2 Created March 15, 2016, 10:51 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « .sitescripts.example ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2016 Eyeo GmbH 4 # Copyright (C) 2006-2016 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
(...skipping 19 matching lines...) Expand all
30 30
31 DEFAULT_MIRRORS_LIST = [ 31 DEFAULT_MIRRORS_LIST = [
32 'http://mirror3.malwaredomains.com', 32 'http://mirror3.malwaredomains.com',
33 'http://mirror1.malwaredomains.com', 33 'http://mirror1.malwaredomains.com',
34 'http://mirror2.malwaredomains.com' 34 'http://mirror2.malwaredomains.com'
35 ] 35 ]
36 36
37 MALWAREDOMAINS_PATH = '/files/justdomains.zip' 37 MALWAREDOMAINS_PATH = '/files/justdomains.zip'
38 38
39 39
40 def try_mirror(mirror, path): 40 def try_mirror(mirror):
41 try: 41 try:
42 response = urllib2.urlopen(mirror + path) 42 response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH)
43 return response.read() 43 return response.read()
44 except urllib2.HTTPError: 44 except urllib2.HTTPError:
45 return None 45 return None
46 46
47 47
48 if __name__ == '__main__': 48 if __name__ == '__main__':
49 config = get_config() 49 config = get_config()
50 repository = config.get('subscriptionDownloads', 'malwaredomains_repository') 50 repository = config.get('subscriptionDownloads', 'malwaredomains_repository')
51 try: 51 try:
52 mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors') 52 mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors')
53 mirrors_list = filter(None, [mirror.strip() for mirror in mirrors.split()]) 53 mirrors_list = mirrors.split()
54 except ConfigParser.NoOptionError: 54 except ConfigParser.NoOptionError:
55 mirrors_list = DEFAULT_MIRRORS_LIST 55 mirrors_list = DEFAULT_MIRRORS_LIST
56 56
57 tempdir = tempfile.mkdtemp(prefix='malwaredomains') 57 tempdir = tempfile.mkdtemp(prefix='malwaredomains')
58 try: 58 try:
59 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) 59 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
60 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default']) 60 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])
61 61
62 path = os.path.join(tempdir, 'malwaredomains_full.txt') 62 path = os.path.join(tempdir, 'malwaredomains_full.txt')
63 file = codecs.open(path, 'wb', encoding='utf-8') 63 file = codecs.open(path, 'wb', encoding='utf-8')
64 64
65 print >>file, FILTERLIST_HEADER 65 print >>file, FILTERLIST_HEADER
66 66
67 for mirror in mirrors_list: 67 for mirror in mirrors_list:
68 data = try_mirror(mirror, MALWAREDOMAINS_PATH) 68 data = try_mirror(mirror)
Sebastian Noack 2016/03/14 15:40:12 Since MALWAREDOMAINS_PATH is a global anyway, ther
Vasily Kuznetsov 2016/03/14 16:20:38 Yeah, I was thinking about it. I guess try_mirror
69 if data is not None: 69 if data is not None:
70 break 70 break
71 else: 71 else:
72 print >>sys.stderr, 'Unable to fetch malware domains list.' 72 sys.exit('Unable to fetch malware domains list.')
73 sys.exit(1)
74 73
75 zip = zipfile.ZipFile(StringIO(data), 'r') 74 zip = zipfile.ZipFile(StringIO(data), 'r')
76 info = zip.infolist()[0] 75 info = zip.infolist()[0]
77 for line in str(zip.read(info.filename)).splitlines(): 76 for line in str(zip.read(info.filename)).splitlines():
78 domain = line.strip() 77 domain = line.strip()
79 if not domain: 78 if not domain:
80 continue 79 continue
81 80
82 print >>file, '||%s^' % domain.decode('idna') 81 print >>file, '||%s^' % domain.decode('idna')
83 file.close(); 82 file.close();
84 83
85 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '': 84 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':
86 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'h gbot', '-m', 'Updated malwaredomains.com data']) 85 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'h gbot', '-m', 'Updated malwaredomains.com data'])
87 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir]) 86 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])
88 finally: 87 finally:
89 shutil.rmtree(tempdir, ignore_errors=True) 88 shutil.rmtree(tempdir, ignore_errors=True)
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld