Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29370984: Fixes 4784 - Improve error reporting in updateMalwareDomainsList and add tests (Closed)
Patch Set: Created Jan. 10, 2017, 6:08 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file is part of the Adblock Plus web scripts, 1 # This file is part of the Adblock Plus web scripts,
2 # Copyright (C) 2006-2016 Eyeo GmbH 2 # Copyright (C) 2006-2016 Eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
(...skipping 19 matching lines...) Expand all
30 ! Homepage: http://malwaredomains.com/?page_id=2 30 ! Homepage: http://malwaredomains.com/?page_id=2
31 ! Last modified: %timestamp% 31 ! Last modified: %timestamp%
32 ! Expires: 1d 32 ! Expires: 1d
33 !''' 33 !'''
34 34
35 MALWAREDOMAINS_PATH = '/files/justdomains.zip' 35 MALWAREDOMAINS_PATH = '/files/justdomains.zip'
36 36
37 37
38 def try_mirror(mirror): 38 def try_mirror(mirror):
39 try: 39 try:
40 response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH) 40 url = mirror + MALWAREDOMAINS_PATH
Sebastian Noack 2017/01/19 09:01:29 This line doesn't seem to belong in to the try-blo
Vasily Kuznetsov 2017/01/19 16:34:08 Done.
41 return response.read() 41 response = urllib2.urlopen(url)
Sebastian Noack 2017/01/19 09:01:29 I think we should explicitly close the response.
Jon Sonesen 2017/01/19 09:31:24 Could this be resolved by using a 'with' statement
Sebastian Noack 2017/01/19 09:48:18 Yes, this should be implemented either using try-f
Jon Sonesen 2017/01/19 09:55:01 Thanks for elaborating on those points :)
Vasily Kuznetsov 2017/01/19 16:34:08 Done.
42 except urllib2.HTTPError: 42 return None, response.read()
43 return None 43 except urllib2.URLError as exc:
Sebastian Noack 2017/01/19 09:01:29 Note that HTTPError (a subclass of URLError) is al
Vasily Kuznetsov 2017/01/19 16:34:08 Done.
44 error_message = 'Failed to fetch {}: {}'.format(url, exc)
Jon Sonesen 2017/01/18 17:04:48 Perhaps it is more useful to you the reason attrib
Vasily Kuznetsov 2017/01/18 18:46:35 We might also catch subclasses of URLError here. B
45 return error_message, None
44 46
45 47
46 if __name__ == '__main__': 48 def main():
47 config = get_config() 49 config = get_config()
48 section = 'subscriptionDownloads' 50 section = 'subscriptionDownloads'
49 repository = config.get(section, 'malwaredomains_repository') 51 repository = config.get(section, 'malwaredomains_repository')
50 mirrors = config.get(section, 'malwaredomains_mirrors').split() 52 mirrors = config.get(section, 'malwaredomains_mirrors').split()
51 53
52 tempdir = tempfile.mkdtemp(prefix='malwaredomains') 54 tempdir = tempfile.mkdtemp(prefix='malwaredomains')
53 try: 55 try:
54 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) 56 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
55 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'] ) 57 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'] )
56 58
57 path = os.path.join(tempdir, 'malwaredomains_full.txt') 59 path = os.path.join(tempdir, 'malwaredomains_full.txt')
58 file = codecs.open(path, 'wb', encoding='utf-8') 60 file = codecs.open(path, 'wb', encoding='utf-8')
59 61
60 print >>file, FILTERLIST_HEADER 62 print >>file, FILTERLIST_HEADER
61 63
64 error_report = ['Unable to fetch malware domains list', 'Errors:']
62 for mirror in mirrors: 65 for mirror in mirrors:
63 data = try_mirror(mirror) 66 error_message, data = try_mirror(mirror)
64 if data is not None: 67 if data is not None:
65 break 68 break
69 error_report.append(error_message)
66 else: 70 else:
67 sys.exit('Unable to fetch malware domains list.') 71 sys.exit('\n'.join(error_report))
68 72
69 zip = zipfile.ZipFile(StringIO(data), 'r') 73 zf = zipfile.ZipFile(StringIO(data), 'r')
Vasily Kuznetsov 2017/01/10 18:18:29 These three lines are an unrelated change. Flake8
70 info = zip.infolist()[0] 74 info = zf.infolist()[0]
71 for line in str(zip.read(info.filename)).splitlines(): 75 for line in str(zf.read(info.filename)).splitlines():
72 domain = line.strip() 76 domain = line.strip()
73 if not domain: 77 if not domain:
74 continue 78 continue
75 79
76 print >>file, '||%s^' % domain.decode('idna') 80 print >>file, '||%s^' % domain.decode('idna')
77 file.close() 81 file.close()
78 82
79 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '': 83 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':
80 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '- u', 'hgbot', '-m', 'Updated malwaredomains.com data']) 84 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '- u', 'hgbot', '-m', 'Updated malwaredomains.com data'])
81 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir]) 85 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])
82 finally: 86 finally:
83 shutil.rmtree(tempdir, ignore_errors=True) 87 shutil.rmtree(tempdir, ignore_errors=True)
88
89
90 if __name__ == '__main__':
91 main()
OLDNEW
« no previous file with comments | « no previous file | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld