Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29370984: Fixes 4784 - Improve error reporting in updateMalwareDomainsList and add tests (Closed)
Patch Set: Improve the error handling flow in try_mirror Created Jan. 20, 2017, 1:43 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # This file is part of the Adblock Plus web scripts, 1 # This file is part of the Adblock Plus web scripts,
2 # Copyright (C) 2006-2016 Eyeo GmbH 2 # Copyright (C) 2006-2016 Eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 import os 16 import os
17 import subprocess 17 import subprocess
18 import codecs 18 import codecs
19 import contextlib
19 import urllib2 20 import urllib2
20 import zipfile 21 import zipfile
21 import tempfile 22 import tempfile
22 import shutil 23 import shutil
23 import sys 24 import sys
24 from StringIO import StringIO 25 from StringIO import StringIO
25 from sitescripts.utils import get_config 26 from sitescripts.utils import get_config
26 27
27 28
28 FILTERLIST_HEADER = '''[Adblock Plus 1.1] 29 FILTERLIST_HEADER = '''[Adblock Plus 1.1]
29 ! This is a list of malware domains generated from malwaredomains.com data. 30 ! This is a list of malware domains generated from malwaredomains.com data.
30 ! Homepage: http://malwaredomains.com/?page_id=2 31 ! Homepage: http://malwaredomains.com/?page_id=2
31 ! Last modified: %timestamp% 32 ! Last modified: %timestamp%
32 ! Expires: 1d 33 ! Expires: 1d
33 !''' 34 !'''
34 35
35 MALWAREDOMAINS_PATH = '/files/justdomains.zip' 36 MALWAREDOMAINS_PATH = '/files/justdomains.zip'
36 37
37 38
38 def try_mirror(mirror): 39 def try_mirror(mirror):
40 url = mirror + MALWAREDOMAINS_PATH
39 try: 41 try:
40 response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH) 42 with contextlib.closing(urllib2.urlopen(url)) as response:
41 return response.read() 43 return None, response.read()
42 except urllib2.HTTPError: 44 except urllib2.HTTPError as exc:
43 return None 45 exc.close()
46 except urllib2.URLError as exc:
47 pass
48 return 'Failed to fetch {}: {}'.format(url, exc), None
44 49
45 50
46 if __name__ == '__main__': 51 def main():
47 config = get_config() 52 config = get_config()
48 section = 'subscriptionDownloads' 53 section = 'subscriptionDownloads'
49 repository = config.get(section, 'malwaredomains_repository') 54 repository = config.get(section, 'malwaredomains_repository')
50 mirrors = config.get(section, 'malwaredomains_mirrors').split() 55 mirrors = config.get(section, 'malwaredomains_mirrors').split()
51 56
52 tempdir = tempfile.mkdtemp(prefix='malwaredomains') 57 tempdir = tempfile.mkdtemp(prefix='malwaredomains')
53 try: 58 try:
54 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) 59 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
55 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default']) 60 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])
56 61
57 path = os.path.join(tempdir, 'malwaredomains_full.txt') 62 path = os.path.join(tempdir, 'malwaredomains_full.txt')
58 file = codecs.open(path, 'wb', encoding='utf-8') 63 file = codecs.open(path, 'wb', encoding='utf-8')
59 64
60 print >>file, FILTERLIST_HEADER 65 print >>file, FILTERLIST_HEADER
61 66
67 error_report = ['Unable to fetch malware domains list', 'Errors:']
62 for mirror in mirrors: 68 for mirror in mirrors:
63 data = try_mirror(mirror) 69 error_message, data = try_mirror(mirror)
64 if data is not None: 70 if data is not None:
65 break 71 break
72 error_report.append(error_message)
66 else: 73 else:
67 sys.exit('Unable to fetch malware domains list.') 74 sys.exit('\n'.join(error_report))
68 75
69 zip = zipfile.ZipFile(StringIO(data), 'r') 76 zf = zipfile.ZipFile(StringIO(data), 'r')
70 info = zip.infolist()[0] 77 info = zf.infolist()[0]
71 for line in str(zip.read(info.filename)).splitlines(): 78 for line in str(zf.read(info.filename)).splitlines():
72 domain = line.strip() 79 domain = line.strip()
73 if not domain: 80 if not domain:
74 continue 81 continue
75 82
76 print >>file, '||%s^' % domain.decode('idna') 83 print >>file, '||%s^' % domain.decode('idna')
77 file.close() 84 file.close()
78 85
79 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '': 86 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':
80 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data']) 87 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data'])
81 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir]) 88 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])
82 finally: 89 finally:
83 shutil.rmtree(tempdir, ignore_errors=True) 90 shutil.rmtree(tempdir, ignore_errors=True)
91
92
93 if __name__ == '__main__':
94 main()
OLD | NEW
« no previous file with comments | « no previous file | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld