Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29370984: Fixes 4784 - Improve error reporting in updateMalwareDomainsList and add tests (Closed)
Patch Set: Improve the error handling flow in try_mirror Created Jan. 20, 2017, 1:43 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sitescripts/subscriptions/bin/updateMalwareDomainsList.py
===================================================================
--- a/sitescripts/subscriptions/bin/updateMalwareDomainsList.py
+++ b/sitescripts/subscriptions/bin/updateMalwareDomainsList.py
@@ -11,16 +11,17 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
import os
import subprocess
import codecs
+import contextlib
import urllib2
import zipfile
import tempfile
import shutil
import sys
from StringIO import StringIO
from sitescripts.utils import get_config
@@ -31,53 +32,63 @@
! Last modified: %timestamp%
! Expires: 1d
!'''
MALWAREDOMAINS_PATH = '/files/justdomains.zip'
def try_mirror(mirror):
+ url = mirror + MALWAREDOMAINS_PATH
try:
- response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH)
- return response.read()
- except urllib2.HTTPError:
- return None
+ with contextlib.closing(urllib2.urlopen(url)) as response:
+ return None, response.read()
+ except urllib2.HTTPError as exc:
+ exc.close()
+ except urllib2.URLError as exc:
+ pass
+ return 'Failed to fetch {}: {}'.format(url, exc), None
-if __name__ == '__main__':
+def main():
config = get_config()
section = 'subscriptionDownloads'
repository = config.get(section, 'malwaredomains_repository')
mirrors = config.get(section, 'malwaredomains_mirrors').split()
tempdir = tempfile.mkdtemp(prefix='malwaredomains')
try:
subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])
path = os.path.join(tempdir, 'malwaredomains_full.txt')
file = codecs.open(path, 'wb', encoding='utf-8')
print >>file, FILTERLIST_HEADER
+ error_report = ['Unable to fetch malware domains list', 'Errors:']
for mirror in mirrors:
- data = try_mirror(mirror)
+ error_message, data = try_mirror(mirror)
if data is not None:
break
+ error_report.append(error_message)
else:
- sys.exit('Unable to fetch malware domains list.')
+ sys.exit('\n'.join(error_report))
- zip = zipfile.ZipFile(StringIO(data), 'r')
- info = zip.infolist()[0]
- for line in str(zip.read(info.filename)).splitlines():
+ zf = zipfile.ZipFile(StringIO(data), 'r')
+ info = zf.infolist()[0]
+ for line in str(zf.read(info.filename)).splitlines():
domain = line.strip()
if not domain:
continue
print >>file, '||%s^' % domain.decode('idna')
file.close()
if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':
subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data'])
subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])
finally:
shutil.rmtree(tempdir, ignore_errors=True)
+
+
+if __name__ == '__main__':
+ main()
« no previous file with comments | « no previous file | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld