Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29338412: Issue 3810 - Remove hardcoded default mirror list from Malware Domains List conversion script (Closed)
Patch Set: Rename mirror_list to mirrors. Created March 16, 2016, 11:56 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2016 Eyeo GmbH 4 # Copyright (C) 2006-2016 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import os, subprocess, codecs, urllib2, zipfile, tempfile, shutil, sys 18 import os, subprocess, codecs, urllib2, zipfile, tempfile, shutil, sys
19 import ConfigParser
20 from StringIO import StringIO 19 from StringIO import StringIO
21 from sitescripts.utils import get_config 20 from sitescripts.utils import get_config
22 21
23 22
24 FILTERLIST_HEADER = '''[Adblock Plus 1.1] 23 FILTERLIST_HEADER = '''[Adblock Plus 1.1]
25 ! This is a list of malware domains generated from malwaredomains.com data. 24 ! This is a list of malware domains generated from malwaredomains.com data.
26 ! Homepage: http://malwaredomains.com/?page_id=2 25 ! Homepage: http://malwaredomains.com/?page_id=2
27 ! Last modified: %timestamp% 26 ! Last modified: %timestamp%
28 ! Expires: 1d 27 ! Expires: 1d
29 !''' 28 !'''
30 29
31 DEFAULT_MIRRORS_LIST = [
32 'http://mirror3.malwaredomains.com',
33 'http://mirror1.malwaredomains.com',
34 'http://mirror2.malwaredomains.com'
35 ]
36
37 MALWAREDOMAINS_PATH = '/files/justdomains.zip' 30 MALWAREDOMAINS_PATH = '/files/justdomains.zip'
38 31
39 32
40 def try_mirror(mirror): 33 def try_mirror(mirror):
41 try: 34 try:
42 response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH) 35 response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH)
43 return response.read() 36 return response.read()
44 except urllib2.HTTPError: 37 except urllib2.HTTPError:
45 return None 38 return None
46 39
47 40
48 if __name__ == '__main__': 41 if __name__ == '__main__':
49 config = get_config() 42 config = get_config()
50 repository = config.get('subscriptionDownloads', 'malwaredomains_repository') 43 section = 'subscriptionDownloads'
51 try: 44 repository = config.get(section, 'malwaredomains_repository')
52 mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors') 45 mirrors = config.get(section, 'malwaredomains_mirrors').split()
53 mirrors_list = mirrors.split()
54 except ConfigParser.NoOptionError:
55 mirrors_list = DEFAULT_MIRRORS_LIST
56 46
57 tempdir = tempfile.mkdtemp(prefix='malwaredomains') 47 tempdir = tempfile.mkdtemp(prefix='malwaredomains')
58 try: 48 try:
59 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) 49 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
60 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default']) 50 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])
61 51
62 path = os.path.join(tempdir, 'malwaredomains_full.txt') 52 path = os.path.join(tempdir, 'malwaredomains_full.txt')
63 file = codecs.open(path, 'wb', encoding='utf-8') 53 file = codecs.open(path, 'wb', encoding='utf-8')
64 54
65 print >>file, FILTERLIST_HEADER 55 print >>file, FILTERLIST_HEADER
66 56
67 for mirror in mirrors_list: 57 for mirror in mirrors:
68 data = try_mirror(mirror) 58 data = try_mirror(mirror)
69 if data is not None: 59 if data is not None:
70 break 60 break
71 else: 61 else:
72 sys.exit('Unable to fetch malware domains list.') 62 sys.exit('Unable to fetch malware domains list.')
73 63
74 zip = zipfile.ZipFile(StringIO(data), 'r') 64 zip = zipfile.ZipFile(StringIO(data), 'r')
75 info = zip.infolist()[0] 65 info = zip.infolist()[0]
76 for line in str(zip.read(info.filename)).splitlines(): 66 for line in str(zip.read(info.filename)).splitlines():
77 domain = line.strip() 67 domain = line.strip()
78 if not domain: 68 if not domain:
79 continue 69 continue
80 70
81 print >>file, '||%s^' % domain.decode('idna') 71 print >>file, '||%s^' % domain.decode('idna')
82 file.close(); 72 file.close();
83 73
84 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '': 74 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':
85 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data']) 75 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data'])
86 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir]) 76 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])
87 finally: 77 finally:
88 shutil.rmtree(tempdir, ignore_errors=True) 78 shutil.rmtree(tempdir, ignore_errors=True)
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld