Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29338216: Issue 3774 - Support multiple mirrors for the Malware Domains List (Closed)
Patch Set: Addressed review comments 2 Created March 15, 2016, 10:51 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « .sitescripts.example ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sitescripts/subscriptions/bin/updateMalwareDomainsList.py
===================================================================
--- a/sitescripts/subscriptions/bin/updateMalwareDomainsList.py
+++ b/sitescripts/subscriptions/bin/updateMalwareDomainsList.py
@@ -10,38 +10,72 @@
# Adblock Plus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
-import os, subprocess, codecs, urllib, zipfile, tempfile, shutil
+import os, subprocess, codecs, urllib2, zipfile, tempfile, shutil, sys
+import ConfigParser
from StringIO import StringIO
from sitescripts.utils import get_config
+
+# Header block written as the first lines of the generated filter list file.
+# NOTE(review): '%timestamp%' is emitted literally by this script; presumably
+# it is substituted by later tooling - confirm.
+FILTERLIST_HEADER = '''[Adblock Plus 1.1]
+! This is a list of malware domains generated from malwaredomains.com data.
+! Homepage: http://malwaredomains.com/?page_id=2
+! Last modified: %timestamp%
+! Expires: 1d
+!'''
+
+# Mirror base URLs used when the 'malwaredomains_mirrors' option is missing
+# from the 'subscriptionDownloads' config section. Mirrors are tried in list
+# order until one of them returns data.
+DEFAULT_MIRRORS_LIST = [
+ 'http://mirror3.malwaredomains.com',
+ 'http://mirror1.malwaredomains.com',
+ 'http://mirror2.malwaredomains.com'
+]
+
+# Path of the zipped domain list; appended to a mirror's base URL.
+MALWAREDOMAINS_PATH = '/files/justdomains.zip'
+
+
+def try_mirror(mirror):
+    """Download the malware domains archive from a single mirror.
+
+    `mirror` is the mirror's base URL; MALWAREDOMAINS_PATH is appended to it.
+
+    Returns the raw response body on success, or None if the mirror could
+    not be reached or answered with an HTTP error, so that the caller can
+    move on to the next mirror.
+    """
+    try:
+        response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH)
+    except urllib2.URLError:
+        # URLError is the base class of HTTPError, so this covers unreachable
+        # mirrors (DNS failure, refused connection) as well as HTTP error
+        # status codes. Catching only HTTPError would abort the whole run on
+        # the first dead mirror instead of trying the next one.
+        return None
+    try:
+        return response.read()
+    finally:
+        # Release the underlying connection even if read() fails.
+        response.close()
+
+
if __name__ == '__main__':
- repository = get_config().get('subscriptionDownloads', 'malwaredomains_repository')
+ config = get_config()
+ repository = config.get('subscriptionDownloads', 'malwaredomains_repository')
+ try:
+ mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors')
+ mirrors_list = mirrors.split()
+ except ConfigParser.NoOptionError:
+ mirrors_list = DEFAULT_MIRRORS_LIST
+
tempdir = tempfile.mkdtemp(prefix='malwaredomains')
try:
subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])
path = os.path.join(tempdir, 'malwaredomains_full.txt')
file = codecs.open(path, 'wb', encoding='utf-8')
- print >>file, '''[Adblock Plus 1.1]
-! This is a list of malware domains generated from malwaredomains.com data.
-! Homepage: http://malwaredomains.com/?page_id=2
-! Last modified: %timestamp%
-! Expires: 1d
-!'''
+ print >>file, FILTERLIST_HEADER
- data = urllib.urlopen('http://mirror3.malwaredomains.com/files/justdomains.zip').read()
+ for mirror in mirrors_list:
+ data = try_mirror(mirror)
+ if data is not None:
+ break
+ else:
+ sys.exit('Unable to fetch malware domains list.')
+
zip = zipfile.ZipFile(StringIO(data), 'r')
info = zip.infolist()[0]
for line in str(zip.read(info.filename)).splitlines():
domain = line.strip()
if not domain:
continue
print >>file, '||%s^' % domain.decode('idna')
« no previous file with comments | « .sitescripts.example ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld