Index: sitescripts/subscriptions/bin/updateMalwareDomainsList.py |
=================================================================== |
--- a/sitescripts/subscriptions/bin/updateMalwareDomainsList.py |
+++ b/sitescripts/subscriptions/bin/updateMalwareDomainsList.py |
@@ -10,38 +10,72 @@ |
# Adblock Plus is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# GNU General Public License for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
-import os, subprocess, codecs, urllib, zipfile, tempfile, shutil |
+import os, subprocess, codecs, urllib2, zipfile, tempfile, shutil, sys |
+import ConfigParser |
from StringIO import StringIO |
from sitescripts.utils import get_config |
+ |
+# Header block written at the very top of the generated filter list file. |
+# The text is emitted as-is by this script, including the %timestamp% |
+# placeholder (presumably substituted by a later processing step -- verify). |
+FILTERLIST_HEADER = '''[Adblock Plus 1.1] |
+! This is a list of malware domains generated from malwaredomains.com data. |
+! Homepage: http://malwaredomains.com/?page_id=2 |
+! Last modified: %timestamp% |
+! Expires: 1d |
+!''' |
+ |
+# Mirror base URLs used when the 'malwaredomains_mirrors' option is missing |
+# from the 'subscriptionDownloads' config section. Mirrors are tried in the |
+# order listed and the first successful download wins. |
+DEFAULT_MIRRORS_LIST = [ |
+ 'http://mirror3.malwaredomains.com', |
+ 'http://mirror1.malwaredomains.com', |
+ 'http://mirror2.malwaredomains.com' |
+] |
+ |
+# Path of the zipped domain list, appended to a mirror base URL. |
+MALWAREDOMAINS_PATH = '/files/justdomains.zip' |
+ |
+ |
+def try_mirror(mirror): |
+    """Download the malware domains archive from a single mirror. |
+ |
+    mirror is the mirror's base URL (no trailing slash); the archive path |
+    MALWAREDOMAINS_PATH is appended to it. |
+ |
+    Returns the raw response body as a byte string, or None if the mirror |
+    could not be reached or answered with an HTTP error, so that the caller |
+    can move on to the next mirror. |
+    """ |
+    try: |
+        response = urllib2.urlopen(mirror + MALWAREDOMAINS_PATH) |
+    except urllib2.URLError: |
+        # URLError is the parent class of HTTPError, so this covers HTTP |
+        # error statuses as well as DNS failures and refused connections. |
+        # Catching only HTTPError would abort the whole script on an |
+        # unreachable mirror instead of falling back to the next one. |
+        return None |
+    try: |
+        return response.read() |
+    finally: |
+        # Release the connection even if reading the body fails. |
+        response.close() |
+ |
+ |
if __name__ == '__main__': |
- repository = get_config().get('subscriptionDownloads', 'malwaredomains_repository') |
+ config = get_config() |
+ repository = config.get('subscriptionDownloads', 'malwaredomains_repository') |
+ try: |
+ mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors') |
+ mirrors_list = mirrors.split() |
+ except ConfigParser.NoOptionError: |
+ mirrors_list = DEFAULT_MIRRORS_LIST |
+ |
tempdir = tempfile.mkdtemp(prefix='malwaredomains') |
try: |
subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) |
subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default']) |
path = os.path.join(tempdir, 'malwaredomains_full.txt') |
file = codecs.open(path, 'wb', encoding='utf-8') |
- print >>file, '''[Adblock Plus 1.1] |
-! This is a list of malware domains generated from malwaredomains.com data. |
-! Homepage: http://malwaredomains.com/?page_id=2 |
-! Last modified: %timestamp% |
-! Expires: 1d |
-!''' |
+ print >>file, FILTERLIST_HEADER |
- data = urllib.urlopen('http://mirror3.malwaredomains.com/files/justdomains.zip').read() |
+ for mirror in mirrors_list: |
+ data = try_mirror(mirror) |
+ if data is not None: |
+ break |
+ else: |
+ sys.exit('Unable to fetch malware domains list.') |
+ |
zip = zipfile.ZipFile(StringIO(data), 'r') |
info = zip.infolist()[0] |
for line in str(zip.read(info.filename)).splitlines(): |
domain = line.strip() |
if not domain: |
continue |
print >>file, '||%s^' % domain.decode('idna') |