sitescripts/subscriptions/bin/updateMalwareDomainsList.py - Issue 29338216: Issue 3774 - Support multiple mirrors for the Malware Domains List

Unified Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29338216: Issue 3774 - Support multiple mirrors for the Malware Domains List (Closed)

Patch Set: Created March 14, 2016, 1:23 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

===================================================================

--- a/sitescripts/subscriptions/bin/updateMalwareDomainsList.py

+++ b/sitescripts/subscriptions/bin/updateMalwareDomainsList.py

@@ -10,38 +10,80 @@

# Adblock Plus is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

-import os, subprocess, codecs, urllib, zipfile, tempfile, shutil

+import os, subprocess, codecs, urllib2, zipfile, tempfile, shutil, sys

+import ConfigParser

from StringIO import StringIO

from sitescripts.utils import get_config

+filterlist_header = '''[Adblock Plus 1.1]

+! This is a list of malware domains generated from malwaredomains.com data.

+! Homepage: http://malwaredomains.com/?page_id=2

+! Last modified: %timestamp%

+! Expires: 1d

+!'''

+# Default value for malwaredomains_mirrors in the config.

+# Equivalent to the following in the config:

+# malwaredomains_mirrors=

Sebastian Noack 2016/03/14 13:36:07 Example configuration should be documented in .sit…

Vasily Kuznetsov 2016/03/14 14:18:12 Acknowledged.

Vasily Kuznetsov 2016/03/14 14:47:58 Done.

+# http://mirror3.malwaredomains.com

+# http://mirror1.malwaredomains.com

+# http://mirror2.malwaredomains.com

+default_mirrors_list = [

Sebastian Noack 2016/03/14 13:36:07 I'd rather wait until sitescripts.ini got updated

Vasily Kuznetsov 2016/03/14 14:18:11 I'm cool with this, but I'm not very sure how to u…

Sebastian Noack 2016/03/14 15:40:12 Matze, Felix or Wladimir can change the config man…

+ 'http://mirror3.malwaredomains.com',

+ 'http://mirror1.malwaredomains.com',

+ 'http://mirror2.malwaredomains.com'

+]

+malwaredomains_path = '/files/justdomains.zip'

Sebastian Noack 2016/03/14 13:36:07 Nit: Please use upper case for constant-like variables…

Vasily Kuznetsov 2016/03/14 14:18:11 Acknowledged.

Vasily Kuznetsov 2016/03/14 14:47:58 Done.

+def try_mirror(mirror, path):

+ try:

+ response = urllib2.urlopen(mirror + path)

+ return response.read()

+ except urllib2.HTTPError, err:

+ print >>sys.stderr, '{}: {}'.format(mirror, err)

Sebastian Noack 2016/03/14 13:36:07 Any output we generate will result into an email s…

Vasily Kuznetsov 2016/03/14 14:18:11 Acknowledged.

Vasily Kuznetsov 2016/03/14 14:47:58 Done.

+ return None

if __name__ == '__main__':

- repository = get_config().get('subscriptionDownloads', 'malwaredomains_repository')

+ config = get_config()

+ repository = config.get('subscriptionDownloads', 'malwaredomains_repository')

+ try:

+ mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors')

+ mirrors_list = filter(None, [mirror.strip() for mirror in mirrors.split()])

+ except ConfigParser.NoOptionError:

+ mirrors_list = default_mirrors_list

tempdir = tempfile.mkdtemp(prefix='malwaredomains')

try:

subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])

subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])

path = os.path.join(tempdir, 'malwaredomains_full.txt')

file = codecs.open(path, 'wb', encoding='utf-8')

- print >>file, '''[Adblock Plus 1.1]

-! This is a list of malware domains generated from malwaredomains.com data.

-! Homepage: http://malwaredomains.com/?page_id=2

-! Last modified: %timestamp%

-! Expires: 1d

-!'''

+ print >>file, filterlist_header

- data = urllib.urlopen('http://mirror3.malwaredomains.com/files/justdomains.zip').read()

+ for mirror in mirrors_list:

+ data = try_mirror(mirror, malwaredomains_path)

+ if data is not None:

+ break

+ else:

+ print >>sys.stderr, 'Unable to fetch malware domains list.'

+ sys.exit(1)

zip = zipfile.ZipFile(StringIO(data), 'r')

info = zip.infolist()[0]

for line in str(zip.read(info.filename)).splitlines():

domain = line.strip()

if not domain:

continue

print >>file, '||%s^' % domain.decode('idna')

« no previous file with comments | « no previous file | no next file » | no next file with comments »