| Index: sitescripts/subscriptions/bin/updateMalwareDomainsList.py |
| =================================================================== |
| --- a/sitescripts/subscriptions/bin/updateMalwareDomainsList.py |
| +++ b/sitescripts/subscriptions/bin/updateMalwareDomainsList.py |
| @@ -10,38 +10,80 @@ |
| # Adblock Plus is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| -import os, subprocess, codecs, urllib, zipfile, tempfile, shutil |
| +import os, subprocess, codecs, urllib2, zipfile, tempfile, shutil, sys |
| +import ConfigParser |
| from StringIO import StringIO |
| from sitescripts.utils import get_config |
| + |
| +# Header lines written at the top of the generated filter list file. |
| +# NOTE(review): the %timestamp% placeholder is written out verbatim by this |
| +# script; presumably it is substituted at a later stage - confirm. |
| +filterlist_header = '''[Adblock Plus 1.1] |
| +! This is a list of malware domains generated from malwaredomains.com data. |
| +! Homepage: http://malwaredomains.com/?page_id=2 |
| +! Last modified: %timestamp% |
| +! Expires: 1d |
| +!''' |
| + |
| +# Default value for malwaredomains_mirrors in the config. |
| +# Equivalent to the following in the config: |
| +# malwaredomains_mirrors= |
|
Sebastian Noack
2016/03/14 13:36:07
Example configuration should be documented in .sit
Vasily Kuznetsov
2016/03/14 14:18:12
Acknowledged.
Vasily Kuznetsov
2016/03/14 14:47:58
Done.
|
| +# http://mirror3.malwaredomains.com |
| +# http://mirror1.malwaredomains.com |
| +# http://mirror2.malwaredomains.com |
| +# The mirrors are tried in list order until one of them returns data. |
| +default_mirrors_list = [ |

Sebastian Noack
2016/03/14 13:36:07
I'd rather wait until sitescripts.ini got updated
Vasily Kuznetsov
2016/03/14 14:18:11
I'm cool with this, but I'm not very sure how to u
Sebastian Noack
2016/03/14 15:40:12
Matze, Felix or Wladimir can change the config man
|
| + 'http://mirror3.malwaredomains.com', |
| + 'http://mirror1.malwaredomains.com', |
| + 'http://mirror2.malwaredomains.com' |
| +] |
| + |
| +# Path of the zipped domains list; it is appended to a mirror's base URL to |
| +# build the download URL. |
| +malwaredomains_path = '/files/justdomains.zip' |
|
Sebastian Noack
2016/03/14 13:36:07
Nit: Please use upper case for constant-like varia
Vasily Kuznetsov
2016/03/14 14:18:11
Acknowledged.
Vasily Kuznetsov
2016/03/14 14:47:58
Done.
|
| + |
| + |
| +def try_mirror(mirror, path): |
| +    """Download `path` from `mirror` and return the raw response body. |
| + |
| +    `mirror` is the base URL of the mirror and `path` is appended to it |
| +    unchanged. If the download fails, the error is reported on stderr and |
| +    None is returned so that the caller can try the next mirror. |
| +    """ |
| +    try: |
| +        response = urllib2.urlopen(mirror + path) |
| +        return response.read() |
| +    # Catch IOError rather than only urllib2.HTTPError: in Python 2 |
| +    # urllib2.URLError (the base class of HTTPError, raised for DNS and |
| +    # connection failures) and socket.error (raised e.g. on a timeout while |
| +    # reading) both derive from IOError. An unreachable mirror must not |
| +    # abort the script before the remaining mirrors have been tried. |
| +    except IOError, err: |
| +        print >>sys.stderr, '{}: {}'.format(mirror, err) |

Sebastian Noack
2016/03/14 13:36:07
Any output we generate will result into an email s
Vasily Kuznetsov
2016/03/14 14:18:11
Acknowledged.
Vasily Kuznetsov
2016/03/14 14:47:58
Done.
|
| +    return None |
| + |
| + |
| if __name__ == '__main__': |
| - repository = get_config().get('subscriptionDownloads', 'malwaredomains_repository') |
| + config = get_config() |
| + repository = config.get('subscriptionDownloads', 'malwaredomains_repository') |
| + try: |
| + mirrors = config.get('subscriptionDownloads', 'malwaredomains_mirrors') |
| + mirrors_list = filter(None, [mirror.strip() for mirror in mirrors.split()]) |
| + except ConfigParser.NoOptionError: |
| + mirrors_list = default_mirrors_list |
| + |
| tempdir = tempfile.mkdtemp(prefix='malwaredomains') |
| try: |
| subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) |
| subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default']) |
| path = os.path.join(tempdir, 'malwaredomains_full.txt') |
| file = codecs.open(path, 'wb', encoding='utf-8') |
| - print >>file, '''[Adblock Plus 1.1] |
| -! This is a list of malware domains generated from malwaredomains.com data. |
| -! Homepage: http://malwaredomains.com/?page_id=2 |
| -! Last modified: %timestamp% |
| -! Expires: 1d |
| -!''' |
| + print >>file, filterlist_header |
| - data = urllib.urlopen('http://mirror3.malwaredomains.com/files/justdomains.zip').read() |
| + for mirror in mirrors_list: |
| + data = try_mirror(mirror, malwaredomains_path) |
| + if data is not None: |
| + break |
| + else: |
| + print >>sys.stderr, 'Unable to fetch malware domains list.' |
| + sys.exit(1) |
| + |
| zip = zipfile.ZipFile(StringIO(data), 'r') |
| info = zip.infolist()[0] |
| for line in str(zip.read(info.filename)).splitlines(): |
| domain = line.strip() |
| if not domain: |
| continue |
| print >>file, '||%s^' % domain.decode('idna') |