sitescripts/subscriptions/bin/updateMalwareDomainsList.py - Issue 29821558: Issue #6707 - Make the generated malware domain filter list encode domains as Punycode

Side by Side Diff: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29821558: Issue #6707 - Make the generated malware domain filter list encode domains as Punycode (Closed)

Patch Set: Removed unnecessary codecs call from updateMalwareDomains.py Created July 9, 2018, 1:08 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of the Adblock Plus web scripts,	1 # This file is part of the Adblock Plus web scripts,

2 # Copyright (C) 2006-present eyeo GmbH	2 # Copyright (C) 2006-present eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11 # GNU General Public License for more details.	11 # GNU General Public License for more details.

12 #	12 #

13 # You should have received a copy of the GNU General Public License	13 # You should have received a copy of the GNU General Public License

14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

15	15

16 import os	16 import os

17 import subprocess	17 import subprocess

18 import codecs

19 import contextlib	18 import contextlib

20 import urllib2	19 import urllib2

21 import zipfile	20 import zipfile

22 import tempfile	21 import tempfile

23 import shutil	22 import shutil

24 import sys	23 import sys

25 from StringIO import StringIO	24 from StringIO import StringIO

26 from sitescripts.utils import get_config	25 from sitescripts.utils import get_config

27	26

28	27

(...skipping 24 matching lines...) Expand all Loading...
53 section = 'subscriptionDownloads'	52 section = 'subscriptionDownloads'

54 repository = config.get(section, 'malwaredomains_repository')	53 repository = config.get(section, 'malwaredomains_repository')

55 mirrors = config.get(section, 'malwaredomains_mirrors').split()	54 mirrors = config.get(section, 'malwaredomains_mirrors').split()

56	55

57 tempdir = tempfile.mkdtemp(prefix='malwaredomains')	56 tempdir = tempfile.mkdtemp(prefix='malwaredomains')

58 try:	57 try:

59 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])	58 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])

60 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])	59 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])

61	60

62 path = os.path.join(tempdir, 'malwaredomains_full.txt')	61 path = os.path.join(tempdir, 'malwaredomains_full.txt')

63 file = codecs.open(path, 'wb', encoding='utf-8')	62 file = open(path, 'wb')

64	63

65 print >>file, FILTERLIST_HEADER	64 print >>file, FILTERLIST_HEADER

66	65

67 error_report = ['Unable to fetch malware domains list', 'Errors:']	66 error_report = ['Unable to fetch malware domains list', 'Errors:']

68 for mirror in mirrors:	67 for mirror in mirrors:

69 error_message, data = try_mirror(mirror)	68 error_message, data = try_mirror(mirror)

70 if data is not None:	69 if data is not None:

71 break	70 break

72 error_report.append(error_message)	71 error_report.append(error_message)

73 else:	72 else:

74 sys.exit('\n'.join(error_report))	73 sys.exit('\n'.join(error_report))

75	74

76 zf = zipfile.ZipFile(StringIO(data), 'r')	75 zf = zipfile.ZipFile(StringIO(data), 'r')

77 info = zf.infolist()[0]	76 info = zf.infolist()[0]

78 for line in str(zf.read(info.filename)).splitlines():	77 for line in str(zf.read(info.filename)).splitlines():

79 domain = line.strip()	78 domain = line.strip()

80 if not domain:	79 if not domain:

81 continue	80 continue

82	81

83 print >>file, '||%s^' % domain.decode('idna')	82 print >>file, '||%s^' % domain

84 file.close()	83 file.close()

85	84

86 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':	85 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':

87 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data'])	86 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A',

	87 '-u', 'hgbot', '-m',

	88 'Updated malwaredomains.com data'])

88 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])	89 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])

89 finally:	90 finally:

90 shutil.rmtree(tempdir, ignore_errors=True)	91 shutil.rmtree(tempdir, ignore_errors=True)

91	92

92	93

93 if __name__ == '__main__':	94 if __name__ == '__main__':

94 main()	95 main()

OLD	NEW

« no previous file with comments | « .gitignore ('k') | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with comments »