Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/subscriptions/bin/updateMalwareDomainsList.py

Issue 29821558: Issue #6707 - Make the generated malware domain filter list encode domains as Punycode (Closed)
Left Patch Set: updated tests Created July 3, 2018, 4:39 p.m.
Right Patch Set: Removed unnecessary codecs call from updateMalwareDomains.py Created July 9, 2018, 1:08 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « .gitignore ('k') | sitescripts/subscriptions/test/test_updateMalwareDomainsList.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 # This file is part of the Adblock Plus web scripts, 1 # This file is part of the Adblock Plus web scripts,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 import os 16 import os
17 import subprocess 17 import subprocess
18 import codecs
19 import contextlib 18 import contextlib
20 import urllib2 19 import urllib2
21 import zipfile 20 import zipfile
22 import tempfile 21 import tempfile
23 import shutil 22 import shutil
24 import sys 23 import sys
25 from StringIO import StringIO 24 from StringIO import StringIO
26 from sitescripts.utils import get_config 25 from sitescripts.utils import get_config
27 26
28 27
(...skipping 24 matching lines...) Expand all
53 section = 'subscriptionDownloads' 52 section = 'subscriptionDownloads'
54 repository = config.get(section, 'malwaredomains_repository') 53 repository = config.get(section, 'malwaredomains_repository')
55 mirrors = config.get(section, 'malwaredomains_mirrors').split() 54 mirrors = config.get(section, 'malwaredomains_mirrors').split()
56 55
57 tempdir = tempfile.mkdtemp(prefix='malwaredomains') 56 tempdir = tempfile.mkdtemp(prefix='malwaredomains')
58 try: 57 try:
59 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir]) 58 subprocess.check_call(['hg', '-q', 'clone', '-U', repository, tempdir])
60 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default']) 59 subprocess.check_call(['hg', '-q', 'up', '-R', tempdir, '-r', 'default'])
61 60
62 path = os.path.join(tempdir, 'malwaredomains_full.txt') 61 path = os.path.join(tempdir, 'malwaredomains_full.txt')
63 file = codecs.open(path, 'wb') 62 file = open(path, 'wb')
64 63
65 print >>file, FILTERLIST_HEADER 64 print >>file, FILTERLIST_HEADER
66 65
67 error_report = ['Unable to fetch malware domains list', 'Errors:'] 66 error_report = ['Unable to fetch malware domains list', 'Errors:']
68 for mirror in mirrors: 67 for mirror in mirrors:
69 error_message, data = try_mirror(mirror) 68 error_message, data = try_mirror(mirror)
70 if data is not None: 69 if data is not None:
71 break 70 break
72 error_report.append(error_message) 71 error_report.append(error_message)
73 else: 72 else:
74 sys.exit('\n'.join(error_report)) 73 sys.exit('\n'.join(error_report))
75 74
76 zf = zipfile.ZipFile(StringIO(data), 'r') 75 zf = zipfile.ZipFile(StringIO(data), 'r')
77 info = zf.infolist()[0] 76 info = zf.infolist()[0]
78 for line in str(zf.read(info.filename)).splitlines(): 77 for line in str(zf.read(info.filename)).splitlines():
79 domain = line.strip() 78 domain = line.strip()
80 if not domain: 79 if not domain:
81 continue 80 continue
82 81
83 print >>file, '||%s^' % domain.decode('idna').encode('punycode') 82 print >>file, '||%s^' % domain
Sebastian Noack 2018/07/05 14:55:28 This seems incorrect. Let's take "xn--fuson-1sa.or
Tudor Avram 2018/07/06 11:53:16 Done.
84 file.close() 83 file.close()
85 84
86 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '': 85 if subprocess.check_output(['hg', 'stat', '-R', tempdir]) != '':
87 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A', '-u', 'hgbot', '-m', 'Updated malwaredomains.com data']) 86 subprocess.check_call(['hg', '-q', 'commit', '-R', tempdir, '-A',
87 '-u', 'hgbot', '-m',
88 'Updated malwaredomains.com data'])
88 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir]) 89 subprocess.check_call(['hg', '-q', 'push', '-R', tempdir])
89 finally: 90 finally:
90 shutil.rmtree(tempdir, ignore_errors=True) 91 shutil.rmtree(tempdir, ignore_errors=True)
91 92
92 93
93 if __name__ == '__main__': 94 if __name__ == '__main__':
94 main() 95 main()
LEFTRIGHT

Powered by Google App Engine
This is Rietveld