| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding: utf-8 | 2 # coding: utf-8 |
| 3 | 3 |
| 4 # This file is part of the Adblock Plus web scripts, | 4 # This file is part of the Adblock Plus web scripts, |
| 5 # Copyright (C) 2006-2013 Eyeo GmbH | 5 # Copyright (C) 2006-2013 Eyeo GmbH |
| 6 # | 6 # |
| 7 # Adblock Plus is free software: you can redistribute it and/or modify | 7 # Adblock Plus is free software: you can redistribute it and/or modify |
| 8 # it under the terms of the GNU General Public License version 3 as | 8 # it under the terms of the GNU General Public License version 3 as |
| 9 # published by the Free Software Foundation. | 9 # published by the Free Software Foundation. |
| 10 # | 10 # |
| 11 # Adblock Plus is distributed in the hope that it will be useful, | 11 # Adblock Plus is distributed in the hope that it will be useful, |
| 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 # GNU General Public License for more details. | 14 # GNU General Public License for more details. |
| 15 # | 15 # |
| 16 # You should have received a copy of the GNU General Public License | 16 # You should have received a copy of the GNU General Public License |
| 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 18 | 18 |
| 19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base6
4 | 19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base6
4, multiprocessing, functools |
| 20 from getopt import getopt, GetoptError | 20 from getopt import getopt, GetoptError |
| 21 | 21 |
| 22 acceptedExtensions = { | 22 acceptedExtensions = { |
| 23 '.txt': True, | 23 '.txt': True, |
| 24 } | 24 } |
| 25 ignore = { | 25 ignore = { |
| 26 'Apache.txt': True, | 26 'Apache.txt': True, |
| 27 'CC-BY-SA.txt': True, | 27 'CC-BY-SA.txt': True, |
| 28 'GPL.txt': True, | 28 'GPL.txt': True, |
| 29 'MPL.txt': True, | 29 'MPL.txt': True, |
| 30 } | 30 } |
| 31 verbatim = { | 31 verbatim = { |
| 32 'COPYING': True, | 32 'COPYING': True, |
| 33 } | 33 } |
| 34 | 34 |
| 35 def getFiles(sourceDirs): |
| 36 for sourceName, sourceDir in sourceDirs.iteritems(): |
| 37 for file in os.listdir(sourceDir): |
| 38 if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sou
rceDir, file)): |
| 39 continue |
| 40 |
| 41 if file in verbatim: |
| 42 yield (sourceName, sourceDir, file, 'verbatim') |
| 43 elif not os.path.splitext(file)[1] in acceptedExtensions: |
| 44 continue |
| 45 else: |
| 46 yield (sourceName, sourceDir, file, 'subscription') |
| 47 |
| 35 def combineSubscriptions(sourceDirs, targetDir, timeout=30): | 48 def combineSubscriptions(sourceDirs, targetDir, timeout=30): |
| 36 global acceptedExtensions, ignore, verbatim | 49 global acceptedExtensions, ignore, verbatim |
| 37 | 50 |
| 38 if isinstance(sourceDirs, basestring): | 51 if isinstance(sourceDirs, basestring): |
| 39 sourceDirs = {'': sourceDirs} | 52 sourceDirs = {'': sourceDirs} |
| 40 | 53 |
| 41 if not os.path.exists(targetDir): | 54 if not os.path.exists(targetDir): |
| 42 os.makedirs(targetDir, 0755) | 55 os.makedirs(targetDir, 0755) |
| 43 | 56 |
| 44 known = {} | 57 known = {} |
| 45 for sourceName, sourceDir in sourceDirs.iteritems(): | 58 pool = multiprocessing.Pool() |
| 46 for file in os.listdir(sourceDir): | 59 processor = functools.partial(processFile, sourceDirs, targetDir, timeout) |
| 47 if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sou
rceDir, file)): | 60 for file, type in pool.imap(processor, getFiles(sourceDirs)): |
| 48 continue | 61 known[file] = True |
| 49 if file in verbatim: | 62 known[file + '.gz'] = True |
| 50 processVerbatimFile(sourceDir, targetDir, file) | 63 if type == "subscription": |
| 51 elif not os.path.splitext(file)[1] in acceptedExtensions: | 64 known[os.path.splitext(file)[0] + '.tpl'] = True |
| 52 continue | 65 known[os.path.splitext(file)[0] + '.tpl.gz'] = True |
| 53 else: | |
| 54 try: | |
| 55 processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeo
ut) | |
| 56 except: | |
| 57 print >>sys.stderr, 'Error processing subscription file "%s"' % file | |
| 58 traceback.print_exc() | |
| 59 print >>sys.stderr | |
| 60 known[os.path.splitext(file)[0] + '.tpl'] = True | |
| 61 known[os.path.splitext(file)[0] + '.tpl.gz'] = True | |
| 62 known[file] = True | |
| 63 known[file + '.gz'] = True | |
| 64 | 66 |
| 65 for file in os.listdir(targetDir): | 67 for file in os.listdir(targetDir): |
| 66 if file[0] == '.': | 68 if file[0] == '.': |
| 67 continue | 69 continue |
| 68 if not file in known: | 70 if not file in known: |
| 69 os.remove(os.path.join(targetDir, file)) | 71 os.remove(os.path.join(targetDir, file)) |
| 70 | 72 |
| 71 def saveFile(filePath, data): | 73 def saveFile(filePath, data): |
| 72 handle = codecs.open(filePath, 'wb', encoding='utf-8') | 74 handle = codecs.open(filePath, 'wb', encoding='utf-8') |
| 73 handle.write(data) | 75 handle.write(data) |
| 74 handle.close() | 76 handle.close() |
| 75 try: | 77 try: |
| 76 subprocess.check_output(['7za', 'a', '-tgzip', '-mx=9', '-bd', '-mpass=15',
filePath + '.gz', filePath]) | 78 subprocess.check_output(['7za', 'a', '-tgzip', '-mx=9', '-bd', '-mpass=15',
filePath + '.gz', filePath]) |
| 77 except: | 79 except: |
| 78 print >>sys.stderr, 'Failed to compress file %s. Please ensure that p7zip is
installed on the system.' % filePath | 80 print >>sys.stderr, 'Failed to compress file %s. Please ensure that p7zip is
installed on the system.' % filePath |
| 79 | 81 |
| 82 def processFile(sourceDirs, targetDir, timeout, (sourceName, sourceDir, file, ty
pe)): |
| 83 if type == "verbatim": |
| 84 processVerbatimFile(sourceDir, targetDir, file) |
| 85 else: |
| 86 try: |
| 87 processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout) |
| 88 except: |
| 89 print >>sys.stderr, 'Error processing subscription file "%s"' % file |
| 90 traceback.print_exc() |
| 91 print >>sys.stderr |
| 92 return (file, type) |
| 93 |
| 80 def processVerbatimFile(sourceDir, targetDir, file): | 94 def processVerbatimFile(sourceDir, targetDir, file): |
| 81 handle = codecs.open(os.path.join(sourceDir, file), 'rb', encoding='utf-8') | 95 handle = codecs.open(os.path.join(sourceDir, file), 'rb', encoding='utf-8') |
| 82 saveFile(os.path.join(targetDir, file), handle.read()) | 96 saveFile(os.path.join(targetDir, file), handle.read()) |
| 83 handle.close() | 97 handle.close() |
| 84 | 98 |
| 85 def processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout): | 99 def processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout): |
| 86 sourceDir = sourceDirs[sourceName] | 100 sourceDir = sourceDirs[sourceName] |
| 87 filePath = os.path.join(sourceDir, file) | 101 filePath = os.path.join(sourceDir, file) |
| 88 handle = codecs.open(filePath, 'rb', encoding='utf-8') | 102 handle = codecs.open(filePath, 'rb', encoding='utf-8') |
| 89 lines = map(lambda l: re.sub(r'[\r\n]', '', l), handle.readlines()) | 103 lines = map(lambda l: re.sub(r'[\r\n]', '', l), handle.readlines()) |
| (...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 313 usage() | 327 usage() |
| 314 sys.exit() | 328 sys.exit() |
| 315 elif option in ('-t', '--timeout'): | 329 elif option in ('-t', '--timeout'): |
| 316 timeout = int(value) | 330 timeout = int(value) |
| 317 | 331 |
| 318 if os.path.exists(os.path.join(sourceDir, '.hg')): | 332 if os.path.exists(os.path.join(sourceDir, '.hg')): |
| 319 # Our source is a Mercurial repository, try updating | 333 # Our source is a Mercurial repository, try updating |
| 320 subprocess.check_call(['hg', '-q', '-R', sourceDir, 'pull', '--update']) | 334 subprocess.check_call(['hg', '-q', '-R', sourceDir, 'pull', '--update']) |
| 321 | 335 |
| 322 combineSubscriptions(sourceDir, targetDir, timeout) | 336 combineSubscriptions(sourceDir, targetDir, timeout) |
| OLD | NEW |