| Index: sitescripts/subscriptions/combineSubscriptions.py |
| =================================================================== |
| --- a/sitescripts/subscriptions/combineSubscriptions.py |
| +++ b/sitescripts/subscriptions/combineSubscriptions.py |
| @@ -11,77 +11,91 @@ |
| # Adblock Plus is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| -import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64 |
| +import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64, multiprocessing, functools |
| from getopt import getopt, GetoptError |
| acceptedExtensions = { |
| '.txt': True, |
| } |
| ignore = { |
| 'Apache.txt': True, |
| 'CC-BY-SA.txt': True, |
| 'GPL.txt': True, |
| 'MPL.txt': True, |
| } |
| verbatim = { |
| 'COPYING': True, |
| } |
| +def getFiles(sourceDirs): |
| + for sourceName, sourceDir in sourceDirs.iteritems(): |
| + for file in os.listdir(sourceDir): |
| + if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)): |
| + continue |
| + |
| + if file in verbatim: |
| + yield (sourceName, sourceDir, file, 'verbatim') |
| + elif not os.path.splitext(file)[1] in acceptedExtensions: |
| + continue |
| + else: |
| + yield (sourceName, sourceDir, file, 'subscription') |
| + |
| def combineSubscriptions(sourceDirs, targetDir, timeout=30): |
| global acceptedExtensions, ignore, verbatim |
| if isinstance(sourceDirs, basestring): |
| sourceDirs = {'': sourceDirs} |
| if not os.path.exists(targetDir): |
| os.makedirs(targetDir, 0755) |
| known = {} |
| - for sourceName, sourceDir in sourceDirs.iteritems(): |
| - for file in os.listdir(sourceDir): |
| - if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)): |
| - continue |
| - if file in verbatim: |
| - processVerbatimFile(sourceDir, targetDir, file) |
| - elif not os.path.splitext(file)[1] in acceptedExtensions: |
| - continue |
| - else: |
| - try: |
| - processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout) |
| - except: |
| - print >>sys.stderr, 'Error processing subscription file "%s"' % file |
| - traceback.print_exc() |
| - print >>sys.stderr |
| - known[os.path.splitext(file)[0] + '.tpl'] = True |
| - known[os.path.splitext(file)[0] + '.tpl.gz'] = True |
| - known[file] = True |
| - known[file + '.gz'] = True |
| + pool = multiprocessing.Pool() |
| + processor = functools.partial(processFile, sourceDirs, targetDir, timeout) |
| + for file, type in pool.imap(processor, getFiles(sourceDirs)): |
| + known[file] = True |
| + known[file + '.gz'] = True |
| + if type == "subscription": |
| + known[os.path.splitext(file)[0] + '.tpl'] = True |
| + known[os.path.splitext(file)[0] + '.tpl.gz'] = True |
| for file in os.listdir(targetDir): |
| if file[0] == '.': |
| continue |
| if not file in known: |
| os.remove(os.path.join(targetDir, file)) |
| def saveFile(filePath, data): |
| handle = codecs.open(filePath, 'wb', encoding='utf-8') |
| handle.write(data) |
| handle.close() |
| try: |
| subprocess.check_output(['7za', 'a', '-tgzip', '-mx=9', '-bd', '-mpass=15', filePath + '.gz', filePath]) |
| except: |
| print >>sys.stderr, 'Failed to compress file %s. Please ensure that p7zip is installed on the system.' % filePath |
| +def processFile(sourceDirs, targetDir, timeout, (sourceName, sourceDir, file, type)): |
| + if type == "verbatim": |
| + processVerbatimFile(sourceDir, targetDir, file) |
| + else: |
| + try: |
| + processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout) |
| + except: |
| + print >>sys.stderr, 'Error processing subscription file "%s"' % file |
| + traceback.print_exc() |
| + print >>sys.stderr |
| + return (file, type) |
| + |
| def processVerbatimFile(sourceDir, targetDir, file): |
| handle = codecs.open(os.path.join(sourceDir, file), 'rb', encoding='utf-8') |
| saveFile(os.path.join(targetDir, file), handle.read()) |
| handle.close() |
| def processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout): |
| sourceDir = sourceDirs[sourceName] |
| filePath = os.path.join(sourceDir, file) |