Index: sitescripts/subscriptions/combineSubscriptions.py |
=================================================================== |
--- a/sitescripts/subscriptions/combineSubscriptions.py |
+++ b/sitescripts/subscriptions/combineSubscriptions.py |
@@ -11,77 +11,91 @@ |
# Adblock Plus is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# GNU General Public License for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
-import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64 |
+import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64, multiprocessing, functools |
from getopt import getopt, GetoptError |
acceptedExtensions = { |
'.txt': True, |
} |
ignore = { |
'Apache.txt': True, |
'CC-BY-SA.txt': True, |
'GPL.txt': True, |
'MPL.txt': True, |
} |
verbatim = { |
'COPYING': True, |
} |
+def getFiles(sourceDirs): |
+ for sourceName, sourceDir in sourceDirs.iteritems(): |
+ for file in os.listdir(sourceDir): |
+ if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)): |
+ continue |
+ |
+ if file in verbatim: |
+ yield (sourceName, sourceDir, file, 'verbatim') |
+ elif not os.path.splitext(file)[1] in acceptedExtensions: |
+ continue |
+ else: |
+ yield (sourceName, sourceDir, file, 'subscription') |
+ |
def combineSubscriptions(sourceDirs, targetDir, timeout=30): |
global acceptedExtensions, ignore, verbatim |
if isinstance(sourceDirs, basestring): |
sourceDirs = {'': sourceDirs} |
if not os.path.exists(targetDir): |
os.makedirs(targetDir, 0755) |
known = {} |
- for sourceName, sourceDir in sourceDirs.iteritems(): |
- for file in os.listdir(sourceDir): |
- if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)): |
- continue |
- if file in verbatim: |
- processVerbatimFile(sourceDir, targetDir, file) |
- elif not os.path.splitext(file)[1] in acceptedExtensions: |
- continue |
- else: |
- try: |
- processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout) |
- except: |
- print >>sys.stderr, 'Error processing subscription file "%s"' % file |
- traceback.print_exc() |
- print >>sys.stderr |
- known[os.path.splitext(file)[0] + '.tpl'] = True |
- known[os.path.splitext(file)[0] + '.tpl.gz'] = True |
- known[file] = True |
- known[file + '.gz'] = True |
+ pool = multiprocessing.Pool() |
+ processor = functools.partial(processFile, sourceDirs, targetDir, timeout) |
+ for file, type in pool.imap(processor, getFiles(sourceDirs)): |
+ known[file] = True |
+ known[file + '.gz'] = True |
+ if type == "subscription": |
+ known[os.path.splitext(file)[0] + '.tpl'] = True |
+ known[os.path.splitext(file)[0] + '.tpl.gz'] = True |
for file in os.listdir(targetDir): |
if file[0] == '.': |
continue |
if not file in known: |
os.remove(os.path.join(targetDir, file)) |
def saveFile(filePath, data): |
handle = codecs.open(filePath, 'wb', encoding='utf-8') |
handle.write(data) |
handle.close() |
try: |
subprocess.check_output(['7za', 'a', '-tgzip', '-mx=9', '-bd', '-mpass=15', filePath + '.gz', filePath]) |
except: |
print >>sys.stderr, 'Failed to compress file %s. Please ensure that p7zip is installed on the system.' % filePath |
+def processFile(sourceDirs, targetDir, timeout, (sourceName, sourceDir, file, type)): |
+ if type == "verbatim": |
+ processVerbatimFile(sourceDir, targetDir, file) |
+ else: |
+ try: |
+ processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout) |
+ except: |
+ print >>sys.stderr, 'Error processing subscription file "%s"' % file |
+ traceback.print_exc() |
+ print >>sys.stderr |
+ return (file, type) |
+ |
def processVerbatimFile(sourceDir, targetDir, file): |
handle = codecs.open(os.path.join(sourceDir, file), 'rb', encoding='utf-8') |
saveFile(os.path.join(targetDir, file), handle.read()) |
handle.close() |
def processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout): |
sourceDir = sourceDirs[sourceName] |
filePath = os.path.join(sourceDir, file) |