Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/subscriptions/combineSubscriptions.py

Issue 11126024: Speed up filter subscription processing by using a thread pool (Closed)
Patch Set: Created July 18, 2013, 11:22 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: sitescripts/subscriptions/combineSubscriptions.py
===================================================================
--- a/sitescripts/subscriptions/combineSubscriptions.py
+++ b/sitescripts/subscriptions/combineSubscriptions.py
@@ -11,77 +11,91 @@
# Adblock Plus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
-import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64
+import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64, multiprocessing, functools
from getopt import getopt, GetoptError
acceptedExtensions = {
'.txt': True,
}
ignore = {
'Apache.txt': True,
'CC-BY-SA.txt': True,
'GPL.txt': True,
'MPL.txt': True,
}
verbatim = {
'COPYING': True,
}
+def getFiles(sourceDirs):
+ for sourceName, sourceDir in sourceDirs.iteritems():
+ for file in os.listdir(sourceDir):
+ if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)):
+ continue
+
+ if file in verbatim:
+ yield (sourceName, sourceDir, file, 'verbatim')
+ elif not os.path.splitext(file)[1] in acceptedExtensions:
+ continue
+ else:
+ yield (sourceName, sourceDir, file, 'subscription')
+
def combineSubscriptions(sourceDirs, targetDir, timeout=30):
global acceptedExtensions, ignore, verbatim
if isinstance(sourceDirs, basestring):
sourceDirs = {'': sourceDirs}
if not os.path.exists(targetDir):
os.makedirs(targetDir, 0755)
known = {}
- for sourceName, sourceDir in sourceDirs.iteritems():
- for file in os.listdir(sourceDir):
- if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)):
- continue
- if file in verbatim:
- processVerbatimFile(sourceDir, targetDir, file)
- elif not os.path.splitext(file)[1] in acceptedExtensions:
- continue
- else:
- try:
- processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout)
- except:
- print >>sys.stderr, 'Error processing subscription file "%s"' % file
- traceback.print_exc()
- print >>sys.stderr
- known[os.path.splitext(file)[0] + '.tpl'] = True
- known[os.path.splitext(file)[0] + '.tpl.gz'] = True
- known[file] = True
- known[file + '.gz'] = True
+ pool = multiprocessing.Pool()
+ processor = functools.partial(processFile, sourceDirs, targetDir, timeout)
+ for file, type in pool.imap(processor, getFiles(sourceDirs)):
+ known[file] = True
+ known[file + '.gz'] = True
+ if type == "subscription":
+ known[os.path.splitext(file)[0] + '.tpl'] = True
+ known[os.path.splitext(file)[0] + '.tpl.gz'] = True
for file in os.listdir(targetDir):
if file[0] == '.':
continue
if not file in known:
os.remove(os.path.join(targetDir, file))
def saveFile(filePath, data):
handle = codecs.open(filePath, 'wb', encoding='utf-8')
handle.write(data)
handle.close()
try:
subprocess.check_output(['7za', 'a', '-tgzip', '-mx=9', '-bd', '-mpass=15', filePath + '.gz', filePath])
except:
print >>sys.stderr, 'Failed to compress file %s. Please ensure that p7zip is installed on the system.' % filePath
+def processFile(sourceDirs, targetDir, timeout, (sourceName, sourceDir, file, type)):
+ if type == "verbatim":
+ processVerbatimFile(sourceDir, targetDir, file)
+ else:
+ try:
+ processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout)
+ except:
+ print >>sys.stderr, 'Error processing subscription file "%s"' % file
+ traceback.print_exc()
+ print >>sys.stderr
+ return (file, type)
+
def processVerbatimFile(sourceDir, targetDir, file):
handle = codecs.open(os.path.join(sourceDir, file), 'rb', encoding='utf-8')
saveFile(os.path.join(targetDir, file), handle.read())
handle.close()
def processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout):
sourceDir = sourceDirs[sourceName]
filePath = os.path.join(sourceDir, file)
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld