Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: sitescripts/extensions/bin/updateExternalFilterlists.py

Issue 11003016: Added script for copying external filterlists into existing repositories (Closed)
Patch Set: Created June 20, 2013, 12:06 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« .sitescripts.example ('K') | « .sitescripts.example ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sitescripts/extensions/bin/updateExternalFilterlists.py
===================================================================
new file mode 100644
--- /dev/null
+++ b/sitescripts/extensions/bin/updateExternalFilterlists.py
@@ -0,0 +1,100 @@
+# coding: utf-8
Wladimir Palant 2013/06/24 12:35:10 Probably better to have that script under manageme
Thomas Greiner 2013/06/24 14:24:45 Done.
+
+# This file is part of the Adblock Plus web scripts,
+# Copyright (C) 2006-2013 Eyeo GmbH
+#
+# Adblock Plus is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 3 as
+# published by the Free Software Foundation.
+#
+# Adblock Plus is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
+
+import os, subprocess, codecs, urllib, re
+from sitescripts.utils import get_config, setupStderr
+from tempfile import mkdtemp
+from shutil import rmtree
+
def resolveRepositoryPath(repositories, path):
    """Split a target setting into a repository path and a directory in it.

    ``path`` is either a bare repository name (``"easylist"``) or a
    repository name followed by a colon and a directory relative to the
    repository root (``"easylist:some/dir"``).

    Returns a ``(repoPath, relTargetDir)`` tuple. ``repoPath`` is the value
    stored under the repository name in ``repositories``; ``relTargetDir``
    is the normalized directory relative to the repository root, or an
    empty string if ``path`` names only the repository.

    Raises ``Exception`` if the repository name is not a key of
    ``repositories`` or if the directory resolves to a location outside of
    the repository.
    """
    repoName, separator, target = path.partition(':')
    if repoName not in repositories:
        raise Exception('Cannot resolve path to repository "%s", this repository is unknown' % repoName)

    repoPath = repositories[repoName]
    if not separator:
        # No "repo:dir" form was used, the target is the repository root.
        return repoPath, ''

    # Going through join() + relpath() normalizes the directory ("a/../b"
    # becomes "b") and exposes attempts to leave the repository.
    relTargetDir = os.path.relpath(os.path.join(repoPath, target), repoPath)

    # updateExternalFilterlists() joins the result onto a temporary clone
    # and writes there, so a path climbing out of the repository ("../x" or
    # an absolute target) must not be handed back.
    if relTargetDir == os.pardir or relTargetDir.startswith(os.pardir + os.sep):
        raise Exception('Cannot resolve path "%s", it points outside of repository "%s"' % (target, repoName))

    return repoPath, relTargetDir
+
+
def updateExternalFilterlists(repositories):
    """Download each configured external filter list and push it to its repository.

    ``repositories`` maps repository names to repository paths; it is used
    to resolve the ``target`` of every entry returned by ``readSettings()``.

    For each entry the target repository is cloned into a temporary
    directory, updated to the ``default`` revision, the data downloaded from
    the entry's ``source`` URL is written to ``<name>.txt`` inside the target
    directory, and the result is committed as user ``hgbot`` and pushed.
    The temporary directory is always removed afterwards.

    Note that the exit codes of the Mercurial commands are not checked.
    """
    settings = readSettings()
    for name, setting in settings.iteritems():
        # NOTE(review): the reviewer suggested a different prefix for the
        # temporary directory; it is left unchanged here.
        tempdir = mkdtemp(prefix='adblockplus')
        # The try block starts right after mkdtemp() so that the directory
        # is removed even if resolving the target fails.
        try:
            repoPath, targetDir = resolveRepositoryPath(repositories, setting['target'])

            # NOTE(review): the reviewer asked for the -q parameter on all
            # Mercurial commands; the commands are left unchanged here.
            subprocess.Popen(['hg', 'clone', '-U', repoPath, tempdir], stdout=subprocess.PIPE).communicate()
            subprocess.Popen(['hg', 'up', '-R', tempdir, '-r', 'default'], stdout=subprocess.PIPE).communicate()

            directory = os.path.join(tempdir, targetDir)
            if not os.path.exists(directory):
                os.makedirs(directory)

            # NOTE(review): the reviewer would prefer the settings to specify
            # the full file name instead of deriving it from the list name.
            filename = name + '.txt'
            path = os.path.join(directory, filename)
            exists = os.path.exists(path)

            # Download before touching the target file so that a failed
            # request never leaves an open or truncated file behind.
            response = urllib.urlopen(setting['source'])
            try:
                data = response.read()
            finally:
                response.close()

            # Empty lines are dropped and surrounding whitespace is removed,
            # then everything is decoded in one go. The source is expected
            # to be UTF-8 (as requested in review), not ISO-8859-1.
            lines = [line.strip() for line in data.splitlines() if line]
            content = b''.join(line + b'\n' for line in lines).decode('utf-8')

            handle = codecs.open(path, 'wb', encoding='utf-8')
            try:
                handle.write(content)
            finally:
                handle.close()

            message = 'Updated copy of external filterlist %s'
            if not exists:
                # NOTE(review): the reviewer noted that adding the file
                # explicitly is unnecessary and hg commit can take care of it.
                subprocess.Popen(['hg', 'add', '-R', tempdir], stdout=subprocess.PIPE).communicate()
                message = 'Added copy of external filterlist %s'
            subprocess.Popen(['hg', 'commit', '-R', tempdir, '-u', 'hgbot', '-m', message % filename], stdout=subprocess.PIPE).communicate()
            subprocess.Popen(['hg', 'push', '-R', tempdir], stdout=subprocess.PIPE).communicate()
        finally:
            rmtree(tempdir)
+
def readSettings():
    """Collect the per-list settings from the ``externalFilterlists`` config section.

    Options are expected to be named ``<name>_source`` and ``<name>_target``;
    any other option in the section is ignored.

    Returns a dict mapping each list name to a dict with the keys
    ``'source'`` and ``'target'``. A key whose option is missing from the
    configuration keeps the value ``None``.
    """
    result = {}
    for option, value in get_config().items('externalFilterlists'):
        # Split at the last underscore only, list names may contain
        # underscores themselves (e.g. "my_list_source").
        name, separator, setting = option.rpartition('_')
        if not separator or setting not in ('source', 'target'):
            continue

        entry = result.setdefault(name, {'source': None, 'target': None})
        # Exactly one value per setting, it is stored unmodified.
        entry[setting] = value
    return result
+
if __name__ == '__main__':
    setupStderr()

    # Every "<name>_repository" option of the subscriptionDownloads section
    # becomes an entry mapping <name> to the option's value.
    # NOTE(review): the reviewer objected to this script combining settings
    # in this way; the lookup is kept as it was.
    suffix = '_repository'
    repositories = dict(
        (option[:-len(suffix)], value)
        for option, value in get_config().items('subscriptionDownloads')
        if option.endswith(suffix)
    )

    updateExternalFilterlists(repositories)
« .sitescripts.example ('K') | « .sitescripts.example ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld