| Index: sitescripts/extensions/bin/updateExternalFilterlists.py |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/sitescripts/extensions/bin/updateExternalFilterlists.py |
| @@ -0,0 +1,100 @@ |
| +# coding: utf-8 |
|
Wladimir Palant
2013/06/24 12:35:10
Probably better to have that script under manageme
Thomas Greiner
2013/06/24 14:24:45
Done.
|
| + |
| +# This file is part of the Adblock Plus web scripts, |
| +# Copyright (C) 2006-2013 Eyeo GmbH |
| +# |
| +# Adblock Plus is free software: you can redistribute it and/or modify |
| +# it under the terms of the GNU General Public License version 3 as |
| +# published by the Free Software Foundation. |
| +# |
| +# Adblock Plus is distributed in the hope that it will be useful, |
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| +# GNU General Public License for more details. |
| +# |
| +# You should have received a copy of the GNU General Public License |
| +# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + |
| +import os, subprocess, codecs, urllib, re |
| +from sitescripts.utils import get_config, setupStderr |
| +from tempfile import mkdtemp |
| +from shutil import rmtree |
| + |
def resolveRepositoryPath(repositories, path):
    """Resolve a ``name`` or ``name:subdirectory`` repository reference.

    ``repositories`` maps repository names to repository paths. Everything
    before the first colon in ``path`` is taken as the repository name, the
    optional remainder as a directory inside that repository.

    Returns a ``(repoPath, relTargetDir)`` tuple. ``relTargetDir`` is the
    target directory expressed relative to ``repoPath`` (as computed by
    ``os.path.relpath``), or an empty string if ``path`` has no colon.

    Raises ``Exception`` if the repository name is not a key of
    ``repositories``.
    """
    name, separator, subdirectory = path.partition(':')
    if name not in repositories:
        raise Exception('Cannot resolve path to repository "%s", this repository is unknown' % name)

    root = repositories[name]
    if not separator:
        # Plain repository name, no target directory given.
        return root, ''

    # Going through join() + relpath() normalizes the configured directory.
    return root, os.path.relpath(os.path.join(root, subdirectory), root)
| + |
| + |
def _runHg(command, args, allowedExitCodes=(0,)):
    """Run ``hg <command> -q <args>`` and raise on an unexpected exit code.

    Standard output of the command is captured and discarded. Raises
    ``Exception`` if the exit code is not listed in ``allowedExitCodes``.
    """
    commandLine = ['hg', command, '-q'] + list(args)
    process = subprocess.Popen(commandLine, stdout=subprocess.PIPE)
    process.communicate()
    if process.returncode not in allowedExitCodes:
        raise Exception('Command "%s" failed with exit code %i' % (' '.join(commandLine), process.returncode))


def _downloadFilterlist(url):
    """Download ``url`` and return its contents as normalized unicode text.

    The data is expected to be UTF-8 encoded. Empty lines are dropped and
    surrounding whitespace is removed from every remaining line. Raises
    ``Exception`` if an HTTP download doesn't succeed with status 200.
    """
    response = urllib.urlopen(url)
    try:
        # urllib.urlopen() doesn't raise on HTTP errors, without this check an
        # error page would end up being committed as filter list. getcode()
        # returns None for non-HTTP URLs.
        status = response.getcode()
        if status is not None and status != 200:
            raise Exception('Failed to download %s, HTTP status %s' % (url, status))
        data = response.read()
    finally:
        response.close()

    # Normalize on the raw bytes and decode only once at the end.
    lines = [line.strip() for line in data.splitlines() if line]
    return ''.join(line + '\n' for line in lines).decode('utf-8')


def updateExternalFilterlists(repositories):
    """Commit fresh copies of all configured external filter lists.

    For every entry returned by ``readSettings()`` the target repository is
    cloned into a temporary directory, the list is downloaded from the
    entry's ``source`` URL and written to ``<name>.txt`` in the target
    directory, then the change is committed as user ``hgbot`` and pushed.

    ``repositories`` maps repository names to repository paths, it is used to
    resolve the ``target`` setting of each entry.

    Raises ``Exception`` if a target repository is unknown, if the download
    fails or if a Mercurial command fails. The temporary clone is removed in
    any case.
    """
    settings = readSettings()
    for name, setting in settings.items():
        tempdir = mkdtemp(prefix='adblockplus')
        # Nothing may happen between mkdtemp() and try, otherwise an
        # exception would leave the temporary directory behind.
        try:
            repoPath, targetDir = resolveRepositoryPath(repositories, setting['target'])
            _runHg('clone', ['-U', repoPath, tempdir])
            _runHg('up', ['-R', tempdir, '-r', 'default'])

            directory = os.path.join(tempdir, targetDir)
            if not os.path.exists(directory):
                os.makedirs(directory)

            filename = name + '.txt'
            path = os.path.join(directory, filename)
            exists = os.path.exists(path)

            # Download first so that a failed download never leaves an open
            # or truncated file behind.
            content = _downloadFilterlist(setting['source'])
            with codecs.open(path, 'wb', encoding='utf-8') as output:
                output.write(content)

            if exists:
                message = 'Updated copy of external filterlist %s'
            else:
                message = 'Added copy of external filterlist %s'

            # -A adds a newly created file as part of the commit. Exit code 1
            # means "nothing changed" for commit and "nothing to push" for
            # push, neither is an error here.
            _runHg('commit', ['-R', tempdir, '-A', '-u', 'hgbot', '-m', message % filename], allowedExitCodes=(0, 1))
            _runHg('push', ['-R', tempdir], allowedExitCodes=(0, 1))
        finally:
            rmtree(tempdir)
| + |
def readSettings():
    """Read the external filter list definitions from the configuration.

    Options of the ``externalFilterlists`` section named ``<name>_source``
    and ``<name>_target`` are grouped by ``<name>``; all other options are
    ignored. ``<name>`` itself may contain underscores.

    Returns a dictionary mapping each name to a dictionary with the keys
    ``source`` and ``target``. A key is ``None`` if the corresponding option
    is missing from the configuration.
    """
    result = {}
    for option, value in get_config().items('externalFilterlists'):
        if '_' not in option:
            continue

        # Split off the last component only, so that names containing
        # underscores still unpack into exactly two parts.
        name, setting = option.rsplit('_', 1)
        if setting not in ('source', 'target'):
            continue

        entry = result.setdefault(name, {'source': None, 'target': None})
        entry[setting] = value
    return result
| + |
if __name__ == '__main__':
    setupStderr()

    # Every "<name>_repository" option of the subscriptionDownloads section
    # declares a repository, keyed by <name>.
    suffix = '_repository'
    repositories = dict(
        (option[:-len(suffix)], value)
        for option, value in get_config().items('subscriptionDownloads')
        if option.endswith(suffix)
    )

    updateExternalFilterlists(repositories)