OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding: utf-8 | 2 # coding: utf-8 |
3 | 3 |
4 # This file is part of the Adblock Plus web scripts, | 4 # This file is part of the Adblock Plus web scripts, |
5 # Copyright (C) 2006-2012 Eyeo GmbH | 5 # Copyright (C) 2006-2012 Eyeo GmbH |
6 # | 6 # |
7 # Adblock Plus is free software: you can redistribute it and/or modify | 7 # Adblock Plus is free software: you can redistribute it and/or modify |
8 # it under the terms of the GNU General Public License version 3 as | 8 # it under the terms of the GNU General Public License version 3 as |
9 # published by the Free Software Foundation. | 9 # published by the Free Software Foundation. |
10 # | 10 # |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
150 try: | 150 try: |
151 request = urllib2.urlopen(file, None, timeout) | 151 request = urllib2.urlopen(file, None, timeout) |
152 error = None | 152 error = None |
153 break | 153 break |
154 except urllib2.URLError, e: | 154 except urllib2.URLError, e: |
155 error = e | 155 error = e |
156 time.sleep(5) | 156 time.sleep(5) |
157 if error: | 157 if error: |
158 raise error | 158 raise error |
159 | 159 |
160 charset = 'utf-8' | 160 # We should really get the charset from the headers rather than assuming |
161 contentType = request.headers.get('content-type', '') | 161 # that it is UTF-8. However, some of the Google Code mirrors are |
162 if contentType.find('charset=') >= 0: | 162 # misconfigured and will return ISO-8859-1 as charset instead of UTF-8. |
163 charset = contentType.split('charset=', 1)[1] | 163 newLines = unicode(request.read(), 'utf-8').split('\n') |
164 newLines = unicode(request.read(), charset).split('\n') | |
165 newLines = map(lambda l: re.sub(r'[\r\n]', '', l), newLines) | 164 newLines = map(lambda l: re.sub(r'[\r\n]', '', l), newLines) |
166 newLines = filter(lambda l: not re.search(r'^\s*!.*?\bExpires\s*(?::|aft
er)\s*(\d+)\s*(h)?', l, re.M | re.I), newLines) | 165 newLines = filter(lambda l: not re.search(r'^\s*!.*?\bExpires\s*(?::|aft
er)\s*(\d+)\s*(h)?', l, re.M | re.I), newLines) |
167 newLines = filter(lambda l: not re.search(r'^\s*!\s*(Redirect|Homepage|T
itle)\s*:', l, re.M | re.I), newLines) | 166 newLines = filter(lambda l: not re.search(r'^\s*!\s*(Redirect|Homepage|T
itle)\s*:', l, re.M | re.I), newLines) |
168 else: | 167 else: |
169 result.append('! *** %s ***' % file) | 168 result.append('! *** %s ***' % file) |
170 | 169 |
171 includeSource = sourceName | 170 includeSource = sourceName |
172 if file.find(':') >= 0: | 171 if file.find(':') >= 0: |
173 includeSource, file = file.split(':', 1) | 172 includeSource, file = file.split(':', 1) |
174 if not includeSource in sourceDirs: | 173 if not includeSource in sourceDirs: |
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
326 usage() | 325 usage() |
327 sys.exit() | 326 sys.exit() |
328 elif option in ('-t', '--timeout'): | 327 elif option in ('-t', '--timeout'): |
329 timeout = int(value) | 328 timeout = int(value) |
330 | 329 |
331 if os.path.exists(os.path.join(sourceDir, '.hg')): | 330 if os.path.exists(os.path.join(sourceDir, '.hg')): |
332 # Our source is a Mercurial repository, try updating | 331 # Our source is a Mercurial repository, try updating |
333 subprocess.Popen(['hg', '-R', sourceDir, 'pull', '--update']).communicate() | 332 subprocess.Popen(['hg', '-R', sourceDir, 'pull', '--update']).communicate() |
334 | 333 |
335 combineSubscriptions(sourceDir, targetDir, timeout) | 334 combineSubscriptions(sourceDir, targetDir, timeout) |
OLD | NEW |