Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/subscriptions/combineSubscriptions.py

Issue 9170210: Subscription downloads: Ignore stated charset for remote downloads, always assume UTF-8 (Closed)
Patch Set: Created Jan. 16, 2013, 3:49 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # coding: utf-8 2 # coding: utf-8
3 3
4 # This file is part of the Adblock Plus web scripts, 4 # This file is part of the Adblock Plus web scripts,
5 # Copyright (C) 2006-2012 Eyeo GmbH 5 # Copyright (C) 2006-2012 Eyeo GmbH
6 # 6 #
7 # Adblock Plus is free software: you can redistribute it and/or modify 7 # Adblock Plus is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License version 3 as 8 # it under the terms of the GNU General Public License version 3 as
9 # published by the Free Software Foundation. 9 # published by the Free Software Foundation.
10 # 10 #
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
150 try: 150 try:
151 request = urllib2.urlopen(file, None, timeout) 151 request = urllib2.urlopen(file, None, timeout)
152 error = None 152 error = None
153 break 153 break
154 except urllib2.URLError, e: 154 except urllib2.URLError, e:
155 error = e 155 error = e
156 time.sleep(5) 156 time.sleep(5)
157 if error: 157 if error:
158 raise error 158 raise error
159 159
160 charset = 'utf-8' 160 # We should really get the charset from the headers rather than assuming
161 contentType = request.headers.get('content-type', '') 161 # that it is UTF-8. However, some of the Google Code mirrors are
162 if contentType.find('charset=') >= 0: 162 # misconfigured and will return ISO-8859-1 as charset instead of UTF-8.
163 charset = contentType.split('charset=', 1)[1] 163 newLines = unicode(request.read(), 'utf-8').split('\n')
164 newLines = unicode(request.read(), charset).split('\n')
165 newLines = map(lambda l: re.sub(r'[\r\n]', '', l), newLines) 164 newLines = map(lambda l: re.sub(r'[\r\n]', '', l), newLines)
166 newLines = filter(lambda l: not re.search(r'^\s*!.*?\bExpires\s*(?::|after)\s*(\d+)\s*(h)?', l, re.M | re.I), newLines) 165 newLines = filter(lambda l: not re.search(r'^\s*!.*?\bExpires\s*(?::|after)\s*(\d+)\s*(h)?', l, re.M | re.I), newLines)
167 newLines = filter(lambda l: not re.search(r'^\s*!\s*(Redirect|Homepage|Title)\s*:', l, re.M | re.I), newLines) 166 newLines = filter(lambda l: not re.search(r'^\s*!\s*(Redirect|Homepage|Title)\s*:', l, re.M | re.I), newLines)
168 else: 167 else:
169 result.append('! *** %s ***' % file) 168 result.append('! *** %s ***' % file)
170 169
171 includeSource = sourceName 170 includeSource = sourceName
172 if file.find(':') >= 0: 171 if file.find(':') >= 0:
173 includeSource, file = file.split(':', 1) 172 includeSource, file = file.split(':', 1)
174 if not includeSource in sourceDirs: 173 if not includeSource in sourceDirs:
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
326 usage() 325 usage()
327 sys.exit() 326 sys.exit()
328 elif option in ('-t', '--timeout'): 327 elif option in ('-t', '--timeout'):
329 timeout = int(value) 328 timeout = int(value)
330 329
331 if os.path.exists(os.path.join(sourceDir, '.hg')): 330 if os.path.exists(os.path.join(sourceDir, '.hg')):
332 # Our source is a Mercurial repository, try updating 331 # Our source is a Mercurial repository, try updating
333 subprocess.Popen(['hg', '-R', sourceDir, 'pull', '--update']).communicate() 332 subprocess.Popen(['hg', '-R', sourceDir, 'pull', '--update']).communicate()
334 333
335 combineSubscriptions(sourceDir, targetDir, timeout) 334 combineSubscriptions(sourceDir, targetDir, timeout)
OLD NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld