| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # coding: utf-8 | 2 # coding: utf-8 |
| 3 | 3 |
| 4 # This file is part of the Adblock Plus web scripts, | 4 # This file is part of the Adblock Plus web scripts, |
| 5 # Copyright (C) 2006-2013 Eyeo GmbH | 5 # Copyright (C) 2006-2013 Eyeo GmbH |
| 6 # | 6 # |
| 7 # Adblock Plus is free software: you can redistribute it and/or modify | 7 # Adblock Plus is free software: you can redistribute it and/or modify |
| 8 # it under the terms of the GNU General Public License version 3 as | 8 # it under the terms of the GNU General Public License version 3 as |
| 9 # published by the Free Software Foundation. | 9 # published by the Free Software Foundation. |
| 10 # | 10 # |
| 11 # Adblock Plus is distributed in the hope that it will be useful, | 11 # Adblock Plus is distributed in the hope that it will be useful, |
| 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 # GNU General Public License for more details. | 14 # GNU General Public License for more details. |
| 15 # | 15 # |
| 16 # You should have received a copy of the GNU General Public License | 16 # You should have received a copy of the GNU General Public License |
| 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 17 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 18 | 18 |
| 19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64 | 19 import sys, os, re, subprocess, urllib2, time, traceback, codecs, hashlib, base64 |
| 20 from getopt import getopt, GetoptError | 20 from getopt import getopt, GetoptError |
| 21 | 21 |
| 22 acceptedExtensions = { | 22 accepted_extensions = set([".txt"]) |
| 23 '.txt': True, | 23 ignore = set(["Apache.txt", "CC-BY-SA.txt", "GPL.txt", "MPL.txt"]) |
| 24 } | 24 verbatim = set(["COPYING"]) |
| 25 ignore = { | 25 |
| 26 'Apache.txt': True, | 26 def combine_subscriptions(sources, target_dir, timeout=30): |
| 27 'CC-BY-SA.txt': True, | 27 global accepted_extensions, ignore, verbatim |
Sebastian Noack (2013/11/06 15:56:22): The global keyword is unneeded here. I know it was
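For context on this comment: in Python, a module-level name can be read inside a function without any declaration; `global` is only required when the function rebinds the name. A minimal sketch of the distinction (illustration only, not part of the patch):

```python
accepted_extensions = set([".txt"])

def is_accepted(filename):
    # Reading the module-level set needs no "global" declaration.
    return filename.endswith(tuple(accepted_extensions))

def add_extension(ext):
    # Only rebinding the module-level name requires "global".
    global accepted_extensions
    accepted_extensions = accepted_extensions | set([ext])
```

Since combine_subscriptions() only reads these sets, the declaration could be dropped without changing behaviour.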
| 28 'GPL.txt': True, | 28 |
| 29 'MPL.txt': True, | 29 if not os.path.exists(target_dir): |
| 30 } | 30 os.makedirs(target_dir, 0755) |
| 31 verbatim = { | 31 |
| 32 'COPYING': True, | 32 known = set() |
| 33 } | 33 for source_name, source in sources.iteritems(): |
| 34 | 34 for filename in source.list_top_level_files(): |
| 35 def combineSubscriptions(sourceDirs, targetDir, timeout=30): | 35 if filename in ignore or filename.startswith("."): |
| 36 global acceptedExtensions, ignore, verbatim | |
| 37 | |
| 38 if isinstance(sourceDirs, basestring): | |
| 39 sourceDirs = {'': sourceDirs} | |
| 40 | |
| 41 if not os.path.exists(targetDir): | |
| 42 os.makedirs(targetDir, 0755) | |
| 43 | |
| 44 known = {} | |
| 45 for sourceName, sourceDir in sourceDirs.iteritems(): | |
| 46 for file in os.listdir(sourceDir): | |
| 47 if file in ignore or file[0] == '.' or not os.path.isfile(os.path.join(sourceDir, file)): | |
| 48 continue | 36 continue |
| 49 if file in verbatim: | 37 if filename in verbatim: |
| 50 processVerbatimFile(sourceDir, targetDir, file) | 38 process_verbatim_file(source, target_dir, filename) |
| 51 elif not os.path.splitext(file)[1] in acceptedExtensions: | 39 elif not os.path.splitext(filename)[1] in accepted_extensions: |
| 52 continue | 40 continue |
| 53 else: | 41 else: |
| 54 try: | 42 try: |
| 55 processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout) | 43 process_subscription_file(source_name, sources, target_dir, filename, timeout) |
| 56 except: | 44 except: |
| 57 print >>sys.stderr, 'Error processing subscription file "%s"' % file | 45 print >>sys.stderr, 'Error processing subscription file "%s"' % filename |
| 58 traceback.print_exc() | 46 traceback.print_exc() |
| 59 print >>sys.stderr | 47 print >>sys.stderr |
| 60 known[os.path.splitext(file)[0] + '.tpl'] = True | 48 known.add(os.path.splitext(filename)[0] + ".tpl") |
| 61 known[os.path.splitext(file)[0] + '.tpl.gz'] = True | 49 known.add(os.path.splitext(filename)[0] + ".tpl.gz") |
| 62 known[file] = True | 50 known.add(filename) |
| 63 known[file + '.gz'] = True | 51 known.add(filename + ".gz") |
| 64 | 52 |
| 65 for file in os.listdir(targetDir): | 53 for filename in os.listdir(target_dir): |
| 66 if file[0] == '.': | 54 if filename.startswith("."): |
| 67 continue | 55 continue |
| 68 if not file in known: | 56 if not filename in known: |
| 69 os.remove(os.path.join(targetDir, file)) | 57 os.remove(os.path.join(target_dir, filename)) |
| 70 | 58 |
| 71 def saveFile(filePath, data): | 59 def save_file(path, data): |
| 72 handle = codecs.open(filePath, 'wb', encoding='utf-8') | 60 handle = codecs.open(path, "wb", encoding="utf-8") |
| 73 handle.write(data) | 61 handle.write(data) |
| 74 handle.close() | 62 handle.close() |
| 75 try: | 63 try: |
| 76 subprocess.check_output(['7za', 'a', '-tgzip', '-mx=9', '-bd', '-mpass=5', filePath + '.gz', filePath]) | 64 subprocess.check_output(["7za", "a", "-tgzip", "-mx=9", "-bd", "-mpass=5", path + ".gz", path]) |
| 77 except: | 65 except: |
| 78 print >>sys.stderr, 'Failed to compress file %s. Please ensure that p7zip is installed on the system.' % filePath | 66 print >>sys.stderr, "Failed to compress file %s. Please ensure that p7zip is installed on the system." % path |
| 79 | 67 |
| 80 def processVerbatimFile(sourceDir, targetDir, file): | 68 def process_verbatim_file(source, target_dir, filename): |
| 81 handle = codecs.open(os.path.join(sourceDir, file), 'rb', encoding='utf-8') | 69 save_file(os.path.join(target_dir, filename), source.read_file(filename)) |
| 82 saveFile(os.path.join(targetDir, file), handle.read()) | 70 |
| 83 handle.close() | 71 def process_subscription_file(source_name, sources, target_dir, filename, timeout): |
| 84 | 72 source = sources[source_name] |
| 85 def processSubscriptionFile(sourceName, sourceDirs, targetDir, file, timeout): | 73 lines = source.read_file(filename).splitlines() |
| 86 sourceDir = sourceDirs[sourceName] | 74 |
| 87 filePath = os.path.join(sourceDir, file) | 75 header = "" |
| 88 handle = codecs.open(filePath, 'rb', encoding='utf-8') | |
| 89 lines = map(lambda l: re.sub(r'[\r\n]', '', l), handle.readlines()) | |
| 90 handle.close() | |
| 91 | |
| 92 header = '' | |
| 93 if len(lines) > 0: | 76 if len(lines) > 0: |
| 94 header = lines[0] | 77 header = lines.pop(0) |
| 95 del lines[0] | 78 if not re.search(r"\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]", header, re.I): |
| 96 if not re.search(r'\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]', header, re.I): | 79 raise Exception("This is not a valid Adblock Plus subscription file.") |
| 97 raise Exception('This is not a valid Adblock Plus subscription file.') | 80 |
| 98 | 81 lines = resolve_includes(source_name, sources, lines, timeout) |
| 99 lines = resolveIncludes(sourceName, sourceDirs, filePath, lines, timeout) | 82 seen = set(["checksum", "version"]) |
| 100 seen = set(['checksum', 'version']) | 83 def check_line(line): |
| 101 def checkLine(line): | 84 if line == "": |
| 102 if line == '': | |
| 103 return False | 85 return False |
| 104 match = re.search(r'^\s*!\s*(Redirect|Homepage|Title|Checksum|Version)\s*:', line, re.M | re.I) | 86 match = re.search(r"^\s*!\s*(Redirect|Homepage|Title|Checksum|Version)\s*:", line, re.M | re.I) |
| 105 if not match: | 87 if not match: |
| 106 return True | 88 return True |
| 107 key = match.group(1).lower() | 89 key = match.group(1).lower() |
| 108 if key in seen: | 90 if key in seen: |
| 109 return False | 91 return False |
| 110 seen.add(key) | 92 seen.add(key) |
| 111 return True | 93 return True |
| 112 lines = filter(checkLine, lines) | 94 lines = filter(check_line, lines) |
| 113 | 95 |
| 114 writeTPL(os.path.join(targetDir, os.path.splitext(file)[0] + '.tpl'), lines) | 96 write_tpl(os.path.join(target_dir, os.path.splitext(filename)[0] + ".tpl"), lines) |
| 115 | 97 |
| 116 lines.insert(0, '! Version: %s' % time.strftime('%Y%m%d%H%M', time.gmtime())) | 98 lines.insert(0, "! Version: %s" % time.strftime("%Y%m%d%H%M", time.gmtime())) |
| 117 | 99 |
| 118 checksum = hashlib.md5() | 100 checksum = hashlib.md5() |
| 119 checksum.update((header + '\n' + '\n'.join(lines)).encode('utf-8')) | 101 checksum.update("\n".join([header] + lines).encode("utf-8")) |
| 120 lines.insert(0, '! Checksum: %s' % re.sub(r'=', '', base64.b64encode(checksum.digest()))) | 102 lines.insert(0, "! Checksum: %s" % re.sub(r"=", "", base64.b64encode(checksum.digest()))) |
Sebastian Noack (2013/11/06 15:56:22): You don't need a regex to strip a given character.
Wladimir Palant (2013/11/08 15:08:07): It's way too late to change the specification of c
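To illustrate the suggestion (a sketch only, written for Python 2 like the rest of the script): str.rstrip() removes the trailing "=" padding that base64 produces, giving the same result as the re.sub() call without a regular expression.

```python
import re, hashlib, base64

checksum = hashlib.md5()
checksum.update("[Adblock Plus 2.0]\n! Example content".encode("utf-8"))
digest = checksum.digest()

with_regex = re.sub(r"=", "", base64.b64encode(digest))  # as in the patch
with_rstrip = base64.b64encode(digest).rstrip("=")       # the suggested alternative

assert with_regex == with_rstrip
```

Either way the generated checksum string is identical; the patch keeps the re.sub() call.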
| 121 lines.insert(0, header) | 103 lines.insert(0, header) |
| 122 saveFile(os.path.join(targetDir, file), '\n'.join(lines)) | 104 save_file(os.path.join(target_dir, filename), "\n".join(lines)) |
| 123 | 105 |
| 124 def resolveIncludes(sourceName, sourceDirs, filePath, lines, timeout, level=0): | 106 def resolve_includes(source_name, sources, lines, timeout, level=0): |
| 125 if level > 5: | 107 if level > 5: |
| 126 raise Exception('There are too many nested includes, which is probably the result of a circular reference somewhere.') | 108 raise Exception("There are too many nested includes, which is probably the result of a circular reference somewhere.") |
| 127 | 109 |
| 128 result = [] | 110 result = [] |
| 129 for line in lines: | 111 for line in lines: |
| 130 match = re.search(r'^\s*%include\s+(.*)%\s*$', line) | 112 match = re.search(r"^\s*%include\s+(.*)%\s*$", line) |
| 131 if match: | 113 if match: |
| 132 file = match.group(1) | 114 filename = match.group(1) |
| 133 newLines = None | 115 newlines = None |
| 134 if re.match(r'^https?://', file): | 116 if re.match(r"^https?://", filename): |
| 135 result.append('! *** Fetched from: %s ***' % file) | 117 result.append("! *** Fetched from: %s ***" % filename) |
| 136 | 118 |
| 137 for i in range(3): | 119 for i in range(3): |
| 138 try: | 120 try: |
| 139 request = urllib2.urlopen(file, None, timeout) | 121 request = urllib2.urlopen(filename, None, timeout) |
| 122 data = request.read() | |
| 140 error = None | 123 error = None |
| 141 break | 124 break |
| 142 except urllib2.URLError, e: | 125 except urllib2.URLError, e: |
| 143 error = e | 126 error = e |
| 144 time.sleep(5) | 127 time.sleep(5) |
| 145 if error: | 128 if error: |
| 146 raise error | 129 raise error |
| 147 | 130 |
| 148 # We should really get the charset from the headers rather than assuming | 131 # We should really get the charset from the headers rather than assuming |
| 149 # that it is UTF-8. However, some of the Google Code mirrors are | 132 # that it is UTF-8. However, some of the Google Code mirrors are |
| 150 # misconfigured and will return ISO-8859-1 as charset instead of UTF-8. | 133 # misconfigured and will return ISO-8859-1 as charset instead of UTF-8. |
| 151 newLines = unicode(request.read(), 'utf-8').split('\n') | 134 newlines = data.decode("utf-8").splitlines() |
| 152 newLines = map(lambda l: re.sub(r'[\r\n]', '', l), newLines) | 135 newlines = filter(lambda l: not re.search(r"^\s*!.*?\bExpires\s*(?::|after)\s*(\d+)\s*(h)?", l, re.M | re.I), newlines) |
| 153 newLines = filter(lambda l: not re.search(r'^\s*!.*?\bExpires\s*(?::|after)\s*(\d+)\s*(h)?', l, re.M | re.I), newLines) | 136 newlines = filter(lambda l: not re.search(r"^\s*!\s*(Redirect|Homepage|Title|Version)\s*:", l, re.M | re.I), newlines) |
| 154 newLines = filter(lambda l: not re.search(r'^\s*!\s*(Redirect|Homepage|Title|Version)\s*:', l, re.M | re.I), newLines) | |
| 155 else: | 137 else: |
| 156 result.append('! *** %s ***' % file) | 138 result.append("! *** %s ***" % filename) |
| 157 | 139 |
| 158 includeSource = sourceName | 140 include_source = source_name |
| 159 if file.find(':') >= 0: | 141 if ":" in filename: |
| 160 includeSource, file = file.split(':', 1) | 142 include_source, filename = filename.split(":", 1) |
| 161 if not includeSource in sourceDirs: | 143 if not include_source in sources: |
| 162 raise Exception('Cannot include file from repository "%s", this repository is unknown' % includeSource) | 144 raise Exception('Cannot include file from repository "%s", this repository is unknown' % include_source) |
| 163 | 145 |
| 164 parentDir = sourceDirs[includeSource] | 146 source = sources[include_source] |
| 165 includePath = os.path.join(parentDir, file) | 147 newlines = source.read_file(filename).splitlines() |
| 166 relPath = os.path.relpath(includePath, parentDir) | 148 newlines = resolve_includes(include_source, sources, newlines, timeout, level + 1) |
| 167 if len(relPath) == 0 or relPath[0] == '.': | 149 |
| 168 raise Exception('Invalid include "%s", needs to be an HTTP/HTTPS URL or a relative file path' % file) | 150 if len(newlines) and re.search(r"\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]", newlines[0], re.I): |
| 169 | 151 del newlines[0] |
| 170 handle = codecs.open(includePath, 'rb', encoding='utf-8') | 152 result.extend(newlines) |
| 171 newLines = map(lambda l: re.sub(r'[\r\n]', '', l), handle.readlines()) | |
| 172 newLines = resolveIncludes(includeSource, sourceDirs, includePath, newLines, timeout, level + 1) | |
| 173 handle.close() | |
| 174 | |
| 175 if len(newLines) and re.search(r'\[Adblock(?:\s*Plus\s*([\d\.]+)?)?\]', newLines[0], re.I): | 151 del newlines[0] |
| 176 del newLines[0] | |
| 177 result.extend(newLines) | |
| 178 else: | 153 else: |
| 179 if line.find('%timestamp%') >= 0: | 154 if line.find("%timestamp%") >= 0: |
| 180 if level == 0: | 155 if level == 0: |
| 181 line = line.replace('%timestamp%', time.strftime('%d %b %Y %H:%M UTC', time.gmtime())) | 156 line = line.replace("%timestamp%", time.strftime("%d %b %Y %H:%M UTC", time.gmtime())) |
| 182 else: | 157 else: |
| 183 line = '' | 158 line = "" |
| 184 result.append(line) | 159 result.append(line) |
| 185 return result | 160 return result |
| 186 | 161 |
| 187 def writeTPL(filePath, lines): | 162 def write_tpl(path, lines): |
| 188 result = [] | 163 result = [] |
| 189 result.append('msFilterList') | 164 result.append("msFilterList") |
| 190 for line in lines: | 165 for line in lines: |
| 191 if re.search(r'^!', line): | 166 if re.search(r"^\s*!", line): |
| 192 # This is a comment. Handle "Expires" comment in a special way, keep the rest. | 167 # This is a comment. Handle "Expires" comment in a special way, keep the rest. |
| 193 match = re.search(r'\bExpires\s*(?::|after)\s*(\d+)\s*(h)?', line, re.I) | 168 match = re.search(r"\bExpires\s*(?::|after)\s*(\d+)\s*(h)?", line, re.I) |
| 194 if match: | 169 if match: |
| 195 interval = int(match.group(1)) | 170 interval = int(match.group(1)) |
| 196 if match.group(2): | 171 if match.group(2): |
| 197 interval = int(interval / 24) | 172 interval = int(interval / 24) |
| 198 result.append(': Expires=%i' % interval) | 173 result.append(": Expires=%i" % interval) |
| 199 else: | 174 else: |
| 200 result.append(re.sub(r'!', '#', re.sub(r'--!$', '--#', line))) | 175 result.append(re.sub(r"^\s*!", "#", re.sub(r"--!$", "--#", line))) |
| 201 elif line.find('#') >= 0: | 176 elif line.find("#") >= 0: |
| 202 # Element hiding rules are not supported in MSIE, drop them | 177 # Element hiding rules are not supported in MSIE, drop them |
| 203 pass | 178 pass |
| 204 else: | 179 else: |
| 205 # We have a blocking or exception rule, try to convert it | 180 # We have a blocking or exception rule, try to convert it |
| 206 origLine = line | 181 origline = line |
| 207 | 182 |
| 208 isException = False | 183 isexception = False |
Sebastian Noack (2013/11/06 15:56:22): Apparently you don't like underscores, but "is_exc
Wladimir Palant (2013/11/08 15:08:07): As you wish...
| 209 if line[0:2] == '@@': | 184 if line.startswith("@@"): |
| 210 isException = True | 185 isexception = True |
| 211 line = line[2:] | 186 line = line[2:] |
| 212 | 187 |
| 213 hasUnsupportedOptions = False | 188 has_unsupported = False |
| 214 requiresScript = False | 189 requires_script = False |
| 215 match = re.search(r'^(.*?)\$(.*)', line) | 190 match = re.search(r"^(.*?)\$(.*)", line) |
| 216 if match: | 191 if match: |
| 217 # This rule has options, check whether any of them are important | 192 # This rule has options, check whether any of them are important |
| 218 line = match.group(1) | 193 line = match.group(1) |
| 219 options = match.group(2).replace('_', '-').lower().split(',') | 194 options = match.group(2).replace("_", "-").lower().split(",") |
| 220 | 195 |
| 221 # Remove first-party only exceptions, we will allow an ad server everywhere otherwise | 196 # Remove first-party only exceptions, we will allow an ad server everywhere otherwise |
| 222 if isException and '~third-party' in options: | 197 if isexception and "~third-party" in options: |
| 223 hasUnsupportedOptions = True | 198 has_unsupported = True |
| 224 | 199 |
| 225 # A number of options are not supported in MSIE but can be safely ignored, remove them | 200 # A number of options are not supported in MSIE but can be safely ignored, remove them |
| 226 options = filter(lambda o: not o in ('', 'third-party', '~third-party', 'match-case', '~match-case', '~other', '~donottrack'), options) | 201 options = filter(lambda o: not o in ("", "third-party", "~third-party", "match-case", "~match-case", "~other", "~donottrack"), options) |
| 227 | 202 |
| 228 # Also ignore domain negation of whitelists | 203 # Also ignore domain negation of whitelists |
| 229 if isException: | 204 if isexception: |
| 230 options = filter(lambda o: not o.startswith('domain=~'), options) | 205 options = filter(lambda o: not o.startswith("domain=~"), options) |
| 231 | 206 |
| 232 unsupportedOptions = filter(lambda o: o in ('other', 'elemhide'), options) | 207 unsupported = filter(lambda o: o in ("other", "elemhide"), options) |
| 233 if unsupportedOptions and len(unsupportedOptions) == len(options): | 208 if unsupported and len(unsupported) == len(options): |
| 234 # The rule only applies to types that are not supported in MSIE | 209 # The rule only applies to types that are not supported in MSIE |
| 235 hasUnsupportedOptions = True | 210 has_unsupported = True |
| 236 elif 'donottrack' in options: | 211 elif "donottrack" in options: |
| 237 # Do-Not-Track rules have to be removed even if $donottrack is combined with other options | 212 # Do-Not-Track rules have to be removed even if $donottrack is combined with other options |
| 238 hasUnsupportedOptions = True | 213 has_unsupported = True |
| 239 elif 'script' in options and len(options) == len(unsupportedOptions) + 1: | 214 elif "script" in options and len(options) == len(unsupported) + 1: |
| 240 # Mark rules that only apply to scripts for approximate conversion | 215 # Mark rules that only apply to scripts for approximate conversion |
| 241 requiresScript = True | 216 requires_script = True |
| 242 elif len(options) > 0: | 217 elif len(options) > 0: |
| 243 # The rule has further options that aren't available in TPLs. For | 218 # The rule has further options that aren't available in TPLs. For |
| 244 # exception rules that aren't specific to a domain we ignore all | 219 # exception rules that aren't specific to a domain we ignore all |
| 245 # remaining options to avoid potential false positives. Other rules | 220 # remaining options to avoid potential false positives. Other rules |
| 246 # simply aren't included in the TPL file. | 221 # simply aren't included in the TPL file. |
| 247 if isException: | 222 if isexception: |
| 248 hasUnsupportedOptions = any([o.startswith('domain=') for o in options]) | 223 has_unsupported = any([o.startswith("domain=") for o in options]) |
| 249 else: | 224 else: |
| 250 hasUnsupportedOptions = True | 225 has_unsupported = True |
| 251 | 226 |
| 252 if hasUnsupportedOptions: | 227 if has_unsupported: |
| 253 # Do not include filters with unsupported options | 228 # Do not include filters with unsupported options |
| 254 result.append('# ' + origLine) | 229 result.append("# " + origline) |
| 255 else: | 230 else: |
| 256 line = line.replace('^', '/') # Assume that separator placeholders mean slashes | 231 line = line.replace("^", "/") # Assume that separator placeholders mean slashes |
| 257 | 232 |
| 258 # Try to extract domain info | 233 # Try to extract domain info |
| 259 domain = None | 234 domain = None |
| 260 match = re.search(r'^(\|\||\|\w+://)([^*:/]+)(:\d+)?(/.*)', line) | 235 match = re.search(r"^(\|\||\|\w+://)([^*:/]+)(:\d+)?(/.*)", line) |
| 261 if match: | 236 if match: |
| 262 domain = match.group(2) | 237 domain = match.group(2) |
| 263 line = match.group(4) | 238 line = match.group(4) |
| 264 else: | 239 else: |
| 265 # No domain info, remove anchors at the rule start | 240 # No domain info, remove anchors at the rule start |
| 266 line = re.sub(r'^\|\|', 'http://', line) | 241 line = re.sub(r"^\|\|", "http://", line) |
| 267 line = re.sub(r'^\|', '', line) | 242 line = re.sub(r"^\|", "", line) |
| 268 # Remove anchors at the rule end | 243 # Remove anchors at the rule end |
| 269 line = re.sub(r'\|$', '', line) | 244 line = re.sub(r"\|$", "", line) |
| 270 # Remove unnecessary asterisks at the ends of lines | 245 # Remove unnecessary asterisks at the ends of lines |
| 271 line = re.sub(r'\*$', '', line) | 246 line = re.sub(r"\*$", "", line) |
| 272 # Emulate $script by appending *.js to the rule | 247 # Emulate $script by appending *.js to the rule |
| 273 if requiresScript: | 248 if requires_script: |
| 274 line += '*.js' | 249 line += "*.js" |
| 275 if line.startswith('/*'): | 250 if line.startswith("/*"): |
| 276 line = line[2:] | 251 line = line[2:] |
| 277 if domain: | 252 if domain: |
| 278 line = '%sd %s %s' % ('+' if isException else '-', domain, line) | 253 line = "%sd %s %s" % ("+" if isexception else "-", domain, line) |
| 279 line = re.sub(r'\s+/$', '', line) | 254 line = re.sub(r"\s+/$", "", line) |
| 280 result.append(line) | 255 result.append(line) |
| 281 elif isException: | 256 elif isexception: |
| 282 # Exception rules without domains are unsupported | 257 # Exception rules without domains are unsupported |
| 283 result.append('# ' + origLine) | 258 result.append("# " + origline) |
| 284 else: | 259 else: |
| 285 result.append('- ' + line) | 260 result.append("- " + line) |
| 286 saveFile(filePath, '\n'.join(result) + '\n') | 261 save_file(path, "\n".join(result) + "\n") |
| 262 | |
| 263 class FileSource: | |
| 264 def __init__(self, dir): | |
| 265 self._dir = dir | |
| 266 if os.path.exists(os.path.join(dir, ".hg")): | |
| 267 # This is a Mercurial repository, try updating | |
| 268 subprocess.call(["hg", "-q", "-R", dir, "pull", "--update"]) | |
| 269 | |
| 270 def get_path(self, filename): | |
| 271 return os.path.join(self._dir, *filename.split("/")) | |
| 272 | |
| 273 def read_file(self, filename): | |
| 274 path = self.get_path(filename) | |
| 275 if os.path.relpath(path, self._dir).startswith("."): | |
| 276 raise Exception("Attempt to access a file outside the repository") | |
| 277 with codecs.open(path, "rb", encoding="utf-8") as handle: | |
| 278 return handle.read() | |
| 279 | |
| 280 def list_top_level_files(self): | |
| 281 for filename in os.listdir(self._dir): | |
| 282 path = os.path.join(self._dir, filename) | |
| 283 if os.path.isfile(path): | |
| 284 yield filename | |
| 287 | 285 |
| 288 def usage(): | 286 def usage(): |
| 289 print '''Usage: %s [source_dir] [output_dir] | 287 print """Usage: %s source_name=source_dir ... [output_dir] |
| 290 | 288 |
| 291 Options: | 289 Options: |
| 292 -h --help Print this message and exit | 290 -h --help Print this message and exit |
| 293 -t seconds --timeout=seconds Timeout when fetching remote subscriptions | 291 -t seconds --timeout=seconds Timeout when fetching remote subscriptions |
| 294 ''' % os.path.basename(sys.argv[0]) | 292 """ % os.path.basename(sys.argv[0]) |
| 295 | 293 |
| 296 if __name__ == '__main__': | 294 if __name__ == "__main__": |
| 297 try: | 295 try: |
| 298 opts, args = getopt(sys.argv[1:], 'ht:', ['help', 'timeout=']) | 296 opts, args = getopt(sys.argv[1:], "ht:", ["help", "timeout="]) |
| 299 except GetoptError, e: | 297 except GetoptError, e: |
| 300 print str(e) | 298 print str(e) |
| 301 usage() | 299 usage() |
| 302 sys.exit(2) | 300 sys.exit(2) |
| 303 | 301 |
| 304 sourceDir, targetDir = '.', 'subscriptions' | 302 target_dir = "subscriptions" |
| 305 if len(args) >= 1: | 303 sources = {} |
| 306 sourceDir = args[0] | 304 for arg in args: |
| 307 if len(args) >= 2: | 305 if "=" in arg: |
| 308 targetDir = args[1] | 306 source_name, source_dir = arg.split("=", 1) |
| 307 sources[source_name] = FileSource(source_dir) | |
| 308 else: | |
| 309 target_dir = arg | |
| 310 if not sources: | |
| 311 sources[""] = FileSource(".") | |
| 309 | 312 |
| 310 timeout = 30 | 313 timeout = 30 |
| 311 for option, value in opts: | 314 for option, value in opts: |
| 312 if option in ('-h', '--help'): | 315 if option in ("-h", "--help"): |
| 313 usage() | 316 usage() |
| 314 sys.exit() | 317 sys.exit() |
| 315 elif option in ('-t', '--timeout'): | 318 elif option in ("-t", "--timeout"): |
| 316 timeout = int(value) | 319 timeout = int(value) |
| 317 | 320 |
| 318 if os.path.exists(os.path.join(sourceDir, '.hg')): | 321 combine_subscriptions(sources, target_dir, timeout) |
| 319 # Our source is a Mercurial repository, try updating | |
| 320 subprocess.check_call(['hg', '-q', '-R', sourceDir, 'pull', '--update']) | |
| 321 | |
| 322 combineSubscriptions(sourceDir, targetDir, timeout) | |
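With the rewritten entry point, sources are passed to combine_subscriptions() as a mapping from source name to FileSource rather than as a single directory. A hypothetical invocation (the names and paths below are invented for illustration) mirroring what the new __main__ block builds from name=dir arguments:

```python
# Hypothetical example; assumes the functions and classes defined in this
# script are in scope. Source names and directories are made up.
sources = {
    "": FileSource("."),                       # unnamed default repository
    "easylist": FileSource("/data/easylist"),  # reachable via %include easylist:easylist.txt%
}
combine_subscriptions(sources, "subscriptions", timeout=30)
```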