sitescripts/subscriptions/subscriptionParser.py - Issue 11275006: Added script to generate notification.json for the emergencynotification mechanism

Delta Between Two Patch Sets: sitescripts/subscriptions/subscriptionParser.py

Issue 11275006: Added script to generate notification.json for the emergencynotification mechanism (Closed)

Left Patch Set: Created July 26, 2013, 9:15 a.m.

Right Patch Set: Addressed review comments Created Nov. 4, 2013, 10:28 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Right: Side by side diff | Download

LEFT	RIGHT
(no file at all)
1 # coding: utf-8	1 # coding: utf-8

2	2

3 # This file is part of the Adblock Plus web scripts,	3 # This file is part of the Adblock Plus web scripts,

4 # Copyright (C) 2006-2013 Eyeo GmbH	4 # Copyright (C) 2006-2013 Eyeo GmbH

5 #	5 #

6 # Adblock Plus is free software: you can redistribute it and/or modify	6 # Adblock Plus is free software: you can redistribute it and/or modify

7 # it under the terms of the GNU General Public License version 3 as	7 # it under the terms of the GNU General Public License version 3 as

8 # published by the Free Software Foundation.	8 # published by the Free Software Foundation.

9 #	9 #

10 # Adblock Plus is distributed in the hope that it will be useful,	10 # Adblock Plus is distributed in the hope that it will be useful,

11 # but WITHOUT ANY WARRANTY; without even the implied warranty of	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 # GNU General Public License for more details.	13 # GNU General Public License for more details.

14 #	14 #

15 # You should have received a copy of the GNU General Public License	15 # You should have received a copy of the GNU General Public License

16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

17	17

18 import re, os, sys, codecs, subprocess, tarfile	18 import re, os, sys, codecs, subprocess, tarfile

19 from urlparse import urlparse	19 from urlparse import urlparse

20 from StringIO import StringIO	20 from StringIO import StringIO

21 from ConfigParser import SafeConfigParser	21 from ConfigParser import SafeConfigParser

22 from sitescripts.utils import get_config, cached	22 from sitescripts.utils import get_config, cached

23	23

24 def warn(message):	24 def warn(message):

25 print >> sys.stderr, message	25 print >> sys.stderr, message

26	26

27 class Subscription(object):	27 class Subscription(object):

28 def defineProperty(propName, isSimple = False):	28 def define_property(propName, readonly=False):

29 if isSimple:	29 if readonly:

30 def setProperty(dict, propName, value):	30 return property(lambda self: self._data[propName])

31 dict[propName] = value

32

33 return property(lambda self: self._data[propName], lambda self, value: setProperty(self._data, propName, value))

34 else:	31 else:

35 return property(lambda self: self._data[propName])	32 def set_property(self, value):

36	33 self._data[propName] = value

37 name = defineProperty("name", True)	34

38 type = defineProperty("type", True)	35 return property(lambda self: self._data[propName], set_property)

39 maintainer = defineProperty("maintainer", True)	36

40 email = defineProperty("email", True)	37 name = define_property("name")

41 specialization = defineProperty("specialization", True)	38 type = define_property("type")

42 languages = defineProperty("languages", True)	39 maintainer = define_property("maintainer")

43 recommendation = defineProperty("recommendation")	40 email = define_property("email")

44 deprecated = defineProperty("deprecated")	41 specialization = define_property("specialization")

45 unavailable = defineProperty("unavailable")	42 languages = define_property("languages")

46 catchall = defineProperty("catchall")	43 recommendation = define_property("recommendation", readonly=True)

47 supplements = defineProperty("supplements")	44 deprecated = define_property("deprecated", readonly=True)

48 supplemented = defineProperty("supplemented")	45 unavailable = define_property("unavailable", readonly=True)

49 variants = defineProperty("variants")	46 catchall = define_property("catchall", readonly=True)

50 homepage = defineProperty("homepage", True)	47 supplements = define_property("supplements", readonly=True)

51 contact = defineProperty("contact", True)	48 supplemented = define_property("supplemented", readonly=True)

52 forum = defineProperty("forum", True)	49 variants = define_property("variants", readonly=True)

53 faq = defineProperty("faq", True)	50 homepage = define_property("homepage")

54 blog = defineProperty("blog", True)	51 contact = define_property("contact")

55 changelog = defineProperty("changelog", True)	52 forum = define_property("forum")

56 policy = defineProperty("policy", True)	53 faq = define_property("faq")

57 digest = defineProperty("digest", True)	54 blog = define_property("blog")

58 digestDay = defineProperty("digestDay", True)	55 changelog = define_property("changelog")

59	56 policy = define_property("policy")

60 def __init__(self, filePath, data):	57 digest = define_property("digest")

	58 digestDay = define_property("digestDay")

	59

	60 def __init__(self, path, data):

61 self._data = {	61 self._data = {

62 'name': None,	62 'name': None,

63 'type': 'ads',	63 'type': 'ads',

64 'maintainer': None,	64 'maintainer': None,

65 'email': None,	65 'email': None,

66 'specialization': None,	66 'specialization': None,

67 'languages': None,	67 'languages': None,

68 'deprecated': False,	68 'deprecated': False,

69 'unavailable': False,	69 'unavailable': False,

70 'catchall': False,	70 'catchall': False,

71 'supplements': [],	71 'supplements': [],

72 'supplemented': [],	72 'supplemented': [],

73 'variants': [],	73 'variants': [],

74 'recommendation': None,	74 'recommendation': None,

75 'homepage': None,	75 'homepage': None,

76 'contact': None,	76 'contact': None,

77 'forum': None,	77 'forum': None,

78 'faq': None,	78 'faq': None,

79 'blog': None,	79 'blog': None,

80 'changelog': None,	80 'changelog': None,

81 'policy': None,	81 'policy': None,

82 'digest': 'weekly',	82 'digest': 'weekly',

83 'digestDay': 'wed',	83 'digestDay': 'wed',

84 }	84 }

85 self.parse(filePath, data)	85 self.parse(path, data)

86	86

87 def parse(self, filePath, data):	87 def parse(self, path, data):

88 mandatory = [['email'], ['specialization'], ['homepage', 'contact', 'forum', 'faq', 'blog']]	88 mandatory = [['email'], ['specialization'], ['homepage', 'contact', 'forum', 'faq', 'blog']]

89 weekDays = {	89 weekdays = {

90 'son': 0,	90 'son': 0,

91 'mon': 1,	91 'mon': 1,

92 'tue': 2,	92 'tue': 2,

93 'wed': 3,	93 'wed': 3,

94 'thu': 4,	94 'thu': 4,

95 'fri': 5,	95 'fri': 5,

96 'sat': 6,	96 'sat': 6,

97 }	97 }

98	98

99 self.name = re.sub(r'\.\w+$', r'', os.path.basename(filePath))	99 self.name = re.sub(r'\.\w+$', r'', os.path.basename(path))

100	100

101 for line in data:	101 for line in data:

102 if not re.search(r'\S', line):	102 if not re.search(r'\S', line):

103 continue	103 continue

104	104

105 parts = line.split('=', 1)	105 parts = line.split('=', 1)

106 key = parts[0].strip()	106 key = parts[0].strip()

107 if len(parts) > 1:	107 if len(parts) > 1:

108 value = parts[1].strip()	108 value = parts[1].strip()

109 else:	109 else:

110 value = ''	110 value = ''

111	111

112 try:	112 try:

113 # Might be a simple attribute - try setting the value	113 # Might be a simple attribute - try setting the value

114 if not hasattr(self, key):	114 if not hasattr(self, key):

115 raise Exception()	115 raise Exception()

116	116

117 oldValue = getattr(self, key)	117 oldvalue = getattr(self, key)

118 setattr(self, key, value)	118 setattr(self, key, value)

119 if value == '':	119 if value == '':

120 warn('Empty value given for attribute %s in %s' % (key, filePath))	120 warn('Empty value given for attribute %s in %s' % (key, path))

121 if oldValue != None and key != 'name' and key != 'type' and key != 'digest' and key != 'digestDay':	121 if oldvalue != None and key != 'name' and key != 'type' and key != 'digest' and key != 'digestDay':

122 warn('Value for attribute %s is duplicated in %s' % (key, filePath))	122 warn('Value for attribute %s is duplicated in %s' % (key, path))

123 except:	123 except:

124 # Not a simple attribute, needs special handling	124 # Not a simple attribute, needs special handling

125 if key == 'supplements':	125 if key == 'supplements':

126 if value == '':	126 if value == '':

127 warn('Empty value given for attribute %s in %s' % (key, filePath))	127 warn('Empty value given for attribute %s in %s' % (key, path))

128 self.supplements.append(value)	128 self.supplements.append(value)

129	129

130 elif key == 'list' or key == 'variant':	130 elif key == 'list' or key == 'variant':

131 if value == '':	131 if value == '':

132 warn('Empty value given for attribute %s in %s' % (key, filePath))	132 warn('Empty value given for attribute %s in %s' % (key, path))

133 keywords = {	133 keywords = {

134 'recommendation': False,	134 'recommendation': False,

135 'catchall': False,	135 'catchall': False,

136 'complete': False,	136 'complete': False,

137 }	137 }

138 regexp = re.compile(r'\s\[((?:\w+,)\w+)\]$')	138 regexp = re.compile(r'\s\[((?:\w+,)\w+)\]$')

139 match = re.search(regexp, value)	139 match = re.search(regexp, value)

140 if match:	140 if match:

141 value = re.sub(regexp, r'', value)	141 value = re.sub(regexp, r'', value)

142 for keyword in match.group(1).split(','):	142 for keyword in match.group(1).split(','):

143 keyword = keyword.lower()	143 keyword = keyword.lower()

144 if keyword in keywords:	144 if keyword in keywords:

145 keywords[keyword] = True	145 keywords[keyword] = True

146 else:	146 else:

147 warn('Unknown keyword %s given for attribute %s in %s' % (keyword, key, filePath))	147 warn('Unknown keyword %s given for attribute %s in %s' % (keyword, key, path))

148 (name, url) = (self.name, value)	148 (name, url) = (self.name, value)

149 if key == 'variant':	149 if key == 'variant':

150 match = re.search(r'(.+?)\s+(\S+)$', value)	150 match = re.search(r'(.+?)\s+(\S+)$', value)

151 if match:	151 if match:

152 (name, url) = (match.group(1), match.group(2));	152 (name, url) = (match.group(1), match.group(2));

153 else:	153 else:

154 warn('Invalid variant format in %s, no name given?' % (filePath))	154 warn('Invalid variant format in %s, no name given?' % (path))

155 if not _validateURL(url):	155 if not _validate_URL(url):

156 warn('Invalid list URL %s given in %s' % (url, filePath))	156 warn('Invalid list URL %s given in %s' % (url, path))

157 self.variants.append([name, url, keywords['complete']])	157 self.variants.append([name, url, keywords['complete']])

158 if keywords['recommendation']:	158 if keywords['recommendation']:

159 self._data['recommendation'] = self._data['variants'][-1]	159 self._data['recommendation'] = self._data['variants'][-1]

160 self._data['catchall'] = keywords['catchall']	160 self._data['catchall'] = keywords['catchall']

161	161

162 elif key == 'deprecated' or key == 'unavailable':	162 elif key == 'deprecated' or key == 'unavailable':

163 self._data[key] = True	163 self._data[key] = True

164	164

165 else:	165 else:

166 warn('Unknown attribute %s in %s' % (key, filePath))	166 warn('Unknown attribute %s in %s' % (key, path))

167	167

168 if key == 'languages':	168 if key == 'languages':

169 settings = get_settings()	169 settings = get_settings()

170 languageNames = []	170 languagenames = []

171 for language in value.split(','):	171 for language in value.split(','):

172 if settings.has_option('languages', language):	172 if settings.has_option('languages', language):

173 languageNames.append(settings.get('languages', language))	173 languagenames.append(settings.get('languages', language))

174 else:	174 else:

175 warn('Unknown language code %s in %s' % (language, filePath))	175 warn('Unknown language code %s in %s' % (language, path))

176 self._data['languageSpecialization'] = ', '.join(languageNames)	176 self._data['languageSpecialization'] = ', '.join(languagenames)

177	177

178 if 'languageSpecialization' in self._data:	178 if 'languageSpecialization' in self._data:

179 if self.specialization != None:	179 if self.specialization != None:

180 self.specialization += ", " + self._data['languageSpecialization']	180 self.specialization += ", " + self._data['languageSpecialization']

181 else:	181 else:

182 self.specialization = self._data['languageSpecialization']	182 self.specialization = self._data['languageSpecialization']

183 del self._data['languageSpecialization']	183 del self._data['languageSpecialization']

184	184

185 for mandatorySet in mandatory:	185 for group in mandatory:

186 found = False	186 found = False

187 for key in mandatorySet:	187 for key in group:

188 if self._data[key] != None:	188 if self._data[key] != None:

189 found = True	189 found = True

190 if not found:	190 if not found:

191 str = ", ".join(mandatorySet)	191 str = ", ".join(group)

192 warn('None of the attributes %s present in %s' % (str, filePath))	192 warn('None of the attributes %s present in %s' % (str, path))

193	193

194 if len(self.variants) == 0:	194 if len(self.variants) == 0:

195 warn('No list locations given in %s' % (filePath))	195 warn('No list locations given in %s' % (path))

196 if self.type != 'ads' and self.type != 'other':	196 if self.type != 'ads' and self.type != 'other':

197 warn('Unknown type given in %s' % (filePath))	197 warn('Unknown type given in %s' % (path))

198 if self.digest != 'daily' and self.digest != 'weekly':	198 if self.digest != 'daily' and self.digest != 'weekly':

199 warn('Unknown digest frequency given in %s' % (filePath))	199 warn('Unknown digest frequency given in %s' % (path))

200 if not self.digestDay[0:3].lower() in weekDays:	200 if not self.digestDay[0:3].lower() in weekdays:

201 warn('Unknown digest day given in %s' % (filePath))	201 warn('Unknown digest day given in %s' % (path))

202 self.digestDay = 'wed'	202 self.digestDay = 'wed'

203 self.digestDay = weekDays[self.digestDay[0:3].lower()]	203 self.digestDay = weekdays[self.digestDay[0:3].lower()]

204 if self.recommendation != None and (self.languages == None or not re.search(r'\S', self.languages)):	204 if self.recommendation != None and (self.languages == None or not re.search(r'\S', self.languages)):

205 warn('Recommendation without languages in %s' % (filePath))	205 warn('Recommendation without languages in %s' % (path))

206 if len(self.supplements) == 0:	206 if len(self.supplements) == 0:

207 for [name, url, complete] in self.variants:	207 for [name, url, complete] in self.variants:

208 if complete:	208 if complete:

209 warn('Variant marked as complete for non-supplemental subscription in %s' % (filePath))	209 warn('Variant marked as complete for non-supplemental subscription in %s' % (path))

210 break	210 break

211	211

212 self.variants.sort(key=lambda variant: (self.recommendation == variant) * 2 + variant[2], reverse=True)	212 self.variants.sort(key=lambda variant: (self.recommendation == variant) * 2 + variant[2], reverse=True)

213	213

214 def parseFile(filePath, data):	214 def parse_file(path, data):

215 return Subscription(filePath, data)	215 return Subscription(path, data)

216	216

217 def calculateSupplemented(lists):	217 def calculate_supplemented(lists):

218 for fileData in lists.itervalues():	218 for filedata in lists.itervalues():

219 for supplements in fileData.supplements:	219 for supplements in filedata.supplements:

220 if supplements in lists:	220 if supplements in lists:

221 lists[supplements].supplemented.append(fileData)	221 lists[supplements].supplemented.append(filedata)

222 else:	222 else:

223 warn('Subscription %s supplements an unknown subscription %s' % (fileData.name, supplements))	223 warn('Subscription %s supplements an unknown subscription %s' % (filedata.name, supplements))

224	224

225 @cached(60)	225 @cached(60)

226 def get_settings():	226 def get_settings():

227 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))	227 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))

228 settingsData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'settings')])	228 settingsdata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'settings')])

229 settings = SafeConfigParser()	229 settings = SafeConfigParser()

230 settings.readfp(codecs.getreader('utf8')(StringIO(settingsData)))	230 settings.readfp(codecs.getreader('utf8')(StringIO(settingsdata)))

231 return settings	231 return settings

232	232

233 def readSubscriptions():	233 def readSubscriptions():

234 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))	234 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))

235 data = subprocess.check_output(['hg', 'archive', '-R', repo, '-r', 'default', '-t', 'tar', '-I', os.path.join(repo, '*.subscription'), '-'])	235 data = subprocess.check_output(['hg', 'archive', '-R', repo, '-r', 'default', '-t', 'tar', '-I', os.path.join(repo, '*.subscription'), '-'])

236	236

237 result = {}	237 result = {}

238 tarFile = tarfile.open(mode='r:', fileobj=StringIO(data))	238 with tarfile.open(mode='r:', fileobj=StringIO(data)) as archive:

239 for fileInfo in tarFile:	239 for fileinfo in archive:

240 fileData = parseFile(fileInfo.name, codecs.getreader('utf8')(tarFile.extractfile(fileInfo)))	240 filedata = parse_file(fileinfo.name, codecs.getreader('utf8')(archive.extractfile(fileinfo)))

241 if fileData.unavailable:	241 if filedata.unavailable:

242 continue	242 continue

243	243

244 if fileData.name in result:	244 if filedata.name in result:

245 warn('Name %s is claimed by multiple files' % (fileData.name))	245 warn('Name %s is claimed by multiple files' % (filedata.name))

246 result[fileData.name] = fileData	246 result[filedata.name] = filedata

247 tarFile.close()	247

248	248 calculate_supplemented(result)

249 calculateSupplemented(result)

250 return result	249 return result

251	250

252 def getFallbackData():	251 def getFallbackData():

253 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))	252 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))

254 redirectData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'redirects')])	253 redirectdata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'redirects')])

255 goneData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'gone')])	254 gonedata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'gone')])

256 return (redirectData, goneData)	255 return (redirectdata, gonedata)

257	256

258 def _validateURL(url):	257 def _validate_URL(url):

259 parseResult = urlparse(url)	258 parse_result = urlparse(url)

260 return (parseResult.scheme == 'http' or parseResult.scheme == 'https') and parseResult.netloc != ''	259 return parse_result.scheme in ('http', 'https') and parse_result.netloc != ''

LEFT	RIGHT