Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/subscriptions/subscriptionParser.py

Issue 11275006: Added script to generate notification.json for the emergency notification mechanism (Closed)
Left Patch Set: Fixed closing and iterating through tar files Created July 29, 2013, 2:08 p.m.
Right Patch Set: Addressed review comments Created Nov. 4, 2013, 10:28 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/management/bin/generateNotifications.py ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import re, os, sys, codecs, subprocess, tarfile 18 import re, os, sys, codecs, subprocess, tarfile
19 from urlparse import urlparse 19 from urlparse import urlparse
20 from StringIO import StringIO 20 from StringIO import StringIO
21 from ConfigParser import SafeConfigParser 21 from ConfigParser import SafeConfigParser
22 from sitescripts.utils import get_config, cached 22 from sitescripts.utils import get_config, cached
23 23
24 def warn(message): 24 def warn(message):
25 print >> sys.stderr, message 25 print >> sys.stderr, message
26 26
27 class Subscription(object): 27 class Subscription(object):
28 def defineProperty(propName, isSimple = False): 28 def define_property(propName, readonly=False):
29 if isSimple: 29 if readonly:
30 def setProperty(dict, propName, value): 30 return property(lambda self: self._data[propName])
31 dict[propName] = value
32
33 return property(lambda self: self._data[propName], lambda self, value: setProperty(self._data, propName, value))
34 else: 31 else:
35 return property(lambda self: self._data[propName]) 32 def set_property(self, value):
36 33 self._data[propName] = value
37 name = defineProperty("name", True) 34
38 type = defineProperty("type", True) 35 return property(lambda self: self._data[propName], set_property)
39 maintainer = defineProperty("maintainer", True) 36
40 email = defineProperty("email", True) 37 name = define_property("name")
41 specialization = defineProperty("specialization", True) 38 type = define_property("type")
42 languages = defineProperty("languages", True) 39 maintainer = define_property("maintainer")
43 recommendation = defineProperty("recommendation") 40 email = define_property("email")
44 deprecated = defineProperty("deprecated") 41 specialization = define_property("specialization")
45 unavailable = defineProperty("unavailable") 42 languages = define_property("languages")
46 catchall = defineProperty("catchall") 43 recommendation = define_property("recommendation", readonly=True)
47 supplements = defineProperty("supplements") 44 deprecated = define_property("deprecated", readonly=True)
48 supplemented = defineProperty("supplemented") 45 unavailable = define_property("unavailable", readonly=True)
49 variants = defineProperty("variants") 46 catchall = define_property("catchall", readonly=True)
50 homepage = defineProperty("homepage", True) 47 supplements = define_property("supplements", readonly=True)
51 contact = defineProperty("contact", True) 48 supplemented = define_property("supplemented", readonly=True)
52 forum = defineProperty("forum", True) 49 variants = define_property("variants", readonly=True)
53 faq = defineProperty("faq", True) 50 homepage = define_property("homepage")
54 blog = defineProperty("blog", True) 51 contact = define_property("contact")
55 changelog = defineProperty("changelog", True) 52 forum = define_property("forum")
56 policy = defineProperty("policy", True) 53 faq = define_property("faq")
57 digest = defineProperty("digest", True) 54 blog = define_property("blog")
58 digestDay = defineProperty("digestDay", True) 55 changelog = define_property("changelog")
59 56 policy = define_property("policy")
60 def __init__(self, filePath, data): 57 digest = define_property("digest")
58 digestDay = define_property("digestDay")
59
60 def __init__(self, path, data):
61 self._data = { 61 self._data = {
62 'name': None, 62 'name': None,
63 'type': 'ads', 63 'type': 'ads',
64 'maintainer': None, 64 'maintainer': None,
65 'email': None, 65 'email': None,
66 'specialization': None, 66 'specialization': None,
67 'languages': None, 67 'languages': None,
68 'deprecated': False, 68 'deprecated': False,
69 'unavailable': False, 69 'unavailable': False,
70 'catchall': False, 70 'catchall': False,
71 'supplements': [], 71 'supplements': [],
72 'supplemented': [], 72 'supplemented': [],
73 'variants': [], 73 'variants': [],
74 'recommendation': None, 74 'recommendation': None,
75 'homepage': None, 75 'homepage': None,
76 'contact': None, 76 'contact': None,
77 'forum': None, 77 'forum': None,
78 'faq': None, 78 'faq': None,
79 'blog': None, 79 'blog': None,
80 'changelog': None, 80 'changelog': None,
81 'policy': None, 81 'policy': None,
82 'digest': 'weekly', 82 'digest': 'weekly',
83 'digestDay': 'wed', 83 'digestDay': 'wed',
84 } 84 }
85 self.parse(filePath, data) 85 self.parse(path, data)
86 86
87 def parse(self, filePath, data): 87 def parse(self, path, data):
88 mandatory = [['email'], ['specialization'], ['homepage', 'contact', 'forum', 'faq', 'blog']] 88 mandatory = [['email'], ['specialization'], ['homepage', 'contact', 'forum', 'faq', 'blog']]
89 weekDays = { 89 weekdays = {
90 'son': 0, 90 'son': 0,
91 'mon': 1, 91 'mon': 1,
92 'tue': 2, 92 'tue': 2,
93 'wed': 3, 93 'wed': 3,
94 'thu': 4, 94 'thu': 4,
95 'fri': 5, 95 'fri': 5,
96 'sat': 6, 96 'sat': 6,
97 } 97 }
98 98
99 self.name = re.sub(r'\.\w+$', r'', os.path.basename(filePath)) 99 self.name = re.sub(r'\.\w+$', r'', os.path.basename(path))
100 100
101 for line in data: 101 for line in data:
102 if not re.search(r'\S', line): 102 if not re.search(r'\S', line):
103 continue 103 continue
104 104
105 parts = line.split('=', 1) 105 parts = line.split('=', 1)
106 key = parts[0].strip() 106 key = parts[0].strip()
107 if len(parts) > 1: 107 if len(parts) > 1:
108 value = parts[1].strip() 108 value = parts[1].strip()
109 else: 109 else:
110 value = '' 110 value = ''
111 111
112 try: 112 try:
113 # Might be a simple attribute - try setting the value 113 # Might be a simple attribute - try setting the value
114 if not hasattr(self, key): 114 if not hasattr(self, key):
115 raise Exception() 115 raise Exception()
116 116
117 oldValue = getattr(self, key) 117 oldvalue = getattr(self, key)
118 setattr(self, key, value) 118 setattr(self, key, value)
119 if value == '': 119 if value == '':
120 warn('Empty value given for attribute %s in %s' % (key, filePath)) 120 warn('Empty value given for attribute %s in %s' % (key, path))
121 if oldValue != None and key != 'name' and key != 'type' and key != 'digest' and key != 'digestDay': 121 if oldvalue != None and key != 'name' and key != 'type' and key != 'digest' and key != 'digestDay':
122 warn('Value for attribute %s is duplicated in %s' % (key, filePath)) 122 warn('Value for attribute %s is duplicated in %s' % (key, path))
123 except: 123 except:
124 # Not a simple attribute, needs special handling 124 # Not a simple attribute, needs special handling
125 if key == 'supplements': 125 if key == 'supplements':
126 if value == '': 126 if value == '':
127 warn('Empty value given for attribute %s in %s' % (key, filePath)) 127 warn('Empty value given for attribute %s in %s' % (key, path))
128 self.supplements.append(value) 128 self.supplements.append(value)
129 129
130 elif key == 'list' or key == 'variant': 130 elif key == 'list' or key == 'variant':
131 if value == '': 131 if value == '':
132 warn('Empty value given for attribute %s in %s' % (key, filePath)) 132 warn('Empty value given for attribute %s in %s' % (key, path))
133 keywords = { 133 keywords = {
134 'recommendation': False, 134 'recommendation': False,
135 'catchall': False, 135 'catchall': False,
136 'complete': False, 136 'complete': False,
137 } 137 }
138 regexp = re.compile(r'\s*\[((?:\w+,)*\w+)\]$') 138 regexp = re.compile(r'\s*\[((?:\w+,)*\w+)\]$')
139 match = re.search(regexp, value) 139 match = re.search(regexp, value)
140 if match: 140 if match:
141 value = re.sub(regexp, r'', value) 141 value = re.sub(regexp, r'', value)
142 for keyword in match.group(1).split(','): 142 for keyword in match.group(1).split(','):
143 keyword = keyword.lower() 143 keyword = keyword.lower()
144 if keyword in keywords: 144 if keyword in keywords:
145 keywords[keyword] = True 145 keywords[keyword] = True
146 else: 146 else:
147 warn('Unknown keyword %s given for attribute %s in %s' % (keyword, key, filePath)) 147 warn('Unknown keyword %s given for attribute %s in %s' % (keyword, key, path))
148 (name, url) = (self.name, value) 148 (name, url) = (self.name, value)
149 if key == 'variant': 149 if key == 'variant':
150 match = re.search(r'(.+?)\s+(\S+)$', value) 150 match = re.search(r'(.+?)\s+(\S+)$', value)
151 if match: 151 if match:
152 (name, url) = (match.group(1), match.group(2)); 152 (name, url) = (match.group(1), match.group(2));
153 else: 153 else:
154 warn('Invalid variant format in %s, no name given?' % (filePath)) 154 warn('Invalid variant format in %s, no name given?' % (path))
155 if not _validateURL(url): 155 if not _validate_URL(url):
156 warn('Invalid list URL %s given in %s' % (url, filePath)) 156 warn('Invalid list URL %s given in %s' % (url, path))
157 self.variants.append([name, url, keywords['complete']]) 157 self.variants.append([name, url, keywords['complete']])
158 if keywords['recommendation']: 158 if keywords['recommendation']:
159 self._data['recommendation'] = self._data['variants'][-1] 159 self._data['recommendation'] = self._data['variants'][-1]
160 self._data['catchall'] = keywords['catchall'] 160 self._data['catchall'] = keywords['catchall']
161 161
162 elif key == 'deprecated' or key == 'unavailable': 162 elif key == 'deprecated' or key == 'unavailable':
163 self._data[key] = True 163 self._data[key] = True
164 164
165 else: 165 else:
166 warn('Unknown attribute %s in %s' % (key, filePath)) 166 warn('Unknown attribute %s in %s' % (key, path))
167 167
168 if key == 'languages': 168 if key == 'languages':
169 settings = get_settings() 169 settings = get_settings()
170 languageNames = [] 170 languagenames = []
171 for language in value.split(','): 171 for language in value.split(','):
172 if settings.has_option('languages', language): 172 if settings.has_option('languages', language):
173 languageNames.append(settings.get('languages', language)) 173 languagenames.append(settings.get('languages', language))
174 else: 174 else:
175 warn('Unknown language code %s in %s' % (language, filePath)) 175 warn('Unknown language code %s in %s' % (language, path))
176 self._data['languageSpecialization'] = ', '.join(languageNames) 176 self._data['languageSpecialization'] = ', '.join(languagenames)
177 177
178 if 'languageSpecialization' in self._data: 178 if 'languageSpecialization' in self._data:
179 if self.specialization != None: 179 if self.specialization != None:
180 self.specialization += ", " + self._data['languageSpecialization'] 180 self.specialization += ", " + self._data['languageSpecialization']
181 else: 181 else:
182 self.specialization = self._data['languageSpecialization'] 182 self.specialization = self._data['languageSpecialization']
183 del self._data['languageSpecialization'] 183 del self._data['languageSpecialization']
184 184
185 for mandatorySet in mandatory: 185 for group in mandatory:
186 found = False 186 found = False
187 for key in mandatorySet: 187 for key in group:
188 if self._data[key] != None: 188 if self._data[key] != None:
189 found = True 189 found = True
190 if not found: 190 if not found:
191 str = ", ".join(mandatorySet) 191 str = ", ".join(group)
192 warn('None of the attributes %s present in %s' % (str, filePath)) 192 warn('None of the attributes %s present in %s' % (str, path))
193 193
194 if len(self.variants) == 0: 194 if len(self.variants) == 0:
195 warn('No list locations given in %s' % (filePath)) 195 warn('No list locations given in %s' % (path))
196 if self.type != 'ads' and self.type != 'other': 196 if self.type != 'ads' and self.type != 'other':
197 warn('Unknown type given in %s' % (filePath)) 197 warn('Unknown type given in %s' % (path))
198 if self.digest != 'daily' and self.digest != 'weekly': 198 if self.digest != 'daily' and self.digest != 'weekly':
199 warn('Unknown digest frequency given in %s' % (filePath)) 199 warn('Unknown digest frequency given in %s' % (path))
200 if not self.digestDay[0:3].lower() in weekDays: 200 if not self.digestDay[0:3].lower() in weekdays:
201 warn('Unknown digest day given in %s' % (filePath)) 201 warn('Unknown digest day given in %s' % (path))
202 self.digestDay = 'wed' 202 self.digestDay = 'wed'
203 self.digestDay = weekDays[self.digestDay[0:3].lower()] 203 self.digestDay = weekdays[self.digestDay[0:3].lower()]
204 if self.recommendation != None and (self.languages == None or not re.search(r'\S', self.languages)): 204 if self.recommendation != None and (self.languages == None or not re.search(r'\S', self.languages)):
205 warn('Recommendation without languages in %s' % (filePath)) 205 warn('Recommendation without languages in %s' % (path))
206 if len(self.supplements) == 0: 206 if len(self.supplements) == 0:
207 for [name, url, complete] in self.variants: 207 for [name, url, complete] in self.variants:
208 if complete: 208 if complete:
209 warn('Variant marked as complete for non-supplemental subscription in %s' % (filePath)) 209 warn('Variant marked as complete for non-supplemental subscription in %s' % (path))
210 break 210 break
211 211
212 self.variants.sort(key=lambda variant: (self.recommendation == variant) * 2 + variant[2], reverse=True) 212 self.variants.sort(key=lambda variant: (self.recommendation == variant) * 2 + variant[2], reverse=True)
213 213
214 def parseFile(filePath, data): 214 def parse_file(path, data):
215 return Subscription(filePath, data) 215 return Subscription(path, data)
216 216
217 def calculateSupplemented(lists): 217 def calculate_supplemented(lists):
218 for fileData in lists.itervalues(): 218 for filedata in lists.itervalues():
219 for supplements in fileData.supplements: 219 for supplements in filedata.supplements:
220 if supplements in lists: 220 if supplements in lists:
221 lists[supplements].supplemented.append(fileData) 221 lists[supplements].supplemented.append(filedata)
222 else: 222 else:
223 warn('Subscription %s supplements an unknown subscription %s' % (fileData.name, supplements)) 223 warn('Subscription %s supplements an unknown subscription %s' % (filedata.name, supplements))
224 224
225 @cached(60) 225 @cached(60)
226 def get_settings(): 226 def get_settings():
227 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) 227 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))
228 settingsData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'settings')]) 228 settingsdata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'settings')])
229 settings = SafeConfigParser() 229 settings = SafeConfigParser()
230 settings.readfp(codecs.getreader('utf8')(StringIO(settingsData))) 230 settings.readfp(codecs.getreader('utf8')(StringIO(settingsdata)))
231 return settings 231 return settings
232 232
233 def readSubscriptions(): 233 def readSubscriptions():
234 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) 234 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))
235 data = subprocess.check_output(['hg', 'archive', '-R', repo, '-r', 'default', '-t', 'tar', '-I', os.path.join(repo, '*.subscription'), '-']) 235 data = subprocess.check_output(['hg', 'archive', '-R', repo, '-r', 'default', '-t', 'tar', '-I', os.path.join(repo, '*.subscription'), '-'])
236 236
237 result = {} 237 result = {}
238 tarFile = tarfile.open(mode='r:', fileobj=StringIO(data)) 238 with tarfile.open(mode='r:', fileobj=StringIO(data)) as archive:
239 for fileInfo in tarFile: 239 for fileinfo in archive:
240 fileData = parseFile(fileInfo.name, codecs.getreader('utf8')(tarFile.extractfile(fileInfo))) 240 filedata = parse_file(fileinfo.name, codecs.getreader('utf8')(archive.extractfile(fileinfo)))
241 if fileData.unavailable: 241 if filedata.unavailable:
242 continue 242 continue
243 243
244 if fileData.name in result: 244 if filedata.name in result:
245 warn('Name %s is claimed by multiple files' % (fileData.name)) 245 warn('Name %s is claimed by multiple files' % (filedata.name))
246 result[fileData.name] = fileData 246 result[filedata.name] = filedata
247 tarFile.close() 247
248 248 calculate_supplemented(result)
249 calculateSupplemented(result)
250 return result 249 return result
251 250
252 def getFallbackData(): 251 def getFallbackData():
253 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) 252 repo = os.path.abspath(get_config().get('subscriptions', 'repository'))
254 redirectData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'redirects')]) 253 redirectdata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'redirects')])
255 goneData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'gone')]) 254 gonedata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default', os.path.join(repo, 'gone')])
256 return (redirectData, goneData) 255 return (redirectdata, gonedata)
257 256
258 def _validateURL(url): 257 def _validate_URL(url):
259 parseResult = urlparse(url) 258 parse_result = urlparse(url)
260 return (parseResult.scheme == 'http' or parseResult.scheme == 'https') and parseResult.netloc != '' 259 return parse_result.scheme in ('http', 'https') and parse_result.netloc != ''
LEFT RIGHT

Powered by Google App Engine
This is Rietveld