LEFT | RIGHT |
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import re, os, sys, codecs, subprocess, tarfile | 18 import re, os, sys, codecs, subprocess, tarfile |
19 from urlparse import urlparse | 19 from urlparse import urlparse |
20 from StringIO import StringIO | 20 from StringIO import StringIO |
21 from ConfigParser import SafeConfigParser | 21 from ConfigParser import SafeConfigParser |
22 from sitescripts.utils import get_config, cached | 22 from sitescripts.utils import get_config, cached |
23 | 23 |
24 def warn(message): | 24 def warn(message): |
25 print >> sys.stderr, message | 25 print >> sys.stderr, message |
26 | 26 |
27 class Subscription(object): | 27 class Subscription(object): |
28 def defineProperty(propName, isSimple = False): | 28 def define_property(propName, readonly=False): |
29 if isSimple: | 29 if readonly: |
30 def setProperty(dict, propName, value): | 30 return property(lambda self: self._data[propName]) |
31 dict[propName] = value | |
32 | |
33 return property(lambda self: self._data[propName], lambda self, value: set
Property(self._data, propName, value)) | |
34 else: | 31 else: |
35 return property(lambda self: self._data[propName]) | 32 def set_property(self, value): |
36 | 33 self._data[propName] = value |
37 name = defineProperty("name", True) | 34 |
38 type = defineProperty("type", True) | 35 return property(lambda self: self._data[propName], set_property) |
39 maintainer = defineProperty("maintainer", True) | 36 |
40 email = defineProperty("email", True) | 37 name = define_property("name") |
41 specialization = defineProperty("specialization", True) | 38 type = define_property("type") |
42 languages = defineProperty("languages", True) | 39 maintainer = define_property("maintainer") |
43 recommendation = defineProperty("recommendation") | 40 email = define_property("email") |
44 deprecated = defineProperty("deprecated") | 41 specialization = define_property("specialization") |
45 unavailable = defineProperty("unavailable") | 42 languages = define_property("languages") |
46 catchall = defineProperty("catchall") | 43 recommendation = define_property("recommendation", readonly=True) |
47 supplements = defineProperty("supplements") | 44 deprecated = define_property("deprecated", readonly=True) |
48 supplemented = defineProperty("supplemented") | 45 unavailable = define_property("unavailable", readonly=True) |
49 variants = defineProperty("variants") | 46 catchall = define_property("catchall", readonly=True) |
50 homepage = defineProperty("homepage", True) | 47 supplements = define_property("supplements", readonly=True) |
51 contact = defineProperty("contact", True) | 48 supplemented = define_property("supplemented", readonly=True) |
52 forum = defineProperty("forum", True) | 49 variants = define_property("variants", readonly=True) |
53 faq = defineProperty("faq", True) | 50 homepage = define_property("homepage") |
54 blog = defineProperty("blog", True) | 51 contact = define_property("contact") |
55 changelog = defineProperty("changelog", True) | 52 forum = define_property("forum") |
56 policy = defineProperty("policy", True) | 53 faq = define_property("faq") |
57 digest = defineProperty("digest", True) | 54 blog = define_property("blog") |
58 digestDay = defineProperty("digestDay", True) | 55 changelog = define_property("changelog") |
59 | 56 policy = define_property("policy") |
60 def __init__(self, filePath, data): | 57 digest = define_property("digest") |
| 58 digestDay = define_property("digestDay") |
| 59 |
| 60 def __init__(self, path, data): |
61 self._data = { | 61 self._data = { |
62 'name': None, | 62 'name': None, |
63 'type': 'ads', | 63 'type': 'ads', |
64 'maintainer': None, | 64 'maintainer': None, |
65 'email': None, | 65 'email': None, |
66 'specialization': None, | 66 'specialization': None, |
67 'languages': None, | 67 'languages': None, |
68 'deprecated': False, | 68 'deprecated': False, |
69 'unavailable': False, | 69 'unavailable': False, |
70 'catchall': False, | 70 'catchall': False, |
71 'supplements': [], | 71 'supplements': [], |
72 'supplemented': [], | 72 'supplemented': [], |
73 'variants': [], | 73 'variants': [], |
74 'recommendation': None, | 74 'recommendation': None, |
75 'homepage': None, | 75 'homepage': None, |
76 'contact': None, | 76 'contact': None, |
77 'forum': None, | 77 'forum': None, |
78 'faq': None, | 78 'faq': None, |
79 'blog': None, | 79 'blog': None, |
80 'changelog': None, | 80 'changelog': None, |
81 'policy': None, | 81 'policy': None, |
82 'digest': 'weekly', | 82 'digest': 'weekly', |
83 'digestDay': 'wed', | 83 'digestDay': 'wed', |
84 } | 84 } |
85 self.parse(filePath, data) | 85 self.parse(path, data) |
86 | 86 |
87 def parse(self, filePath, data): | 87 def parse(self, path, data): |
88 mandatory = [['email'], ['specialization'], ['homepage', 'contact', 'forum',
'faq', 'blog']] | 88 mandatory = [['email'], ['specialization'], ['homepage', 'contact', 'forum',
'faq', 'blog']] |
89 weekDays = { | 89 weekdays = { |
90 'son': 0, | 90 'son': 0, |
91 'mon': 1, | 91 'mon': 1, |
92 'tue': 2, | 92 'tue': 2, |
93 'wed': 3, | 93 'wed': 3, |
94 'thu': 4, | 94 'thu': 4, |
95 'fri': 5, | 95 'fri': 5, |
96 'sat': 6, | 96 'sat': 6, |
97 } | 97 } |
98 | 98 |
99 self.name = re.sub(r'\.\w+$', r'', os.path.basename(filePath)) | 99 self.name = re.sub(r'\.\w+$', r'', os.path.basename(path)) |
100 | 100 |
101 for line in data: | 101 for line in data: |
102 if not re.search(r'\S', line): | 102 if not re.search(r'\S', line): |
103 continue | 103 continue |
104 | 104 |
105 parts = line.split('=', 1) | 105 parts = line.split('=', 1) |
106 key = parts[0].strip() | 106 key = parts[0].strip() |
107 if len(parts) > 1: | 107 if len(parts) > 1: |
108 value = parts[1].strip() | 108 value = parts[1].strip() |
109 else: | 109 else: |
110 value = '' | 110 value = '' |
111 | 111 |
112 try: | 112 try: |
113 # Might be a simple attribute - try setting the value | 113 # Might be a simple attribute - try setting the value |
114 if not hasattr(self, key): | 114 if not hasattr(self, key): |
115 raise Exception() | 115 raise Exception() |
116 | 116 |
117 oldValue = getattr(self, key) | 117 oldvalue = getattr(self, key) |
118 setattr(self, key, value) | 118 setattr(self, key, value) |
119 if value == '': | 119 if value == '': |
120 warn('Empty value given for attribute %s in %s' % (key, filePath)) | 120 warn('Empty value given for attribute %s in %s' % (key, path)) |
121 if oldValue != None and key != 'name' and key != 'type' and key != 'dige
st' and key != 'digestDay': | 121 if oldvalue != None and key != 'name' and key != 'type' and key != 'dige
st' and key != 'digestDay': |
122 warn('Value for attribute %s is duplicated in %s' % (key, filePath)) | 122 warn('Value for attribute %s is duplicated in %s' % (key, path)) |
123 except: | 123 except: |
124 # Not a simple attribute, needs special handling | 124 # Not a simple attribute, needs special handling |
125 if key == 'supplements': | 125 if key == 'supplements': |
126 if value == '': | 126 if value == '': |
127 warn('Empty value given for attribute %s in %s' % (key, filePath)) | 127 warn('Empty value given for attribute %s in %s' % (key, path)) |
128 self.supplements.append(value) | 128 self.supplements.append(value) |
129 | 129 |
130 elif key == 'list' or key == 'variant': | 130 elif key == 'list' or key == 'variant': |
131 if value == '': | 131 if value == '': |
132 warn('Empty value given for attribute %s in %s' % (key, filePath)) | 132 warn('Empty value given for attribute %s in %s' % (key, path)) |
133 keywords = { | 133 keywords = { |
134 'recommendation': False, | 134 'recommendation': False, |
135 'catchall': False, | 135 'catchall': False, |
136 'complete': False, | 136 'complete': False, |
137 } | 137 } |
138 regexp = re.compile(r'\s*\[((?:\w+,)*\w+)\]$') | 138 regexp = re.compile(r'\s*\[((?:\w+,)*\w+)\]$') |
139 match = re.search(regexp, value) | 139 match = re.search(regexp, value) |
140 if match: | 140 if match: |
141 value = re.sub(regexp, r'', value) | 141 value = re.sub(regexp, r'', value) |
142 for keyword in match.group(1).split(','): | 142 for keyword in match.group(1).split(','): |
143 keyword = keyword.lower() | 143 keyword = keyword.lower() |
144 if keyword in keywords: | 144 if keyword in keywords: |
145 keywords[keyword] = True | 145 keywords[keyword] = True |
146 else: | 146 else: |
147 warn('Unknown keyword %s given for attribute %s in %s' % (keywor
d, key, filePath)) | 147 warn('Unknown keyword %s given for attribute %s in %s' % (keywor
d, key, path)) |
148 (name, url) = (self.name, value) | 148 (name, url) = (self.name, value) |
149 if key == 'variant': | 149 if key == 'variant': |
150 match = re.search(r'(.+?)\s+(\S+)$', value) | 150 match = re.search(r'(.+?)\s+(\S+)$', value) |
151 if match: | 151 if match: |
152 (name, url) = (match.group(1), match.group(2)); | 152 (name, url) = (match.group(1), match.group(2)); |
153 else: | 153 else: |
154 warn('Invalid variant format in %s, no name given?' % (filePath)) | 154 warn('Invalid variant format in %s, no name given?' % (path)) |
155 if not _validateURL(url): | 155 if not _validate_URL(url): |
156 warn('Invalid list URL %s given in %s' % (url, filePath)) | 156 warn('Invalid list URL %s given in %s' % (url, path)) |
157 self.variants.append([name, url, keywords['complete']]) | 157 self.variants.append([name, url, keywords['complete']]) |
158 if keywords['recommendation']: | 158 if keywords['recommendation']: |
159 self._data['recommendation'] = self._data['variants'][-1] | 159 self._data['recommendation'] = self._data['variants'][-1] |
160 self._data['catchall'] = keywords['catchall'] | 160 self._data['catchall'] = keywords['catchall'] |
161 | 161 |
162 elif key == 'deprecated' or key == 'unavailable': | 162 elif key == 'deprecated' or key == 'unavailable': |
163 self._data[key] = True | 163 self._data[key] = True |
164 | 164 |
165 else: | 165 else: |
166 warn('Unknown attribute %s in %s' % (key, filePath)) | 166 warn('Unknown attribute %s in %s' % (key, path)) |
167 | 167 |
168 if key == 'languages': | 168 if key == 'languages': |
169 settings = get_settings() | 169 settings = get_settings() |
170 languageNames = [] | 170 languagenames = [] |
171 for language in value.split(','): | 171 for language in value.split(','): |
172 if settings.has_option('languages', language): | 172 if settings.has_option('languages', language): |
173 languageNames.append(settings.get('languages', language)) | 173 languagenames.append(settings.get('languages', language)) |
174 else: | 174 else: |
175 warn('Unknown language code %s in %s' % (language, filePath)) | 175 warn('Unknown language code %s in %s' % (language, path)) |
176 self._data['languageSpecialization'] = ', '.join(languageNames) | 176 self._data['languageSpecialization'] = ', '.join(languagenames) |
177 | 177 |
178 if 'languageSpecialization' in self._data: | 178 if 'languageSpecialization' in self._data: |
179 if self.specialization != None: | 179 if self.specialization != None: |
180 self.specialization += ", " + self._data['languageSpecialization'] | 180 self.specialization += ", " + self._data['languageSpecialization'] |
181 else: | 181 else: |
182 self.specialization = self._data['languageSpecialization'] | 182 self.specialization = self._data['languageSpecialization'] |
183 del self._data['languageSpecialization'] | 183 del self._data['languageSpecialization'] |
184 | 184 |
185 for mandatorySet in mandatory: | 185 for group in mandatory: |
186 found = False | 186 found = False |
187 for key in mandatorySet: | 187 for key in group: |
188 if self._data[key] != None: | 188 if self._data[key] != None: |
189 found = True | 189 found = True |
190 if not found: | 190 if not found: |
191 str = ", ".join(mandatorySet) | 191 str = ", ".join(group) |
192 warn('None of the attributes %s present in %s' % (str, filePath)) | 192 warn('None of the attributes %s present in %s' % (str, path)) |
193 | 193 |
194 if len(self.variants) == 0: | 194 if len(self.variants) == 0: |
195 warn('No list locations given in %s' % (filePath)) | 195 warn('No list locations given in %s' % (path)) |
196 if self.type != 'ads' and self.type != 'other': | 196 if self.type != 'ads' and self.type != 'other': |
197 warn('Unknown type given in %s' % (filePath)) | 197 warn('Unknown type given in %s' % (path)) |
198 if self.digest != 'daily' and self.digest != 'weekly': | 198 if self.digest != 'daily' and self.digest != 'weekly': |
199 warn('Unknown digest frequency given in %s' % (filePath)) | 199 warn('Unknown digest frequency given in %s' % (path)) |
200 if not self.digestDay[0:3].lower() in weekDays: | 200 if not self.digestDay[0:3].lower() in weekdays: |
201 warn('Unknown digest day given in %s' % (filePath)) | 201 warn('Unknown digest day given in %s' % (path)) |
202 self.digestDay = 'wed' | 202 self.digestDay = 'wed' |
203 self.digestDay = weekDays[self.digestDay[0:3].lower()] | 203 self.digestDay = weekdays[self.digestDay[0:3].lower()] |
204 if self.recommendation != None and (self.languages == None or not re.search(
r'\S', self.languages)): | 204 if self.recommendation != None and (self.languages == None or not re.search(
r'\S', self.languages)): |
205 warn('Recommendation without languages in %s' % (filePath)) | 205 warn('Recommendation without languages in %s' % (path)) |
206 if len(self.supplements) == 0: | 206 if len(self.supplements) == 0: |
207 for [name, url, complete] in self.variants: | 207 for [name, url, complete] in self.variants: |
208 if complete: | 208 if complete: |
209 warn('Variant marked as complete for non-supplemental subscription in
%s' % (filePath)) | 209 warn('Variant marked as complete for non-supplemental subscription in
%s' % (path)) |
210 break | 210 break |
211 | 211 |
212 self.variants.sort(key=lambda variant: (self.recommendation == variant) * 2
+ variant[2], reverse=True) | 212 self.variants.sort(key=lambda variant: (self.recommendation == variant) * 2
+ variant[2], reverse=True) |
213 | 213 |
214 def parseFile(filePath, data): | 214 def parse_file(path, data): |
215 return Subscription(filePath, data) | 215 return Subscription(path, data) |
216 | 216 |
217 def calculateSupplemented(lists): | 217 def calculate_supplemented(lists): |
218 for fileData in lists.itervalues(): | 218 for filedata in lists.itervalues(): |
219 for supplements in fileData.supplements: | 219 for supplements in filedata.supplements: |
220 if supplements in lists: | 220 if supplements in lists: |
221 lists[supplements].supplemented.append(fileData) | 221 lists[supplements].supplemented.append(filedata) |
222 else: | 222 else: |
223 warn('Subscription %s supplements an unknown subscription %s' % (fileDat
a.name, supplements)) | 223 warn('Subscription %s supplements an unknown subscription %s' % (filedat
a.name, supplements)) |
224 | 224 |
225 @cached(60) | 225 @cached(60) |
226 def get_settings(): | 226 def get_settings(): |
227 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) | 227 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) |
228 settingsData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'defaul
t', os.path.join(repo, 'settings')]) | 228 settingsdata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'defaul
t', os.path.join(repo, 'settings')]) |
229 settings = SafeConfigParser() | 229 settings = SafeConfigParser() |
230 settings.readfp(codecs.getreader('utf8')(StringIO(settingsData))) | 230 settings.readfp(codecs.getreader('utf8')(StringIO(settingsdata))) |
231 return settings | 231 return settings |
232 | 232 |
233 def readSubscriptions(): | 233 def readSubscriptions(): |
234 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) | 234 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) |
235 data = subprocess.check_output(['hg', 'archive', '-R', repo, '-r', 'default',
'-t', 'tar', '-I', os.path.join(repo, '*.subscription'), '-']) | 235 data = subprocess.check_output(['hg', 'archive', '-R', repo, '-r', 'default',
'-t', 'tar', '-I', os.path.join(repo, '*.subscription'), '-']) |
236 | 236 |
237 result = {} | 237 result = {} |
238 tarFile = tarfile.open(mode='r:', fileobj=StringIO(data)) | 238 with tarfile.open(mode='r:', fileobj=StringIO(data)) as archive: |
239 for fileInfo in tarFile: | 239 for fileinfo in archive: |
240 fileData = parseFile(fileInfo.name, codecs.getreader('utf8')(tarFile.extract
file(fileInfo))) | 240 filedata = parse_file(fileinfo.name, codecs.getreader('utf8')(archive.extr
actfile(fileinfo))) |
241 if fileData.unavailable: | 241 if filedata.unavailable: |
242 continue | 242 continue |
243 | 243 |
244 if fileData.name in result: | 244 if filedata.name in result: |
245 warn('Name %s is claimed by multiple files' % (fileData.name)) | 245 warn('Name %s is claimed by multiple files' % (filedata.name)) |
246 result[fileData.name] = fileData | 246 result[filedata.name] = filedata |
247 tarFile.close() | 247 |
248 | 248 calculate_supplemented(result) |
249 calculateSupplemented(result) | |
250 return result | 249 return result |
251 | 250 |
252 def getFallbackData(): | 251 def getFallbackData(): |
253 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) | 252 repo = os.path.abspath(get_config().get('subscriptions', 'repository')) |
254 redirectData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'defaul
t', os.path.join(repo, 'redirects')]) | 253 redirectdata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'defaul
t', os.path.join(repo, 'redirects')]) |
255 goneData = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default',
os.path.join(repo, 'gone')]) | 254 gonedata = subprocess.check_output(['hg', '-R', repo, 'cat', '-r', 'default',
os.path.join(repo, 'gone')]) |
256 return (redirectData, goneData) | 255 return (redirectdata, gonedata) |
257 | 256 |
258 def _validateURL(url): | 257 def _validate_URL(url): |
259 parseResult = urlparse(url) | 258 parse_result = urlparse(url) |
260 return (parseResult.scheme == 'http' or parseResult.scheme == 'https') and par
seResult.netloc != '' | 259 return parse_result.scheme in ('http', 'https') and parse_result.netloc != '' |
LEFT | RIGHT |