OLD | NEW |
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
179 ruleset.checkValidity() | 179 ruleset.checkValidity() |
180 rulesets.append(ruleset) | 180 rulesets.append(ruleset) |
181 ruleset = Ruleset(line[1:len(line) - 1]) | 181 ruleset = Ruleset(line[1:len(line) - 1]) |
182 else: | 182 else: |
183 if ruleset == None: | 183 if ruleset == None: |
184 print >>sys.stderr, 'Found line %s before start of a ruleset' %
line | 184 print >>sys.stderr, 'Found line %s before start of a ruleset' %
line |
185 continue | 185 continue |
186 if line.find('=') < 0: | 186 if line.find('=') < 0: |
187 print >>sys.stderr, 'Unrecognized line %s' % line | 187 print >>sys.stderr, 'Unrecognized line %s' % line |
188 continue | 188 continue |
189 (key, value) = line.split('=', 1) | 189 key, value = line.split('=', 1) |
190 key = key.rstrip() | 190 key = key.rstrip() |
191 value = value.lstrip() | 191 value = value.lstrip() |
192 if key == 'url': | 192 if key == 'url': |
193 ruleset.url = value | 193 ruleset.url = value |
194 else: | 194 else: |
195 ruleset.addRule(rules, key, value) | 195 ruleset.addRule(rules, key, value) |
196 return (rules, rulesets) | 196 return (rules, rulesets) |
197 | 197 |
198 | 198 |
def findMatches(it, lang):
    """Scan markup lines for supported tags and collect the matching rules.

    Each line of *it* is searched for the first tag of the form
    ``<tag attr="value" ...>text``.  The tag is compared against every
    entry of the module-level ``supportedKeys`` mapping, whose values are
    tuples of either ``(tag, attrNames)`` or ``(tag, attrNames, value)``
    where ``attrNames`` is a space-separated list of attribute names:

    * two-element form: the tag must carry all listed attributes; their
      values, joined by single spaces, are passed to ``checkMatch``.
    * three-element form: the joined attribute values must additionally
      equal ``value``; the text following the tag is then passed to
      ``checkMatch`` instead.

    Arguments:
        it: iterable of strings (lines) to scan.
        lang: passed through unchanged to ``extractMatches``.

    Returns:
        Whatever ``extractMatches(rules, rulesets, lang)`` returns for the
        rules and rulesets obtained from ``getRules()``.
    """
    rules, rulesets = getRules()

    for line in it:
        # Only the first tag on a line is considered.
        match = re.search(r'<([\w\-]+)\s*(.*?)\s*/?>([^<>]*)', line)
        if not match:
            continue

        tag = match.group(1)
        attrText = match.group(2)
        text = match.group(3).strip()

        attrs = {}
        for attrMatch in re.finditer(r'(\w+)="([^"]*)"', attrText):
            attrValue = attrMatch.group(2).strip()
            # Decode the basic character entities.  '&amp;' has to come
            # last, otherwise text such as '&amp;lt;' would be decoded
            # twice.
            attrValue = attrValue.replace('&lt;', '<')
            attrValue = attrValue.replace('&gt;', '>')
            attrValue = attrValue.replace('&quot;', '"')
            attrValue = attrValue.replace('&amp;', '&')
            attrs[attrMatch.group(1)] = attrValue

        for key, t in supportedKeys.iteritems():
            if len(t) == 3:
                requiredTag, requiredAttrs, requiredValue = t
            else:
                requiredTag, requiredAttrs = t
                requiredValue = None
            if requiredTag != tag:
                continue

            requiredAttrs = requiredAttrs.split(' ')
            foundAttrs = [attrs[attr] for attr in requiredAttrs
                          if attr in attrs]
            # Every required attribute has to be present on the tag.
            if len(foundAttrs) != len(requiredAttrs):
                continue

            value = ' '.join(foundAttrs)
            if requiredValue is not None:
                if requiredValue != value:
                    continue
                # The attributes only select the element; the value to
                # check is the text following the tag.
                value = text

            checkMatch(rules, key, value)
    return extractMatches(rules, rulesets, lang)
OLD | NEW |