| OLD | NEW | 
|---|---|
| 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH | 
| 3 # | 3 # | 
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify | 
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as | 
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. | 
| 7 # | 7 # | 
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, | 
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| (...skipping 122 matching lines...) Expand 10 before \| Expand all \| Expand 10 after  Loading... | 
| 133 | 133 | 
| 134 | 134 | 
| 135 Header = _line_type('Header', 'version', '[{.version}]') | 135 Header = _line_type('Header', 'version', '[{.version}]') | 
| 136 EmptyLine = _line_type('EmptyLine', '', '') | 136 EmptyLine = _line_type('EmptyLine', '', '') | 
| 137 Comment = _line_type('Comment', 'text', '! {.text}') | 137 Comment = _line_type('Comment', 'text', '! {.text}') | 
| 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 
| 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 
| 140 Include = _line_type('Include', 'target', '%include {0.target}%') | 140 Include = _line_type('Include', 'target', '%include {0.target}%') | 
| 141 | 141 | 
| 142 | 142 | 
| 143 METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)') | 143 METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)') | 
| 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 
| 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 
| 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 
| 147 FILTER_OPTIONS_REGEXP = re.compile( | 147 FILTER_OPTIONS_REGEXP = re.compile( | 
| 148     r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 148     r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 
| 149 ) | 149 ) | 
| 150 | 150 | 
| 151 | 151 | 
| 152 def _parse_comment(text): |  | 
| 153     match = METADATA_REGEXP.match(text) |  | 
| 154     if match: |  | 
| 155         return Metadata(match.group(1), match.group(2)) |  | 
| 156     return Comment(text[1:].strip()) |  | 
| 157 |  | 
| 158 |  | 
| 159 def _parse_header(text): | 152 def _parse_header(text): | 
| 160     match = HEADER_REGEXP.match(text) | 153     match = HEADER_REGEXP.match(text) | 
| 161     if not match: | 154     if not match: | 
| 162         raise ParseError('Malformed header', text) | 155         raise ParseError('Malformed header', text) | 
| 163     return Header(match.group(1)) | 156     return Header(match.group(1)) | 
| 164 | 157 | 
| 165 | 158 | 
| 166 def _parse_instruction(text): | 159 def _parse_instruction(text): | 
| 167     match = INCLUDE_REGEXP.match(text) | 160     match = INCLUDE_REGEXP.match(text) | 
| 168     if not match: | 161     if not match: | 
| (...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after  Loading... | 
| 254     if '#' in text: | 247     if '#' in text: | 
| 255         match = HIDING_FILTER_REGEXP.search(text) | 248         match = HIDING_FILTER_REGEXP.search(text) | 
| 256         if match: | 249         if match: | 
| 257             return _parse_hiding_filter(text, *match.groups()) | 250             return _parse_hiding_filter(text, *match.groups()) | 
| 258     return _parse_blocking_filter(text) | 251     return _parse_blocking_filter(text) | 
| 259 | 252 | 
| 260 | 253 | 
| 261 def parse_line(line_text): | 254 def parse_line(line_text): | 
| 262     """Parse one line of a filter list. | 255     """Parse one line of a filter list. | 
| 263 | 256 | 
|  | 257     Note that parse_line() doesn't handle special comments, hence never returns | 
|  | 258     a Metadata() object, Adblock Plus only considers metadata when parsing the | 
|  | 259     whole filter list and only if they are given at the top of the filter list. | 
|  | 260 | 
| 264     Parameters | 261     Parameters | 
| 265     ---------- | 262     ---------- | 
| 266     line_text : str | 263     line_text : str | 
| 267         Line of a filter list. | 264         Line of a filter list. | 
| 268 | 265 | 
| 269     Returns | 266     Returns | 
| 270     ------- | 267     ------- | 
| 271     namedtuple | 268     namedtuple | 
| 272         Parsed line (see `_line_type`). | 269         Parsed line (see `_line_type`). | 
| 273 | 270 | 
| 274     Raises | 271     Raises | 
| 275     ------ | 272     ------ | 
| 276     ParseError | 273     ParseError | 
| 277         ParseError: If the line can't be parsed. | 274         ParseError: If the line can't be parsed. | 
| 278     """ | 275     """ | 
| 279     if isinstance(line_text, type(b'')): | 276     if isinstance(line_text, type(b'')): | 
| 280         line_text = line_text.decode('utf-8') | 277         line_text = line_text.decode('utf-8') | 
| 281 | 278 | 
| 282     content = line_text.strip() | 279     content = line_text.strip() | 
| 283 | 280 | 
| 284     if content == '': | 281     if content == '': | 
| 285         line = EmptyLine() | 282         line = EmptyLine() | 
| 286     elif content.startswith('!'): | 283     elif content.startswith('!'): | 
| 287         line = _parse_comment(content) | 284         line = Comment(content[1:].lstrip()) | 
| 288     elif content.startswith('%') and content.endswith('%'): | 285     elif content.startswith('%') and content.endswith('%'): | 
| 289         line = _parse_instruction(content) | 286         line = _parse_instruction(content) | 
| 290     elif content.startswith('[') and content.endswith(']'): | 287     elif content.startswith('[') and content.endswith(']'): | 
| 291         line = _parse_header(content) | 288         line = _parse_header(content) | 
| 292     else: | 289     else: | 
| 293         line = parse_filter(content) | 290         line = parse_filter(content) | 
| 294 | 291 | 
| 295     assert line.to_string().replace(' ', '') == content.replace(' ', '') | 292     assert line.to_string().replace(' ', '') == content.replace(' ', '') | 
| 296     return line | 293     return line | 
| 297 | 294 | 
| (...skipping 12 matching lines...) Expand all  Loading... | 
| 310         Parsed lines of the filter list. | 307         Parsed lines of the filter list. | 
| 311 | 308 | 
| 312     Raises | 309     Raises | 
| 313     ------ | 310     ------ | 
| 314     ParseError | 311     ParseError | 
| 315         Thrown during iteration for invalid filter list lines. | 312         Thrown during iteration for invalid filter list lines. | 
| 316     TypeError | 313     TypeError | 
| 317         If `lines` is not iterable. | 314         If `lines` is not iterable. | 
| 318 | 315 | 
| 319     """ | 316     """ | 
|  | 317     metadata_closed = False | 
|  | 318 | 
| 320     for line in lines: | 319     for line in lines: | 
| 321         yield parse_line(line) | 320         result = parse_line(line) | 
|  | 321 | 
|  | 322         if isinstance(result, Comment): | 
|  | 323             match = METADATA_REGEXP.match(result.text) | 
|  | 324             if match: | 
|  | 325                 key, value = match.groups() | 
|  | 326 | 
|  | 327                 # Historically, checksums can occur at the bottom of the | 
|  | 328                 # filter list. Checksums are no longer used by Adblock Plus, | 
|  | 329                 # but in order to strip them (in abp.filters.renderer), | 
|  | 330                 # we have to make sure to still parse them regardless of | 
|  | 331                 # their position in the filter list. | 
|  | 332                 if not metadata_closed or key.lower() == 'checksum': | 
|  | 333                     yield Metadata(key, value) | 
|  | 334                     continue | 
|  | 335 | 
|  | 336             if not result.text: | 
|  | 337                 metadata_closed = True | 
|  | 338         elif not isinstance(result, Header): | 
|  | 339             metadata_closed = True | 
|  | 340 | 
|  | 341         yield result | 
| OLD | NEW | 
|---|---|