| Index: abp/filters/parser.py |
| =================================================================== |
| --- a/abp/filters/parser.py |
| +++ b/abp/filters/parser.py |
| @@ -140,22 +140,15 @@ |
| Include = _line_type('Include', 'target', '%include {0.target}%') |
| -METADATA_REGEXP = re.compile(r'(.*?)\s*:\s*(.*)') |
| +METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') |
| INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
| -HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
| +HEADER_REGEXP = re.compile(r'\[(?:(Adblock(?:\s*Plus\s*[\d\.]+?)?)|.*)\]$', flags=re.I) |
| HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
| FILTER_OPTIONS_REGEXP = re.compile( |
| r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' |
| ) |
| -def _parse_header(text): |
| - match = HEADER_REGEXP.match(text) |
| - if not match: |
| - raise ParseError('Malformed header', text) |
| - return Header(match.group(1)) |
| - |
| - |
| def _parse_instruction(text): |
| match = INCLUDE_REGEXP.match(text) |
| if not match: |
| @@ -251,12 +244,17 @@ |
| return _parse_blocking_filter(text) |
| +def _decode_if_bytes(s):  # decode bytes as UTF-8; return text unchanged |
| + return s.decode('utf-8') if isinstance(s, type(b'')) else s |
| + |
| + |
| def parse_line(line_text): |
| """Parse one line of a filter list. |
| - Note that parse_line() doesn't handle special comments, hence never returns |
| - a Metadata() object, Adblock Plus only considers metadata when parsing the |
| - whole filter list and only if they are given at the top of the filter list. |
| + Note that parse_line() doesn't handle headers and special comments, |
| + hence never returns a Header() or Metadata() object. Adblock Plus only |
| + considers headers and metadata when parsing the whole filter list and |
| + only if they are given at the top of the filter list. |
| Parameters |
| ---------- |
| @@ -273,10 +271,7 @@ |
| ParseError |
| ParseError: If the line can't be parsed. |
| """ |
| - if isinstance(line_text, type(b'')): |
| - line_text = line_text.decode('utf-8') |
| - |
| - content = line_text.strip() |
| + content = _decode_if_bytes(line_text).strip() |
| if content == '': |
| line = EmptyLine() |
| @@ -284,8 +279,6 @@ |
| line = Comment(content[1:].lstrip()) |
| elif content.startswith('%') and content.endswith('%'): |
| line = _parse_instruction(content) |
| - elif content.startswith('[') and content.endswith(']'): |
| - line = _parse_header(content) |
| else: |
| line = parse_filter(content) |
| @@ -316,23 +309,37 @@ |
| """ |
| metadata_closed = False |
| - for line in lines: |
| - result = parse_line(line) |
| + for i, line in enumerate(lines): |
| + # Strip before matching, like parse_line() does, so that CRLF line |
| + # endings or stray whitespace can neither defeat the "$"-anchored |
| + # header regexp nor end up in metadata values. |
| + text = _decode_if_bytes(line).strip() |
| - if result.type == 'comment': |
| - match = METADATA_REGEXP.match(result.text) |
| + # A header is only recognized on the very first line. |
| + if i == 0: |
| + match = HEADER_REGEXP.match(text) |
| if match: |
| - key, value = match.groups() |
| + # Group 1 is unset (None) when the brackets hold something |
| + # other than an Adblock Plus header. |
| + version = match.group(1) |
| + if not version: |
| + raise ParseError('Malformed header', text) |
| + |
| + yield Header(version) |
| + continue |
| - # Historically, checksums can occur at the bottom of the |
| - # filter list. Checksums are no longer used by Adblock Plus, |
| - # but in order to strip them (in abp.filters.renderer), |
| - # we have to make sure to still parse them regardless of |
| - # their position in the filter list. |
| - if not metadata_closed or key.lower() == 'checksum': |
| - result = Metadata(key, value) |
| + match = METADATA_REGEXP.match(text) |
| + if match: |
| + key, value = match.groups() |
| - if result.type not in {'header', 'metadata'}: |
| - metadata_closed = True |
| + # Historically, checksums can occur at the bottom of the |
| + # filter list. Checksums are no longer used by Adblock Plus, |
| + # but in order to strip them (in abp.filters.renderer), |
| + # we have to make sure to still parse them regardless of |
| + # their position in the filter list. |
| + if not metadata_closed or key.lower() == 'checksum': |
| + yield Metadata(key, value) |
| + continue |
| - yield result |
| + metadata_closed = True |
| + yield parse_line(text) |