| Index: abp/filters/parser.py |
| =================================================================== |
| --- a/abp/filters/parser.py |
| +++ b/abp/filters/parser.py |
| @@ -140,7 +140,7 @@ |
| Include = _line_type('Include', 'target', '%include {0.target}%') |
| -METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)') |
| +METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)') |
| INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
| HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
| HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
| @@ -149,13 +149,6 @@ |
| ) |
| -def _parse_comment(text): |
| - match = METADATA_REGEXP.match(text) |
| - if match: |
| - return Metadata(match.group(1), match.group(2)) |
| - return Comment(text[1:].strip()) |
| - |
| - |
| def _parse_header(text): |
| match = HEADER_REGEXP.match(text) |
| if not match: |
| @@ -261,6 +254,10 @@ |
| def parse_line(line_text): |
| """Parse one line of a filter list. |
| + Note that parse_line() doesn't handle special comments, hence never returns |
| + a Metadata() object, Adblock Plus only considers metadata when parsing the |
| + whole filter list and only if they are given at the top of the filter list. |
| + |
| Parameters |
| ---------- |
| line_text : str |
| @@ -284,7 +281,7 @@ |
| if content == '': |
| line = EmptyLine() |
| elif content.startswith('!'): |
| - line = _parse_comment(content) |
| + line = Comment(content[1:].lstrip()) |
| elif content.startswith('%') and content.endswith('%'): |
| line = _parse_instruction(content) |
| elif content.startswith('[') and content.endswith(']'): |
| @@ -317,5 +314,28 @@ |
| If `lines` is not iterable. |
| """ |
| + metadata_closed = False |
| + |
| for line in lines: |
| - yield parse_line(line) |
| + result = parse_line(line) |
| + |
| + if isinstance(result, Comment): |
| + match = METADATA_REGEXP.match(result.text) |
| + if match: |
| + key, value = match.groups() |
| + |
| + # Historically, checksums can occur at the bottom of the |
| + # filter list. Checksums are no longer used by Adblock Plus, |
| + # but in order to strip them (in abp.filters.renderer), |
| + # we have to make sure to still parse them regardless of |
| + # their position in the filter list. |
| + if not metadata_closed or key.lower() == 'checksum': |
| + yield Metadata(key, value) |
| + continue |
| + |
| + if not result.text: |
| + metadata_closed = True |
| + elif isinstance(result, (EmptyLine, Filter)): |
|
Vasily Kuznetsov
2018/09/04 19:50:56
I just thought that since this could also be used
Sebastian Noack
2018/09/04 20:23:32
Done.
|
| + metadata_closed = True |
| + |
| + yield result |