Index: abp/filters/parser.py |
=================================================================== |
--- a/abp/filters/parser.py |
+++ b/abp/filters/parser.py |
@@ -140,22 +140,15 @@ |
Include = _line_type('Include', 'target', '%include {0.target}%') |
-METADATA_REGEXP = re.compile(r'(.*?)\s*:\s*(.*)') |
+METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') |
INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
-HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
+HEADER_REGEXP = re.compile(r'\[(?:(Adblock(?:\s*Plus\s*[\d\.]+?)?)|.*)\]$', flags=re.I) |
HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
FILTER_OPTIONS_REGEXP = re.compile( |
r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' |
) |
-def _parse_header(text): |
- match = HEADER_REGEXP.match(text) |
- if not match: |
- raise ParseError('Malformed header', text) |
- return Header(match.group(1)) |
- |
- |
def _parse_instruction(text): |
match = INCLUDE_REGEXP.match(text) |
if not match: |
@@ -251,12 +244,17 @@ |
return _parse_blocking_filter(text) |
+def _decode_if_bytes(s): |
+ return s.decode('utf-8') if isinstance(s, type(b'')) else s |
+ |
+ |
def parse_line(line_text): |
"""Parse one line of a filter list. |
- Note that parse_line() doesn't handle special comments, hence never returns |
- a Metadata() object, Adblock Plus only considers metadata when parsing the |
- whole filter list and only if they are given at the top of the filter list. |
+ Note that parse_line() doesn't handle headers and special comments, |
+ hence never returns a Header() or Metadata() object. Adblock Plus only |
+ considers headers and metadata when parsing the whole filter list and |
+ only if they are given at the top of the filter list. |
Parameters |
---------- |
@@ -273,10 +271,7 @@ |
ParseError |
ParseError: If the line can't be parsed. |
""" |
- if isinstance(line_text, type(b'')): |
- line_text = line_text.decode('utf-8') |
- |
- content = line_text.strip() |
+ content = _decode_if_bytes(line_text).strip() |
if content == '': |
line = EmptyLine() |
@@ -284,8 +279,6 @@ |
line = Comment(content[1:].lstrip()) |
elif content.startswith('%') and content.endswith('%'): |
line = _parse_instruction(content) |
- elif content.startswith('[') and content.endswith(']'): |
- line = _parse_header(content) |
else: |
line = parse_filter(content) |
@@ -316,23 +309,33 @@ |
""" |
metadata_closed = False |
- for line in lines: |
- result = parse_line(line) |
+ for i, line in enumerate(lines): |
+ # Strip like parse_line() does, so header detection and metadata |
+ # values are not affected by surrounding whitespace or line endings. |
+ text = _decode_if_bytes(line).strip() |
- if result.type == 'comment': |
- match = METADATA_REGEXP.match(result.text) |
+ if i == 0: |
+ match = HEADER_REGEXP.match(text) |
if match: |
- key, value = match.groups() |
+ version = match.group(1) |
+ if not version: |
+ raise ParseError('Malformed header', text) |
+ |
+ yield Header(version) |
+ continue |
- # Historically, checksums can occur at the bottom of the |
- # filter list. Checksums are no longer used by Adblock Plus, |
- # but in order to strip them (in abp.filters.renderer), |
- # we have to make sure to still parse them regardless of |
- # their position in the filter list. |
- if not metadata_closed or key.lower() == 'checksum': |
- result = Metadata(key, value) |
+ match = METADATA_REGEXP.match(text) |
+ if match: |
+ key, value = match.groups() |
- if result.type not in {'header', 'metadata'}: |
- metadata_closed = True |
+ # Historically, checksums can occur at the bottom of the |
+ # filter list. Checksums are no longer used by Adblock Plus, |
+ # but in order to strip them (in abp.filters.renderer), |
+ # we have to make sure to still parse them regardless of |
+ # their position in the filter list. |
+ if not metadata_closed or key.lower() == 'checksum': |
+ yield Metadata(key, value) |
+ continue |
- yield result |
+ metadata_closed = True |
+ yield parse_line(text) |