Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: abp/filters/parser.py

Issue 29880555: Issue 6877 - Only parse headers in the first line of the filter list (Closed)
Patch Set: Created Sept. 14, 2018, 2:40 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: abp/filters/parser.py
===================================================================
--- a/abp/filters/parser.py
+++ b/abp/filters/parser.py
@@ -140,22 +140,15 @@
Include = _line_type('Include', 'target', '%include {0.target}%')
-METADATA_REGEXP = re.compile(r'(.*?)\s*:\s*(.*)')
+METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)')
INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
-HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
+HEADER_REGEXP = re.compile(r'\[(?:(Adblock(?:\s*Plus\s*[\d\.]+?)?)|.*)\]$', flags=re.I)
HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
FILTER_OPTIONS_REGEXP = re.compile(
r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'
)
-def _parse_header(text):
- match = HEADER_REGEXP.match(text)
- if not match:
- raise ParseError('Malformed header', text)
- return Header(match.group(1))
-
-
def _parse_instruction(text):
match = INCLUDE_REGEXP.match(text)
if not match:
@@ -251,12 +244,17 @@
return _parse_blocking_filter(text)
+def _decode_if_bytes(s):
+ return s.decode('utf-8') if isinstance(s, type(b'')) else s
+
+
def parse_line(line_text):
"""Parse one line of a filter list.
- Note that parse_line() doesn't handle special comments, hence never returns
- a Metadata() object, Adblock Plus only considers metadata when parsing the
- whole filter list and only if they are given at the top of the filter list.
+ Note that parse_line() doesn't handle headers and special comments,
+ hence never returns a Header() or Metadata() object. Adblock Plus only
+ considers headers and metadata when parsing the whole filter list and
+ only if they are given at the top of the filter list.
Parameters
----------
@@ -273,10 +271,7 @@
ParseError
ParseError: If the line can't be parsed.
"""
- if isinstance(line_text, type(b'')):
- line_text = line_text.decode('utf-8')
-
- content = line_text.strip()
+ content = _decode_if_bytes(line_text).strip()
if content == '':
line = EmptyLine()
@@ -284,8 +279,6 @@
line = Comment(content[1:].lstrip())
elif content.startswith('%') and content.endswith('%'):
line = _parse_instruction(content)
- elif content.startswith('[') and content.endswith(']'):
- line = _parse_header(content)
else:
line = parse_filter(content)
@@ -316,23 +309,31 @@
"""
metadata_closed = False
- for line in lines:
- result = parse_line(line)
+ for i, line in enumerate(lines):
+ text = _decode_if_bytes(line)
- if result.type == 'comment':
- match = METADATA_REGEXP.match(result.text)
+ if i == 0:
+ match = HEADER_REGEXP.match(text)
if match:
- key, value = match.groups()
+ version = match.group(1)
+ if not version:
+ raise ParseError('Malformed header', text)
+
+ yield Header(version)
+ continue
- # Historically, checksums can occur at the bottom of the
- # filter list. Checksums are no longer used by Adblock Plus,
- # but in order to strip them (in abp.filters.renderer),
- # we have to make sure to still parse them regardless of
- # their position in the filter list.
- if not metadata_closed or key.lower() == 'checksum':
- result = Metadata(key, value)
+ match = METADATA_REGEXP.match(text)
+ if match:
+ key, value = match.groups()
- if result.type not in {'header', 'metadata'}:
- metadata_closed = True
+ # Historically, checksums can occur at the bottom of the
+ # filter list. Checksums are no longer used by Adblock Plus,
+ # but in order to strip them (in abp.filters.renderer),
+ # we have to make sure to still parse them regardless of
+ # their position in the filter list.
+ if not metadata_closed or key.lower() == 'checksum':
+ yield Metadata(key, value)
+ continue
- yield result
+ metadata_closed = True
+ yield parse_line(text)
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld