Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: abp/filters/parser.py

Issue 29873561: Issue 6920 - Only parse metadata from the top of the file (Closed)
Patch Set: Documented behavior for parse_line(), simplified end-of-metadata semantics Created Sept. 4, 2018, 6:26 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: abp/filters/parser.py
===================================================================
--- a/abp/filters/parser.py
+++ b/abp/filters/parser.py
@@ -140,7 +140,7 @@
Include = _line_type('Include', 'target', '%include {0.target}%')
-METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)')
+METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)')
INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
@@ -149,13 +149,6 @@
)
-def _parse_comment(text):
- match = METADATA_REGEXP.match(text)
- if match:
- return Metadata(match.group(1), match.group(2))
- return Comment(text[1:].strip())
-
-
def _parse_header(text):
match = HEADER_REGEXP.match(text)
if not match:
@@ -261,6 +254,10 @@
def parse_line(line_text):
"""Parse one line of a filter list.
+ Note that parse_line() doesn't handle special comments, hence never returns
+ a Metadata() object. Adblock Plus only considers metadata when parsing the
+ whole filter list and only if they are given at the top of the filter list.
+
Parameters
----------
line_text : str
@@ -284,7 +281,7 @@
if content == '':
line = EmptyLine()
elif content.startswith('!'):
- line = _parse_comment(content)
+ line = Comment(content[1:].lstrip())
elif content.startswith('%') and content.endswith('%'):
line = _parse_instruction(content)
elif content.startswith('[') and content.endswith(']'):
@@ -317,5 +314,28 @@
If `lines` is not iterable.
"""
+ metadata_closed = False
+
for line in lines:
- yield parse_line(line)
+ result = parse_line(line)
+
+ if isinstance(result, Comment):
+ match = METADATA_REGEXP.match(result.text)
+ if match:
+ key, value = match.groups()
+
+ # Historically, checksums can occur at the bottom of the
+ # filter list. Checksums are no longer used by Adblock Plus,
+ # but in order to strip them (in abp.filters.renderer),
+ # we have to make sure to still parse them regardless of
+ # their position in the filter list.
+ if not metadata_closed or key.lower() == 'checksum':
+ yield Metadata(key, value)
+ continue
+
+ if not result.text:
+ metadata_closed = True
+ elif isinstance(result, (EmptyLine, Filter)):
Vasily Kuznetsov 2018/09/04 19:50:56 I just thought that since this could also be used
Sebastian Noack 2018/09/04 20:23:32 Done.
+ metadata_closed = True
+
+ yield result
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld