abp/filters/parser.py - Issue 29880555: Issue 6877 - Only parse headers in the first line of the filter list

Keyboard Shortcuts

	File
u :	up to issue
m :	publish + mail comments
M :	edit review message
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line
<Enter> :	respond to / edit current comment
d :	mark current comment as done

	Issue
u :	up to list of issues
m :	publish + mail comments
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue
# :	close issue

	Comment/message editing
<Ctrl> + s or <Ctrl> + Enter :	save comment
<Esc> :	cancel edit

Unified Diff: abp/filters/parser.py

Issue 29880555: Issue 6877 - Only parse headers in the first line of the filter list (Closed)

Patch Set: Created Sept. 14, 2018, 2:40 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: abp/filters/parser.py

===================================================================

--- a/abp/filters/parser.py

+++ b/abp/filters/parser.py

@@ -140,22 +140,15 @@

Include = _line_type('Include', 'target', '%include {0.target}%')

-METADATA_REGEXP = re.compile(r'(.*?)\s*:\s*(.*)')

+METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)')

INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')

-HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)

+HEADER_REGEXP = re.compile(r'\[(?:(Adblock(?:\s*Plus\s*[\d\.]+?)?)|.*)\]$', flags=re.I)

HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')

FILTER_OPTIONS_REGEXP = re.compile(

r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'

)

-def _parse_header(text):

- match = HEADER_REGEXP.match(text)

- if not match:

- raise ParseError('Malformed header', text)

- return Header(match.group(1))

def _parse_instruction(text):

match = INCLUDE_REGEXP.match(text)

if not match:

@@ -251,12 +244,17 @@

return _parse_blocking_filter(text)

+def _decode_if_bytes(s):

+ return s.decode('utf-8') if isinstance(s, type(b'')) else s

def parse_line(line_text):

"""Parse one line of a filter list.

- Note that parse_line() doesn't handle special comments, hence never returns

- a Metadata() object, Adblock Plus only considers metadata when parsing the

- whole filter list and only if they are given at the top of the filter list.

+ Note that parse_line() doesn't handle headers and special comments,

+ hence never returns a Header() or Metadata() object. Adblock Plus only

+ considers headers and metadata when parsing the whole filter list and

+ only if they are given at the top of the filter list.

Parameters

----------

@@ -273,10 +271,7 @@

ParseError

ParseError: If the line can't be parsed.

"""

- if isinstance(line_text, type(b'')):

- line_text = line_text.decode('utf-8')

- content = line_text.strip()

+ content = _decode_if_bytes(line_text).strip()

if content == '':

line = EmptyLine()

@@ -284,8 +279,6 @@

line = Comment(content[1:].lstrip())

elif content.startswith('%') and content.endswith('%'):

line = _parse_instruction(content)

- elif content.startswith('[') and content.endswith(']'):

- line = _parse_header(content)

else:

line = parse_filter(content)

@@ -316,23 +309,31 @@

"""

metadata_closed = False

- for line in lines:

- result = parse_line(line)

+ for i, line in enumerate(lines):

+ text = _decode_if_bytes(line)

- if result.type == 'comment':

- match = METADATA_REGEXP.match(result.text)

+ if i == 0:

+ match = HEADER_REGEXP.match(text)

if match:

- key, value = match.groups()

+ version = match.group(1)

+ if not version:

+ raise ParseError('Malformed header', text)

+ yield Header(version)

+ continue

- # Historically, checksums can occur at the bottom of the

- # filter list. Checksums are no longer used by Adblock Plus,

- # but in order to strip them (in abp.filters.renderer),

- # we have to make sure to still parse them regardless of

- # their position in the filter list.

- if not metadata_closed or key.lower() == 'checksum':

- result = Metadata(key, value)

+ match = METADATA_REGEXP.match(text)

+ if match:

+ key, value = match.groups()

- if result.type not in {'header', 'metadata'}:

- metadata_closed = True

+ # Historically, checksums can occur at the bottom of the

+ # filter list. Checksums are no longer used by Adblock Plus,

+ # but in order to strip them (in abp.filters.renderer),

+ # we have to make sure to still parse them regardless of

+ # their position in the filter list.

+ if not metadata_closed or key.lower() == 'checksum':

+ yield Metadata(key, value)

+ continue

- yield result

+ metadata_closed = True

+ yield parse_line(text)

« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »