abp/filters/parser.py - Issue 29901579: Issue 6976 - Ignore invalid headers instead of crashing

Side by Side Diff: abp/filters/parser.py

Issue 29901579: Issue 6976 - Ignore invalid headers instead of crashing (Closed) Base URL: https://hg.adblockplus.org/python-abp

Patch Set: Created Oct. 4, 2018, 1:34 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>,	1 # This file is part of Adblock Plus <https://adblockplus.org/>,

2 # Copyright (C) 2006-present eyeo GmbH	2 # Copyright (C) 2006-present eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
135 Header = _line_type('Header', 'version', '[{.version}]')	135 Header = _line_type('Header', 'version', '[{.version}]')

136 EmptyLine = _line_type('EmptyLine', '', '')	136 EmptyLine = _line_type('EmptyLine', '', '')

137 Comment = _line_type('Comment', 'text', '! {.text}')	137 Comment = _line_type('Comment', 'text', '! {.text}')

138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')	138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')

139 Filter = _line_type('Filter', 'text selector action options', '{.text}')	139 Filter = _line_type('Filter', 'text selector action options', '{.text}')

140 Include = _line_type('Include', 'target', '%include {0.target}%')	140 Include = _line_type('Include', 'target', '%include {0.target}%')

141	141

142	142

143 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)')	143 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)')

144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')	144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')

145 HEADER_REGEXP = re.compile(r'\[(?:(Adblock(?:\s*Plus\s*[\d\.]+?)?)|.*)\]',	145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)

146 flags=re.I)

147 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')	146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')

148 FILTER_OPTIONS_REGEXP = re.compile(	147 FILTER_OPTIONS_REGEXP = re.compile(

149 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'	148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'

150 )	149 )

151	150

152	151

153 def _parse_instruction(text):	152 def _parse_instruction(text):

154 match = INCLUDE_REGEXP.match(text)	153 match = INCLUDE_REGEXP.match(text)

155 if not match:	154 if not match:

156 raise ParseError('Unrecognized instruction', text)	155 raise ParseError('Unrecognized instruction', text)

(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
287 line = line.decode('utf-8')	286 line = line.decode('utf-8')

288	287

289 stripped = line.strip()	288 stripped = line.strip()

290	289

291 if stripped == '':	290 if stripped == '':

292 return EmptyLine()	291 return EmptyLine()

293	292

294 if position == 'start':	293 if position == 'start':

295 match = HEADER_REGEXP.search(line)	294 match = HEADER_REGEXP.search(line)

296 if match:	295 if match:

297 version = match.group(1)	296 return Header(match.group(1))

298 if not version:

299 raise ParseError('Malformed header', line)

300 return Header(version)

301	297

302 if stripped.startswith('!'):	298 if stripped.startswith('!'):

303 match = METADATA_REGEXP.match(line)	299 match = METADATA_REGEXP.match(line)

304 if match:	300 if match:

305 key, value = match.groups()	301 key, value = match.groups()

306 if position != 'body' or key.lower() == 'checksum':	302 if position != 'body' or key.lower() == 'checksum':

307 return Metadata(key, value)	303 return Metadata(key, value)

308 return Comment(stripped[1:].lstrip())	304 return Comment(stripped[1:].lstrip())

309	305

310 if stripped.startswith('%') and stripped.endswith('%'):	306 if stripped.startswith('%') and stripped.endswith('%'):

(...skipping 28 matching lines...) Expand all Loading...
339 for line in lines:	335 for line in lines:

340 parsed_line = parse_line(line, position)	336 parsed_line = parse_line(line, position)

341 yield parsed_line	337 yield parsed_line

342	338

343 if position != 'body' and parsed_line.type in {'header', 'metadata'}:	339 if position != 'body' and parsed_line.type in {'header', 'metadata'}:

344 # Continue parsing metadata until it's over...	340 # Continue parsing metadata until it's over...

345 position = 'metadata'	341 position = 'metadata'

346 else:	342 else:

347 # ...then switch to parsing the body.	343 # ...then switch to parsing the body.

348 position = 'body'	344 position = 'body'

OLD	NEW

« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »