abp/filters/parser.py - Issue 29873561: Issue 6920 - Only parse metadata from the top of the file

Side by Side Diff: abp/filters/parser.py

Issue 29873561: Issue 6920 - Only parse metadata from the top of the file (Closed)

Patch Set: Fixed typo and moved logic to parse_filterlist() Created Sept. 4, 2018, 3:43 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>,	1 # This file is part of Adblock Plus <https://adblockplus.org/>,

2 # Copyright (C) 2006-present eyeo GmbH	2 # Copyright (C) 2006-present eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
133	133

134	134

135 Header = _line_type('Header', 'version', '[{.version}]')	135 Header = _line_type('Header', 'version', '[{.version}]')

136 EmptyLine = _line_type('EmptyLine', '', '')	136 EmptyLine = _line_type('EmptyLine', '', '')

137 Comment = _line_type('Comment', 'text', '! {.text}')	137 Comment = _line_type('Comment', 'text', '! {.text}')

138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')	138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')

139 Filter = _line_type('Filter', 'text selector action options', '{.text}')	139 Filter = _line_type('Filter', 'text selector action options', '{.text}')

140 Include = _line_type('Include', 'target', '%include {0.target}%')	140 Include = _line_type('Include', 'target', '%include {0.target}%')

141	141

142	142

143 METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)')	143 METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)')

144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')	144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')

145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)	145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)

146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')	146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')

147 FILTER_OPTIONS_REGEXP = re.compile(	147 FILTER_OPTIONS_REGEXP = re.compile(

148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'	148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'

149 )	149 )

150	150

151	151

152 def _parse_comment(text):

153 match = METADATA_REGEXP.match(text)

154 if match:

155 return Metadata(match.group(1), match.group(2))

156 return Comment(text[1:].strip())

157

158

159 def _parse_header(text):	152 def _parse_header(text):

160 match = HEADER_REGEXP.match(text)	153 match = HEADER_REGEXP.match(text)

161 if not match:	154 if not match:

162 raise ParseError('Malformed header', text)	155 raise ParseError('Malformed header', text)

163 return Header(match.group(1))	156 return Header(match.group(1))

164	157

165	158

166 def _parse_instruction(text):	159 def _parse_instruction(text):

167 match = INCLUDE_REGEXP.match(text)	160 match = INCLUDE_REGEXP.match(text)

168 if not match:	161 if not match:

(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
277 ParseError: If the line can't be parsed.	270 ParseError: If the line can't be parsed.

278 """	271 """

279 if isinstance(line_text, type(b'')):	272 if isinstance(line_text, type(b'')):

280 line_text = line_text.decode('utf-8')	273 line_text = line_text.decode('utf-8')

281	274

282 content = line_text.strip()	275 content = line_text.strip()

283	276

284 if content == '':	277 if content == '':

285 line = EmptyLine()	278 line = EmptyLine()

286 elif content.startswith('!'):	279 elif content.startswith('!'):

287 line = _parse_comment(content)	280 line = Comment(content[1:].lstrip())

288 elif content.startswith('%') and content.endswith('%'):	281 elif content.startswith('%') and content.endswith('%'):

289 line = _parse_instruction(content)	282 line = _parse_instruction(content)

290 elif content.startswith('[') and content.endswith(']'):	283 elif content.startswith('[') and content.endswith(']'):

291 line = _parse_header(content)	284 line = _parse_header(content)

292 else:	285 else:

293 line = parse_filter(content)	286 line = parse_filter(content)

294	287

295 assert line.to_string().replace(' ', '') == content.replace(' ', '')	288 assert line.to_string().replace(' ', '') == content.replace(' ', '')

296 return line	289 return line

297	290

(...skipping 12 matching lines...) Expand all Loading...
310 Parsed lines of the filter list.	303 Parsed lines of the filter list.

311	304

312 Raises	305 Raises

313 ------	306 ------

314 ParseError	307 ParseError

315 Thrown during iteration for invalid filter list lines.	308 Thrown during iteration for invalid filter list lines.

316 TypeError	309 TypeError

317 If `lines` is not iterable.	310 If `lines` is not iterable.

318	311

319 """	312 """

	313 metadata_closed = False

	314

320 for line in lines:	315 for line in lines:

321 yield parse_line(line)	316 result = parse_line(line)

	317

	318 if isinstance(result, Comment):

	319 match = METADATA_REGEXP.match(result.text)

	320 if match:

	321 # The regular expression matches as well if we see a

	322 # malformed key (e.g. "Last modified"). In that case we

	323 # want to keep looking for more metadata, but yield a

	324 # Comment instead of a Metadata object.

	325 #

	326 # Historically, checksums can occur at the bottom of the

	327 # filter list. Checksums are no longer used by Adblock Plus,

	328 # but in order to strip them (in abp.filters.renderer),

	329 # we have to make sure to still parse them regardless of

	330 # their position in the filter list.

	331 key, value = match.groups()

	332 if key and (not metadata_closed or key.lower() == 'checksum'):

	333 result = Metadata(key, value)

	334

	335 yield result

	336 continue

	337

	338 if not isinstance(result, Header):

	339 metadata_closed = True

	340

	341 yield result

OLD	NEW

« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »