Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp/filters/parser.py

Issue 29873561: Issue 6920 - Only parse metadata from the top of the file (Closed)
Patch Set: Fixed typo and moved logic to parse_filterlist() Created Sept. 4, 2018, 3:43 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 133
134 134
135 Header = _line_type('Header', 'version', '[{.version}]') 135 Header = _line_type('Header', 'version', '[{.version}]')
136 EmptyLine = _line_type('EmptyLine', '', '') 136 EmptyLine = _line_type('EmptyLine', '', '')
137 Comment = _line_type('Comment', 'text', '! {.text}') 137 Comment = _line_type('Comment', 'text', '! {.text}')
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') 139 Filter = _line_type('Filter', 'text selector action options', '{.text}')
140 Include = _line_type('Include', 'target', '%include {0.target}%') 140 Include = _line_type('Include', 'target', '%include {0.target}%')
141 141
142 142
143 METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)') 143 METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)')
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
147 FILTER_OPTIONS_REGEXP = re.compile( 147 FILTER_OPTIONS_REGEXP = re.compile(
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'
149 ) 149 )
150 150
151 151
152 def _parse_comment(text):
153 match = METADATA_REGEXP.match(text)
154 if match:
155 return Metadata(match.group(1), match.group(2))
156 return Comment(text[1:].strip())
157
158
159 def _parse_header(text): 152 def _parse_header(text):
160 match = HEADER_REGEXP.match(text) 153 match = HEADER_REGEXP.match(text)
161 if not match: 154 if not match:
162 raise ParseError('Malformed header', text) 155 raise ParseError('Malformed header', text)
163 return Header(match.group(1)) 156 return Header(match.group(1))
164 157
165 158
166 def _parse_instruction(text): 159 def _parse_instruction(text):
167 match = INCLUDE_REGEXP.match(text) 160 match = INCLUDE_REGEXP.match(text)
168 if not match: 161 if not match:
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
277 ParseError: If the line can't be parsed. 270 ParseError: If the line can't be parsed.
278 """ 271 """
279 if isinstance(line_text, type(b'')): 272 if isinstance(line_text, type(b'')):
280 line_text = line_text.decode('utf-8') 273 line_text = line_text.decode('utf-8')
281 274
282 content = line_text.strip() 275 content = line_text.strip()
283 276
284 if content == '': 277 if content == '':
285 line = EmptyLine() 278 line = EmptyLine()
286 elif content.startswith('!'): 279 elif content.startswith('!'):
287 line = _parse_comment(content) 280 line = Comment(content[1:].lstrip())
288 elif content.startswith('%') and content.endswith('%'): 281 elif content.startswith('%') and content.endswith('%'):
289 line = _parse_instruction(content) 282 line = _parse_instruction(content)
290 elif content.startswith('[') and content.endswith(']'): 283 elif content.startswith('[') and content.endswith(']'):
291 line = _parse_header(content) 284 line = _parse_header(content)
292 else: 285 else:
293 line = parse_filter(content) 286 line = parse_filter(content)
294 287
295 assert line.to_string().replace(' ', '') == content.replace(' ', '') 288 assert line.to_string().replace(' ', '') == content.replace(' ', '')
296 return line 289 return line
297 290
(...skipping 12 matching lines...) Expand all
310 Parsed lines of the filter list. 303 Parsed lines of the filter list.
311 304
312 Raises 305 Raises
313 ------ 306 ------
314 ParseError 307 ParseError
315 Thrown during iteration for invalid filter list lines. 308 Thrown during iteration for invalid filter list lines.
316 TypeError 309 TypeError
317 If `lines` is not iterable. 310 If `lines` is not iterable.
318 311
319 """ 312 """
313 metadata_closed = False
314
320 for line in lines: 315 for line in lines:
321 yield parse_line(line) 316 result = parse_line(line)
317
318 if isinstance(result, Comment):
319 match = METADATA_REGEXP.match(result.text)
320 if match:
321 # The regular expression matches as well if we see a
322 # malformed key (e.g. "Last modified"). In that case we
323 # want to keep looking for more metadata, but yield a
324 # Comment instead of a Metadata object.
325 #
326 # Historically, checksums can occur at the bottom of the
327 # filter list. Checksums are no longer used by Adblock Plus,
328 # but in order to strip them (in abp.filters.renderer),
329 # we have to make sure to still parse them regardless of
330 # their position in the filter list.
331 key, value = match.groups()
332 if key and (not metadata_closed or key.lower() == 'checksum'):
333 result = Metadata(key, value)
334
335 yield result
336 continue
337
338 if not isinstance(result, Header):
339 metadata_closed = True
340
341 yield result
OLDNEW
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld