Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp/filters/parser.py

Issue 29873561: Issue 6920 - Only parse metadata from the top of the file (Closed)
Patch Set: Documented behavior for parse_line(), simplified end-of-metadata semantics Created Sept. 4, 2018, 6:26 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 133
134 134
135 Header = _line_type('Header', 'version', '[{.version}]') 135 Header = _line_type('Header', 'version', '[{.version}]')
136 EmptyLine = _line_type('EmptyLine', '', '') 136 EmptyLine = _line_type('EmptyLine', '', '')
137 Comment = _line_type('Comment', 'text', '! {.text}') 137 Comment = _line_type('Comment', 'text', '! {.text}')
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') 139 Filter = _line_type('Filter', 'text selector action options', '{.text}')
140 Include = _line_type('Include', 'target', '%include {0.target}%') 140 Include = _line_type('Include', 'target', '%include {0.target}%')
141 141
142 142
143 METADATA_REGEXP = re.compile(r'!\s*([\w-]+)\s*:(?!//)\s*(.*)') 143 METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)')
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
147 FILTER_OPTIONS_REGEXP = re.compile( 147 FILTER_OPTIONS_REGEXP = re.compile(
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'
149 ) 149 )
150 150
151 151
152 def _parse_comment(text):
153 match = METADATA_REGEXP.match(text)
154 if match:
155 return Metadata(match.group(1), match.group(2))
156 return Comment(text[1:].strip())
157
158
159 def _parse_header(text): 152 def _parse_header(text):
160 match = HEADER_REGEXP.match(text) 153 match = HEADER_REGEXP.match(text)
161 if not match: 154 if not match:
162 raise ParseError('Malformed header', text) 155 raise ParseError('Malformed header', text)
163 return Header(match.group(1)) 156 return Header(match.group(1))
164 157
165 158
166 def _parse_instruction(text): 159 def _parse_instruction(text):
167 match = INCLUDE_REGEXP.match(text) 160 match = INCLUDE_REGEXP.match(text)
168 if not match: 161 if not match:
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
254 if '#' in text: 247 if '#' in text:
255 match = HIDING_FILTER_REGEXP.search(text) 248 match = HIDING_FILTER_REGEXP.search(text)
256 if match: 249 if match:
257 return _parse_hiding_filter(text, *match.groups()) 250 return _parse_hiding_filter(text, *match.groups())
258 return _parse_blocking_filter(text) 251 return _parse_blocking_filter(text)
259 252
260 253
261 def parse_line(line_text): 254 def parse_line(line_text):
262 """Parse one line of a filter list. 255 """Parse one line of a filter list.
263 256
257 Note that parse_line() doesn't handle special comments, hence never returns
258 a Metadata() object. Adblock Plus only considers metadata when parsing the
259 whole filter list and only if they are given at the top of the filter list.
260
264 Parameters 261 Parameters
265 ---------- 262 ----------
266 line_text : str 263 line_text : str
267 Line of a filter list. 264 Line of a filter list.
268 265
269 Returns 266 Returns
270 ------- 267 -------
271 namedtuple 268 namedtuple
272 Parsed line (see `_line_type`). 269 Parsed line (see `_line_type`).
273 270
274 Raises 271 Raises
275 ------ 272 ------
276 ParseError 273 ParseError
277 ParseError: If the line can't be parsed. 274 ParseError: If the line can't be parsed.
278 """ 275 """
279 if isinstance(line_text, type(b'')): 276 if isinstance(line_text, type(b'')):
280 line_text = line_text.decode('utf-8') 277 line_text = line_text.decode('utf-8')
281 278
282 content = line_text.strip() 279 content = line_text.strip()
283 280
284 if content == '': 281 if content == '':
285 line = EmptyLine() 282 line = EmptyLine()
286 elif content.startswith('!'): 283 elif content.startswith('!'):
287 line = _parse_comment(content) 284 line = Comment(content[1:].lstrip())
288 elif content.startswith('%') and content.endswith('%'): 285 elif content.startswith('%') and content.endswith('%'):
289 line = _parse_instruction(content) 286 line = _parse_instruction(content)
290 elif content.startswith('[') and content.endswith(']'): 287 elif content.startswith('[') and content.endswith(']'):
291 line = _parse_header(content) 288 line = _parse_header(content)
292 else: 289 else:
293 line = parse_filter(content) 290 line = parse_filter(content)
294 291
295 assert line.to_string().replace(' ', '') == content.replace(' ', '') 292 assert line.to_string().replace(' ', '') == content.replace(' ', '')
296 return line 293 return line
297 294
(...skipping 12 matching lines...) Expand all
310 Parsed lines of the filter list. 307 Parsed lines of the filter list.
311 308
312 Raises 309 Raises
313 ------ 310 ------
314 ParseError 311 ParseError
315 Thrown during iteration for invalid filter list lines. 312 Thrown during iteration for invalid filter list lines.
316 TypeError 313 TypeError
317 If `lines` is not iterable. 314 If `lines` is not iterable.
318 315
319 """ 316 """
317 metadata_closed = False
318
320 for line in lines: 319 for line in lines:
321 yield parse_line(line) 320 result = parse_line(line)
321
322 if isinstance(result, Comment):
323 match = METADATA_REGEXP.match(result.text)
324 if match:
325 key, value = match.groups()
326
327 # Historically, checksums can occur at the bottom of the
328 # filter list. Checksums are no longer used by Adblock Plus,
329 # but in order to strip them (in abp.filters.renderer),
330 # we have to make sure to still parse them regardless of
331 # their position in the filter list.
332 if not metadata_closed or key.lower() == 'checksum':
333 yield Metadata(key, value)
334 continue
335
336 if not result.text:
337 metadata_closed = True
338 elif isinstance(result, (EmptyLine, Filter)):
Vasily Kuznetsov 2018/09/04 19:50:56 I just thought that since this could also be used
Sebastian Noack 2018/09/04 20:23:32 Done.
339 metadata_closed = True
340
341 yield result
OLDNEW
« no previous file with comments | « no previous file | tests/test_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld