Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: abp/filters/parser.py

Issue 29873561: Issue 6920 - Only parse metadata from the top of the file (Closed)
Left Patch Set: Fixed typo and moved logic to parse_filterlist() Created Sept. 4, 2018, 3:43 p.m.
Right Patch Set: Test 'Last modified' case Created Sept. 5, 2018, 9:09 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | tests/test_parser.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 133
134 134
135 Header = _line_type('Header', 'version', '[{.version}]') 135 Header = _line_type('Header', 'version', '[{.version}]')
136 EmptyLine = _line_type('EmptyLine', '', '') 136 EmptyLine = _line_type('EmptyLine', '', '')
137 Comment = _line_type('Comment', 'text', '! {.text}') 137 Comment = _line_type('Comment', 'text', '! {.text}')
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}')
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') 139 Filter = _line_type('Filter', 'text selector action options', '{.text}')
140 Include = _line_type('Include', 'target', '%include {0.target}%') 140 Include = _line_type('Include', 'target', '%include {0.target}%')
141 141
142 142
143 METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)') 143 METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)')
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
147 FILTER_OPTIONS_REGEXP = re.compile( 147 FILTER_OPTIONS_REGEXP = re.compile(
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'
149 ) 149 )
150 150
151 151
152 def _parse_header(text): 152 def _parse_header(text):
153 match = HEADER_REGEXP.match(text) 153 match = HEADER_REGEXP.match(text)
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
246 """ 246 """
247 if '#' in text: 247 if '#' in text:
248 match = HIDING_FILTER_REGEXP.search(text) 248 match = HIDING_FILTER_REGEXP.search(text)
249 if match: 249 if match:
250 return _parse_hiding_filter(text, *match.groups()) 250 return _parse_hiding_filter(text, *match.groups())
251 return _parse_blocking_filter(text) 251 return _parse_blocking_filter(text)
252 252
253 253
254 def parse_line(line_text): 254 def parse_line(line_text):
255 """Parse one line of a filter list. 255 """Parse one line of a filter list.
256
257 Note that parse_line() doesn't handle special comments, hence never returns
258 a Metadata() object. Adblock Plus only considers metadata when parsing the
259 whole filter list and only if they are given at the top of the filter list.
256 260
257 Parameters 261 Parameters
258 ---------- 262 ----------
259 line_text : str 263 line_text : str
260 Line of a filter list. 264 Line of a filter list.
261 265
262 Returns 266 Returns
263 ------- 267 -------
264 namedtuple 268 namedtuple
265 Parsed line (see `_line_type`). 269 Parsed line (see `_line_type`).
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
311 315
312 """ 316 """
313 metadata_closed = False 317 metadata_closed = False
314 318
315 for line in lines: 319 for line in lines:
316 result = parse_line(line) 320 result = parse_line(line)
317 321
318 if isinstance(result, Comment): 322 if isinstance(result, Comment):
319 match = METADATA_REGEXP.match(result.text) 323 match = METADATA_REGEXP.match(result.text)
320 if match: 324 if match:
321 # The regular expression matches as well if we see a 325 key, value = match.groups()
322 # malformed key (e.g. "Last modified"). In that case we 326
323 # want to keep looking for more metadata, but yield a
324 # Comment instead of a Metadata object.
325 #
326 # Historically, checksums can occur at the bottom of the 327 # Historically, checksums can occur at the bottom of the
327 # filter list. Checksums are no longer used by Adblock Plus, 328 # filter list. Checksums are no longer used by Adblock Plus,
328 # but in order to strip them (in abp.filters.renderer), 329 # but in order to strip them (in abp.filters.renderer),
329 # we have to make sure to still parse them regardless of 330 # we have to make sure to still parse them regardless of
330 # their position in the filter list. 331 # their position in the filter list.
331 key, value = match.groups() 332 if not metadata_closed or key.lower() == 'checksum':
332 if key and (not metadata_closed or key.lower() == 'checksum'): 333 yield Metadata(key, value)
333 result = Metadata(key, value) 334 continue
334 335
335 yield result 336 if not result.text:
336 continue 337 metadata_closed = True
337 338 elif not isinstance(result, Header):
338 if not isinstance(result, Header):
339 metadata_closed = True 339 metadata_closed = True
340 340
341 yield result 341 yield result
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld