| LEFT | RIGHT |
| 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
| 3 # | 3 # |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
| 7 # | 7 # |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 133 | 133 |
| 134 | 134 |
| 135 Header = _line_type('Header', 'version', '[{.version}]') | 135 Header = _line_type('Header', 'version', '[{.version}]') |
| 136 EmptyLine = _line_type('EmptyLine', '', '') | 136 EmptyLine = _line_type('EmptyLine', '', '') |
| 137 Comment = _line_type('Comment', 'text', '! {.text}') | 137 Comment = _line_type('Comment', 'text', '! {.text}') |
| 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') |
| 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') |
| 140 Include = _line_type('Include', 'target', '%include {0.target}%') | 140 Include = _line_type('Include', 'target', '%include {0.target}%') |
| 141 | 141 |
| 142 | 142 |
| 143 METADATA_REGEXP = re.compile(r'(?:([\w-]+)|(?:\S.*?))\s*:\s*(.*)') | 143 METADATA_REGEXP = re.compile(r'([\w-]+)\s*:\s*(.*)') |
| 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
| 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
| 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
| 147 FILTER_OPTIONS_REGEXP = re.compile( | 147 FILTER_OPTIONS_REGEXP = re.compile( |
| 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' |
| 149 ) | 149 ) |
| 150 | 150 |
| 151 | 151 |
| 152 def _parse_header(text): | 152 def _parse_header(text): |
| 153 match = HEADER_REGEXP.match(text) | 153 match = HEADER_REGEXP.match(text) |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 246 """ | 246 """ |
| 247 if '#' in text: | 247 if '#' in text: |
| 248 match = HIDING_FILTER_REGEXP.search(text) | 248 match = HIDING_FILTER_REGEXP.search(text) |
| 249 if match: | 249 if match: |
| 250 return _parse_hiding_filter(text, *match.groups()) | 250 return _parse_hiding_filter(text, *match.groups()) |
| 251 return _parse_blocking_filter(text) | 251 return _parse_blocking_filter(text) |
| 252 | 252 |
| 253 | 253 |
| 254 def parse_line(line_text): | 254 def parse_line(line_text): |
| 255 """Parse one line of a filter list. | 255 """Parse one line of a filter list. |
| 256 |
| 257 Note that parse_line() doesn't handle special comments and hence never |
| 258 returns a Metadata() object. Adblock Plus only considers metadata when |
| 259 parsing the whole filter list, and only if it appears at the top of it. |
| 256 | 260 |
| 257 Parameters | 261 Parameters |
| 258 ---------- | 262 ---------- |
| 259 line_text : str | 263 line_text : str |
| 260 Line of a filter list. | 264 Line of a filter list. |
| 261 | 265 |
| 262 Returns | 266 Returns |
| 263 ------- | 267 ------- |
| 264 namedtuple | 268 namedtuple |
| 265 Parsed line (see `_line_type`). | 269 Parsed line (see `_line_type`). |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 311 | 315 |
| 312 """ | 316 """ |
| 313 metadata_closed = False | 317 metadata_closed = False |
| 314 | 318 |
| 315 for line in lines: | 319 for line in lines: |
| 316 result = parse_line(line) | 320 result = parse_line(line) |
| 317 | 321 |
| 318 if isinstance(result, Comment): | 322 if isinstance(result, Comment): |
| 319 match = METADATA_REGEXP.match(result.text) | 323 match = METADATA_REGEXP.match(result.text) |
| 320 if match: | 324 if match: |
| 321 # The regular expression matches as well if we see a | 325 key, value = match.groups() |
| 322 # malformed key (e.g. "Last modified"). In that case we | 326 |
| 323 # want to keep looking for more metadata, but yield a | |
| 324 # Comment instead of a Metadata object. | |
| 325 # | |
| 326 # Historically, checksums can occur at the bottom of the | 327 # Historically, checksums can occur at the bottom of the |
| 327 # filter list. Checksums are no longer used by Adblock Plus, | 328 # filter list. Checksums are no longer used by Adblock Plus, |
| 328 # but in order to strip them (in abp.filters.renderer), | 329 # but in order to strip them (in abp.filters.renderer), |
| 329 # we have to make sure to still parse them regardless of | 330 # we have to make sure to still parse them regardless of |
| 330 # their position in the filter list. | 331 # their position in the filter list. |
| 331 key, value = match.groups() | 332 if not metadata_closed or key.lower() == 'checksum': |
| 332 if key and (not metadata_closed or key.lower() == 'checksum'): | 333 yield Metadata(key, value) |
| 333 result = Metadata(key, value) | 334 continue |
| 334 | 335 |
| 335 yield result | 336 if not result.text: |
| 336 continue | 337 metadata_closed = True |
| 337 | 338 elif not isinstance(result, Header): |
| 338 if not isinstance(result, Header): | |
| 339 metadata_closed = True | 339 metadata_closed = True |
| 340 | 340 |
| 341 yield result | 341 yield result |
| LEFT | RIGHT |