| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| (no file at all) | |
| 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
| 3 # | 3 # |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
| 7 # | 7 # |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
| 12 # | 12 # |
| 13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
| 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 15 | 15 |
| 16 """Parser for ABP filterlist format.""" | |
| 17 | |
| 16 from __future__ import unicode_literals | 18 from __future__ import unicode_literals |
| 17 | 19 |
| 18 import re | 20 import re |
| 19 from collections import namedtuple | 21 from collections import namedtuple |
| 20 | 22 |
| 21 __all__ = [ | 23 __all__ = [ |
| 22 'FILTER_ACTION', | 24 'FILTER_ACTION', |
| 23 'FILTER_OPTION', | 25 'FILTER_OPTION', |
| 26 'SELECTOR_TYPE', | |
| 24 'ParseError', | 27 'ParseError', |
| 25 'SELECTOR_TYPE', | |
| 26 'parse_filterlist', | 28 'parse_filterlist', |
| 27 'parse_line', | 29 'parse_line', |
| 28 ] | 30 ] |
| 29 | 31 |
| 30 | 32 |
| 31 class ParseError(Exception): | 33 class ParseError(Exception): |
| 32 """Exception thrown by the parser when it encounters invalid input. | 34 """Exception thrown by the parser when it encounters invalid input. |
| 33 | 35 |
| 34 :param error: Description of the error. | 36 Parameters |
| 35 :param text: The text which was being parsed when an error occurred. | 37 ---------- |
| 38 error : str | |
| 39 Description of the error. | |
| 40 text : str | |
| 41 The source text that caused an error. | |
| 42 | |
|
Vasily Kuznetsov
2017/10/24 16:11:00
pep8-docstrings demands an empty line at the end of ... [comment preview truncated]
| |
| 36 """ | 43 """ |
| 37 | 44 |
| 38 def __init__(self, error, text): | 45 def __init__(self, error, text): |
| 39 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 46 Exception.__init__(self, '{} in "{}"'.format(error, text)) |
| 40 self.text = text | 47 self.text = text |
| 41 self.error = error | 48 self.error = error |
| 42 | 49 |
| 43 | 50 |
| 44 # Constants related to filters (see https://adblockplus.org/filters). | 51 # Constants related to filters (see https://adblockplus.org/filters). |
| 45 class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). | 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). |
| 46 """Selector types""" | 53 """Selector type constants.""" |
| 47 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 54 URL_PATTERN = 'url-pattern' # Normal URL patterns. |
| 48 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 55 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. |
| 49 CSS = 'css' # CSS selectors for hiding filters. | 56 CSS = 'css' # CSS selectors for hiding filters. |
| 50 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 57 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). |
| 51 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 58 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. |
| 52 | 59 |
| 53 | 60 |
| 54 class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). | 61 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). |
| 55 """Filter actions""" | 62 """Filter action constants.""" |
| 56 BLOCK = 'block' # Block the request. | 63 BLOCK = 'block' # Block the request. |
| 57 ALLOW = 'allow' # Allow the request (whitelist). | 64 ALLOW = 'allow' # Allow the request (whitelist). |
| 58 HIDE = 'hide' # Hide selected element(s). | 65 HIDE = 'hide' # Hide selected element(s). |
| 59 SHOW = 'show' # Show selected element(s) (whitelist). | 66 SHOW = 'show' # Show selected element(s) (whitelist). |
| 60 | 67 |
| 61 | 68 |
| 62 class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). | 69 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). |
| 63 """Filter options""" | 70 """Filter option constants.""" |
| 64 # Resource types. | 71 # Resource types. |
| 65 OTHER = 'other' | 72 OTHER = 'other' |
| 66 SCRIPT = 'script' | 73 SCRIPT = 'script' |
| 67 IMAGE = 'image' | 74 IMAGE = 'image' |
| 68 STYLESHEET = 'stylesheet' | 75 STYLESHEET = 'stylesheet' |
| 69 OBJECT = 'object' | 76 OBJECT = 'object' |
| 70 SUBDOCUMENT = 'subdocument' | 77 SUBDOCUMENT = 'subdocument' |
| 71 DOCUMENT = 'document' | 78 DOCUMENT = 'document' |
| 72 WEBSOCKET = 'websocket' | 79 WEBSOCKET = 'websocket' |
| 73 WEBRTC = 'webrtc' | 80 WEBRTC = 'webrtc' |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 91 DOMAIN = 'domain' | 98 DOMAIN = 'domain' |
| 92 THIRD_PARTY = 'third-party' | 99 THIRD_PARTY = 'third-party' |
| 93 COLLAPSE = 'collapse' | 100 COLLAPSE = 'collapse' |
| 94 SITEKEY = 'sitekey' | 101 SITEKEY = 'sitekey' |
| 95 DONOTTRACK = 'donottrack' | 102 DONOTTRACK = 'donottrack' |
| 96 | 103 |
| 97 | 104 |
| 98 def _line_type(name, field_names, format_string): | 105 def _line_type(name, field_names, format_string): |
| 99 """Define a line type. | 106 """Define a line type. |
| 100 | 107 |
| 101 :param name: The name of the line type to define. | 108 Parameters |
| 102 :param field_names: A sequence of field names or one space-separated | 109 ---------- |
| 103 string that contains all field names. | 110 name: str |
| 104 :param format_string: A format specifier for converting this line type | 111 The name of the line type to define. |
| 105 back to string representation. | 112 field_names: str or list |
| 106 :returns: Class created with `namedtuple` that has `.type` set to | 113 A sequence of field names or one space-separated string that contains |
| 107 lowercased `name` and supports conversion back to string with | 114 all field names. |
| 108 `.to_string()` method. | 115 format_string: str |
| 116 A format specifier for converting this line type back to string | |
| 117 representation. | |
| 118 | |
| 119 Returns | |
| 120 ------- | |
| 121 class | |
| 122 Class created with `namedtuple` that has `.type` set to lowercased | |
| 123 `name` and supports conversion back to string with `.to_string()` | |
| 124 method. | |
| 125 | |
| 109 """ | 126 """ |
| 110 lt = namedtuple(name, field_names) | 127 lt = namedtuple(name, field_names) |
| 111 lt.type = name.lower() | 128 lt.type = name.lower() |
| 112 lt.to_string = lambda self: format_string.format(self) | 129 lt.to_string = lambda self: format_string.format(self) |
| 113 return lt | 130 return lt |
| 114 | 131 |
| 115 | 132 |
| 116 Header = _line_type('Header', 'version', '[{.version}]') | 133 Header = _line_type('Header', 'version', '[{.version}]') |
| 117 EmptyLine = _line_type('EmptyLine', '', '') | 134 EmptyLine = _line_type('EmptyLine', '', '') |
| 118 Comment = _line_type('Comment', 'text', '! {.text}') | 135 Comment = _line_type('Comment', 'text', '! {.text}') |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 216 if domain: | 233 if domain: |
| 217 domains = [_parse_option(d) for d in domain.split(',')] | 234 domains = [_parse_option(d) for d in domain.split(',')] |
| 218 options.append((FILTER_OPTION.DOMAIN, domains)) | 235 options.append((FILTER_OPTION.DOMAIN, domains)) |
| 219 | 236 |
| 220 return Filter(text, selector, action, options) | 237 return Filter(text, selector, action, options) |
| 221 | 238 |
| 222 | 239 |
| 223 def parse_filter(text): | 240 def parse_filter(text): |
| 224 """Parse one filter. | 241 """Parse one filter. |
| 225 | 242 |
| 226 :param text: Text representation of a filter. | 243 Parameters |
| 227 :returns: Filter object. | 244 ---------- |
| 245 text : str | |
| 246 Filter to parse in ABP filter list syntax. | |
| 247 | |
| 248 Returns | |
| 249 ------- | |
| 250 namedtuple | |
| 251 Parsed filter. | |
| 252 | |
| 228 """ | 253 """ |
| 229 if '#' in text: | 254 if '#' in text: |
| 230 match = HIDING_FILTER_REGEXP.search(text) | 255 match = HIDING_FILTER_REGEXP.search(text) |
| 231 if match: | 256 if match: |
| 232 return _parse_hiding_filter(text, *match.groups()) | 257 return _parse_hiding_filter(text, *match.groups()) |
| 233 return _parse_blocking_filter(text) | 258 return _parse_blocking_filter(text) |
| 234 | 259 |
| 235 | 260 |
| 236 def parse_line(line_text): | 261 def parse_line(line_text): |
| 237 """Parse one line of a filter list. | 262 """Parse one line of a filter list. |
| 238 | 263 |
| 239 :param line_text: Line of a filter list (must be a unicode string). | 264 Parameters |
| 240 :returns: Parsed line object (see `_line_type`). | 265 ---------- |
| 241 :raises ParseError: If the line can't be successfully parsed. | 266 line_text : str |
| 267 Line of a filter list. | |
| 268 | |
| 269 Returns | |
| 270 ------- | |
| 271 namedtuple | |
| 272 Parsed line (see `_line_type`). | |
| 273 | |
| 274 Raises | |
| 275 ------ | |
| 276 ParseError | |
| 277 ParseError: If the line can't be parsed. | |
| 242 """ | 278 """ |
| 243 content = line_text.strip() | 279 content = line_text.strip() |
| 244 | 280 |
| 245 if content == '': | 281 if content == '': |
| 246 line = EmptyLine() | 282 line = EmptyLine() |
| 247 elif content.startswith('!'): | 283 elif content.startswith('!'): |
| 248 line = _parse_comment(content) | 284 line = _parse_comment(content) |
| 249 elif content.startswith('%') and content.endswith('%'): | 285 elif content.startswith('%') and content.endswith('%'): |
| 250 line = _parse_instruction(content) | 286 line = _parse_instruction(content) |
| 251 elif content.startswith('[') and content.endswith(']'): | 287 elif content.startswith('[') and content.endswith(']'): |
| 252 line = _parse_header(content) | 288 line = _parse_header(content) |
| 253 else: | 289 else: |
| 254 line = parse_filter(content) | 290 line = parse_filter(content) |
| 255 | 291 |
| 256 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 292 assert line.to_string().replace(' ', '') == content.replace(' ', '') |
| 257 return line | 293 return line |
| 258 | 294 |
| 259 | 295 |
| 260 def parse_filterlist(lines): | 296 def parse_filterlist(lines): |
| 261 """Parse filter list from an iterable. | 297 """Parse filter list from an iterable. |
| 262 | 298 |
| 263 :param lines: List of strings or file or other iterable. | 299 Parameters |
| 264 :returns: Iterator over parsed lines. | 300 ---------- |
| 265 :raises ParseError: Can be thrown during iteration for invalid lines. | 301 lines: iterable of str |
| 302 Lines of the filter list. | |
| 303 | |
| 304 Returns | |
| 305 ------- | |
| 306 iterator of namedtuple | |
| 307 Parsed lines of the filter list. | |
| 308 | |
| 309 Raises | |
| 310 ------ | |
| 311 ParseError | |
| 312 Thrown during iteration for invalid filter list lines. | |
| 313 TypeError | |
|
Vasily Kuznetsov
2017/10/24 16:11:00
As suggested by Matze, this error is easy to foresee ... [comment preview truncated]
| |
| 314 If `lines` is not iterable. | |
| 315 | |
| 266 """ | 316 """ |
| 267 for line in lines: | 317 for line in lines: |
| 268 yield parse_line(line) | 318 yield parse_line(line) |
| LEFT | RIGHT |