| Left: | ||
| Right: | 
| LEFT | RIGHT | 
|---|---|
| (no file at all) | |
| 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH | 
| 3 # | 3 # | 
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify | 
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as | 
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. | 
| 7 # | 7 # | 
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, | 
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. | 
| 12 # | 12 # | 
| 13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License | 
| 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| 15 | 15 | 
| 16 """Parser for ABP filterlist format.""" | |
| 17 | |
| 16 from __future__ import unicode_literals | 18 from __future__ import unicode_literals | 
| 17 | 19 | 
| 18 import re | 20 import re | 
| 19 from collections import namedtuple | 21 from collections import namedtuple | 
| 20 | 22 | 
| 21 __all__ = [ | 23 __all__ = [ | 
| 22 'FILTER_ACTION', | 24 'FILTER_ACTION', | 
| 23 'FILTER_OPTION', | 25 'FILTER_OPTION', | 
| 26 'SELECTOR_TYPE', | |
| 24 'ParseError', | 27 'ParseError', | 
| 25 'SELECTOR_TYPE', | |
| 26 'parse_filterlist', | 28 'parse_filterlist', | 
| 27 'parse_line', | 29 'parse_line', | 
| 28 ] | 30 ] | 
| 29 | 31 | 
| 30 | 32 | 
| 31 class ParseError(Exception): | 33 class ParseError(Exception): | 
| 32 """Exception thrown by the parser when it encounters invalid input. | 34 """Exception thrown by the parser when it encounters invalid input. | 
| 33 | 35 | 
| 34 :param error: Description of the error. | 36 Parameters | 
| 35 :param text: The text which was being parsed when an error occurred. | 37 ---------- | 
| 38 error : str | |
| 39 Description of the error. | |
| 40 text : str | |
| 41 The source text that caused an error. | |
| 42 | |
| 
 
Vasily Kuznetsov
2017/10/24 16:11:00
pep8-docstrings demands an empty line at the end o… [comment truncated]
 
 | |
| 36 """ | 43 """ | 
| 37 | 44 | 
| 38 def __init__(self, error, text): | 45 def __init__(self, error, text): | 
| 39 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 46 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 
| 40 self.text = text | 47 self.text = text | 
| 41 self.error = error | 48 self.error = error | 
| 42 | 49 | 
| 43 | 50 | 
| 44 # Constants related to filters (see https://adblockplus.org/filters). | 51 # Constants related to filters (see https://adblockplus.org/filters). | 
| 45 class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). | 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). | 
| 46 """Selector types""" | 53 """Selector type constants.""" | 
| 47 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 54 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 
| 48 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 55 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 
| 49 CSS = 'css' # CSS selectors for hiding filters. | 56 CSS = 'css' # CSS selectors for hiding filters. | 
| 50 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 57 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 
| 51 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 58 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 
| 52 | 59 | 
| 53 | 60 | 
| 54 class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). | 61 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). | 
| 55 """Filter actions""" | 62 """Filter action constants.""" | 
| 56 BLOCK = 'block' # Block the request. | 63 BLOCK = 'block' # Block the request. | 
| 57 ALLOW = 'allow' # Allow the request (whitelist). | 64 ALLOW = 'allow' # Allow the request (whitelist). | 
| 58 HIDE = 'hide' # Hide selected element(s). | 65 HIDE = 'hide' # Hide selected element(s). | 
| 59 SHOW = 'show' # Show selected element(s) (whitelist). | 66 SHOW = 'show' # Show selected element(s) (whitelist). | 
| 60 | 67 | 
| 61 | 68 | 
| 62 class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). | 69 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). | 
| 63 """Filter options""" | 70 """Filter option constants.""" | 
| 64 # Resource types. | 71 # Resource types. | 
| 65 OTHER = 'other' | 72 OTHER = 'other' | 
| 66 SCRIPT = 'script' | 73 SCRIPT = 'script' | 
| 67 IMAGE = 'image' | 74 IMAGE = 'image' | 
| 68 STYLESHEET = 'stylesheet' | 75 STYLESHEET = 'stylesheet' | 
| 69 OBJECT = 'object' | 76 OBJECT = 'object' | 
| 70 SUBDOCUMENT = 'subdocument' | 77 SUBDOCUMENT = 'subdocument' | 
| 71 DOCUMENT = 'document' | 78 DOCUMENT = 'document' | 
| 72 WEBSOCKET = 'websocket' | 79 WEBSOCKET = 'websocket' | 
| 73 WEBRTC = 'webrtc' | 80 WEBRTC = 'webrtc' | 
| (...skipping 17 matching lines...) Expand all Loading... | |
| 91 DOMAIN = 'domain' | 98 DOMAIN = 'domain' | 
| 92 THIRD_PARTY = 'third-party' | 99 THIRD_PARTY = 'third-party' | 
| 93 COLLAPSE = 'collapse' | 100 COLLAPSE = 'collapse' | 
| 94 SITEKEY = 'sitekey' | 101 SITEKEY = 'sitekey' | 
| 95 DONOTTRACK = 'donottrack' | 102 DONOTTRACK = 'donottrack' | 
| 96 | 103 | 
| 97 | 104 | 
| 98 def _line_type(name, field_names, format_string): | 105 def _line_type(name, field_names, format_string): | 
| 99 """Define a line type. | 106 """Define a line type. | 
| 100 | 107 | 
| 101 :param name: The name of the line type to define. | 108 Parameters | 
| 102 :param field_names: A sequence of field names or one space-separated | 109 ---------- | 
| 103 string that contains all field names. | 110 name: str | 
| 104 :param format_string: A format specifier for converting this line type | 111 The name of the line type to define. | 
| 105 back to string representation. | 112 field_names: str or list | 
| 106 :returns: Class created with `namedtuple` that has `.type` set to | 113 A sequence of field names or one space-separated string that contains | 
| 107 lowercased `name` and supports conversion back to string with | 114 all field names. | 
| 108 `.to_string()` method. | 115 format_string: str | 
| 116 A format specifier for converting this line type back to string | |
| 117 representation. | |
| 118 | |
| 119 Returns | |
| 120 ------- | |
| 121 class | |
| 122 Class created with `namedtuple` that has `.type` set to lowercased | |
| 123 `name` and supports conversion back to string with `.to_string()` | |
| 124 method. | |
| 125 | |
| 109 """ | 126 """ | 
| 110 lt = namedtuple(name, field_names) | 127 lt = namedtuple(name, field_names) | 
| 111 lt.type = name.lower() | 128 lt.type = name.lower() | 
| 112 lt.to_string = lambda self: format_string.format(self) | 129 lt.to_string = lambda self: format_string.format(self) | 
| 113 return lt | 130 return lt | 
| 114 | 131 | 
| 115 | 132 | 
| 116 Header = _line_type('Header', 'version', '[{.version}]') | 133 Header = _line_type('Header', 'version', '[{.version}]') | 
| 117 EmptyLine = _line_type('EmptyLine', '', '') | 134 EmptyLine = _line_type('EmptyLine', '', '') | 
| 118 Comment = _line_type('Comment', 'text', '! {.text}') | 135 Comment = _line_type('Comment', 'text', '! {.text}') | 
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 216 if domain: | 233 if domain: | 
| 217 domains = [_parse_option(d) for d in domain.split(',')] | 234 domains = [_parse_option(d) for d in domain.split(',')] | 
| 218 options.append((FILTER_OPTION.DOMAIN, domains)) | 235 options.append((FILTER_OPTION.DOMAIN, domains)) | 
| 219 | 236 | 
| 220 return Filter(text, selector, action, options) | 237 return Filter(text, selector, action, options) | 
| 221 | 238 | 
| 222 | 239 | 
| 223 def parse_filter(text): | 240 def parse_filter(text): | 
| 224 """Parse one filter. | 241 """Parse one filter. | 
| 225 | 242 | 
| 226 :param text: Text representation of a filter. | 243 Parameters | 
| 227 :returns: Filter object. | 244 ---------- | 
| 245 text : str | |
| 246 Filter to parse in ABP filter list syntax. | |
| 247 | |
| 248 Returns | |
| 249 ------- | |
| 250 namedtuple | |
| 251 Parsed filter. | |
| 252 | |
| 228 """ | 253 """ | 
| 229 if '#' in text: | 254 if '#' in text: | 
| 230 match = HIDING_FILTER_REGEXP.search(text) | 255 match = HIDING_FILTER_REGEXP.search(text) | 
| 231 if match: | 256 if match: | 
| 232 return _parse_hiding_filter(text, *match.groups()) | 257 return _parse_hiding_filter(text, *match.groups()) | 
| 233 return _parse_blocking_filter(text) | 258 return _parse_blocking_filter(text) | 
| 234 | 259 | 
| 235 | 260 | 
| 236 def parse_line(line_text): | 261 def parse_line(line_text): | 
| 237 """Parse one line of a filter list. | 262 """Parse one line of a filter list. | 
| 238 | 263 | 
| 239 :param line_text: Line of a filter list (must be a unicode string). | 264 Parameters | 
| 240 :returns: Parsed line object (see `_line_type`). | 265 ---------- | 
| 241 :raises ParseError: If the line can't be successfully parsed. | 266 line_text : str | 
| 267 Line of a filter list. | |
| 268 | |
| 269 Returns | |
| 270 ------- | |
| 271 namedtuple | |
| 272 Parsed line (see `_line_type`). | |
| 273 | |
| 274 Raises | |
| 275 ------ | |
| 276 ParseError | |
| 277 ParseError: If the line can't be parsed. | |
| 242 """ | 278 """ | 
| 243 content = line_text.strip() | 279 content = line_text.strip() | 
| 244 | 280 | 
| 245 if content == '': | 281 if content == '': | 
| 246 line = EmptyLine() | 282 line = EmptyLine() | 
| 247 elif content.startswith('!'): | 283 elif content.startswith('!'): | 
| 248 line = _parse_comment(content) | 284 line = _parse_comment(content) | 
| 249 elif content.startswith('%') and content.endswith('%'): | 285 elif content.startswith('%') and content.endswith('%'): | 
| 250 line = _parse_instruction(content) | 286 line = _parse_instruction(content) | 
| 251 elif content.startswith('[') and content.endswith(']'): | 287 elif content.startswith('[') and content.endswith(']'): | 
| 252 line = _parse_header(content) | 288 line = _parse_header(content) | 
| 253 else: | 289 else: | 
| 254 line = parse_filter(content) | 290 line = parse_filter(content) | 
| 255 | 291 | 
| 256 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 292 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 
| 257 return line | 293 return line | 
| 258 | 294 | 
| 259 | 295 | 
| 260 def parse_filterlist(lines): | 296 def parse_filterlist(lines): | 
| 261 """Parse filter list from an iterable. | 297 """Parse filter list from an iterable. | 
| 262 | 298 | 
| 263 :param lines: List of strings or file or other iterable. | 299 Parameters | 
| 264 :returns: Iterator over parsed lines. | 300 ---------- | 
| 265 :raises ParseError: Can be thrown during iteration for invalid lines. | 301 lines: iterable of str | 
| 302 Lines of the filter list. | |
| 303 | |
| 304 Returns | |
| 305 ------- | |
| 306 iterator of namedtuple | |
| 307 Parsed lines of the filter list. | |
| 308 | |
| 309 Raises | |
| 310 ------ | |
| 311 ParseError | |
| 312 Thrown during iteration for invalid filter list lines. | |
| 313 TypeError | |
| 
 
Vasily Kuznetsov
2017/10/24 16:11:00
As suggested by Matze, this error is easy to fores… [comment truncated]
 
 | |
| 314 If `lines` is not iterable. | |
| 315 | |
| 266 """ | 316 """ | 
| 267 for line in lines: | 317 for line in lines: | 
| 268 yield parse_line(line) | 318 yield parse_line(line) | 
| LEFT | RIGHT |