OLD | NEW |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-2017 eyeo GmbH | 2 # Copyright (C) 2006-2017 eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
12 # | 12 # |
13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
15 | 15 |
| 16 """Parser for ABP filterlist format.""" |
| 17 |
16 from __future__ import unicode_literals | 18 from __future__ import unicode_literals |
17 | 19 |
18 import re | 20 import re |
19 from collections import namedtuple | 21 from collections import namedtuple |
20 | 22 |
21 __all__ = [ | 23 __all__ = [ |
22 'FILTER_ACTION', | 24 'FILTER_ACTION', |
23 'FILTER_OPTION', | 25 'FILTER_OPTION', |
| 26 'SELECTOR_TYPE', |
24 'ParseError', | 27 'ParseError', |
25 'SELECTOR_TYPE', | |
26 'parse_filterlist', | 28 'parse_filterlist', |
27 'parse_line', | 29 'parse_line', |
28 ] | 30 ] |
29 | 31 |
30 | 32 |
31 class ParseError(Exception): | 33 class ParseError(Exception): |
32 """Exception thrown by the parser when it encounters invalid input. | 34 """Exception thrown by the parser when it encounters invalid input. |
33 | 35 |
34 :param error: Description of the error. | 36 Parameters |
35 :param text: The text which was being parsed when an error occurred. | 37 ---------- |
| 38 error : str |
| 39 Description of the error. |
| 40 text : str |
| 41 The source text that caused an error. |
36 """ | 42 """ |
37 | 43 |
38 def __init__(self, error, text): | 44 def __init__(self, error, text): |
39 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 45 Exception.__init__(self, '{} in "{}"'.format(error, text)) |
40 self.text = text | 46 self.text = text |
41 self.error = error | 47 self.error = error |
42 | 48 |
43 | 49 |
44 # Constants related to filters (see https://adblockplus.org/filters). | 50 # Constants related to filters (see https://adblockplus.org/filters). |
45 class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). | 51 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). |
46 """Selector types""" | 52 """Selector type constants.""" |
47 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 53 URL_PATTERN = 'url-pattern' # Normal URL patterns. |
48 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 54 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. |
49 CSS = 'css' # CSS selectors for hiding filters. | 55 CSS = 'css' # CSS selectors for hiding filters. |
50 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 56 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). |
51 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 57 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. |
52 | 58 |
53 | 59 |
54 class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). | 60 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). |
55 """Filter actions""" | 61 """Filter action constants.""" |
56 BLOCK = 'block' # Block the request. | 62 BLOCK = 'block' # Block the request. |
57 ALLOW = 'allow' # Allow the request (whitelist). | 63 ALLOW = 'allow' # Allow the request (whitelist). |
58 HIDE = 'hide' # Hide selected element(s). | 64 HIDE = 'hide' # Hide selected element(s). |
59 SHOW = 'show' # Show selected element(s) (whitelist). | 65 SHOW = 'show' # Show selected element(s) (whitelist). |
60 | 66 |
61 | 67 |
62 class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). | 68 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). |
63 """Filter options""" | 69 """Filter option constants.""" |
64 # Resource types. | 70 # Resource types. |
65 OTHER = 'other' | 71 OTHER = 'other' |
66 SCRIPT = 'script' | 72 SCRIPT = 'script' |
67 IMAGE = 'image' | 73 IMAGE = 'image' |
68 STYLESHEET = 'stylesheet' | 74 STYLESHEET = 'stylesheet' |
69 OBJECT = 'object' | 75 OBJECT = 'object' |
70 SUBDOCUMENT = 'subdocument' | 76 SUBDOCUMENT = 'subdocument' |
71 DOCUMENT = 'document' | 77 DOCUMENT = 'document' |
72 WEBSOCKET = 'websocket' | 78 WEBSOCKET = 'websocket' |
73 WEBRTC = 'webrtc' | 79 WEBRTC = 'webrtc' |
(...skipping 17 matching lines...) Expand all Loading... |
91 DOMAIN = 'domain' | 97 DOMAIN = 'domain' |
92 THIRD_PARTY = 'third-party' | 98 THIRD_PARTY = 'third-party' |
93 COLLAPSE = 'collapse' | 99 COLLAPSE = 'collapse' |
94 SITEKEY = 'sitekey' | 100 SITEKEY = 'sitekey' |
95 DONOTTRACK = 'donottrack' | 101 DONOTTRACK = 'donottrack' |
96 | 102 |
97 | 103 |
98 def _line_type(name, field_names, format_string): | 104 def _line_type(name, field_names, format_string): |
99 """Define a line type. | 105 """Define a line type. |
100 | 106 |
101 :param name: The name of the line type to define. | 107 Parameters |
102 :param field_names: A sequence of field names or one space-separated | 108 ---------- |
103 string that contains all field names. | 109 name : str |
104 :param format_string: A format specifier for converting this line type | 110 The name of the line type to define. |
105 back to string representation. | 111 field_names : str or list |
106 :returns: Class created with `namedtuple` that has `.type` set to | 112 A sequence of field names or one space-separated string that contains |
107 lowercased `name` and supports conversion back to string with | 113 all field names. |
108 `.to_string()` method. | 114 format_string : str |
| 115 A format specifier for converting this line type back to string |
| 116 representation. |
| 117 |
| 118 Returns |
| 119 ------- |
| 120 class |
| 121 Class created with `namedtuple` that has `.type` set to lowercased |
| 122 `name` and supports conversion back to string with `.to_string()` |
| 123 method. |
109 """ | 124 """ |
110 lt = namedtuple(name, field_names) | 125 lt = namedtuple(name, field_names) |
111 lt.type = name.lower() | 126 lt.type = name.lower() |
112 lt.to_string = lambda self: format_string.format(self) | 127 lt.to_string = lambda self: format_string.format(self) |
113 return lt | 128 return lt |
114 | 129 |
115 | 130 |
116 Header = _line_type('Header', 'version', '[{.version}]') | 131 Header = _line_type('Header', 'version', '[{.version}]') |
117 EmptyLine = _line_type('EmptyLine', '', '') | 132 EmptyLine = _line_type('EmptyLine', '', '') |
118 Comment = _line_type('Comment', 'text', '! {.text}') | 133 Comment = _line_type('Comment', 'text', '! {.text}') |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
216 if domain: | 231 if domain: |
217 domains = [_parse_option(d) for d in domain.split(',')] | 232 domains = [_parse_option(d) for d in domain.split(',')] |
218 options.append((FILTER_OPTION.DOMAIN, domains)) | 233 options.append((FILTER_OPTION.DOMAIN, domains)) |
219 | 234 |
220 return Filter(text, selector, action, options) | 235 return Filter(text, selector, action, options) |
221 | 236 |
222 | 237 |
223 def parse_filter(text): | 238 def parse_filter(text): |
224 """Parse one filter. | 239 """Parse one filter. |
225 | 240 |
226 :param text: Text representation of a filter. | 241 Parameters |
227 :returns: Filter object. | 242 ---------- |
| 243 text : str |
| 244 Filter to parse in ABP filter list syntax. |
| 245 |
| 246 Returns |
| 247 ------- |
| 248 namedtuple |
| 249 Parsed filter. |
228 """ | 250 """ |
229 if '#' in text: | 251 if '#' in text: |
230 match = HIDING_FILTER_REGEXP.search(text) | 252 match = HIDING_FILTER_REGEXP.search(text) |
231 if match: | 253 if match: |
232 return _parse_hiding_filter(text, *match.groups()) | 254 return _parse_hiding_filter(text, *match.groups()) |
233 return _parse_blocking_filter(text) | 255 return _parse_blocking_filter(text) |
234 | 256 |
235 | 257 |
236 def parse_line(line_text): | 258 def parse_line(line_text): |
237 """Parse one line of a filter list. | 259 """Parse one line of a filter list. |
238 | 260 |
239 :param line_text: Line of a filter list (must be a unicode string). | 261 Parameters |
240 :returns: Parsed line object (see `_line_type`). | 262 ---------- |
241 :raises ParseError: If the line can't be successfully parsed. | 263 line_text : str |
| 264 Line of a filter list. |
| 265 |
| 266 Returns |
| 267 ------- |
| 268 namedtuple |
| 269 Parsed line (see `_line_type`). |
| 270 |
| 271 Raises |
| 272 ------ |
| 273 ParseError |
| 274 If the line can't be parsed. |
242 """ | 275 """ |
243 content = line_text.strip() | 276 content = line_text.strip() |
244 | 277 |
245 if content == '': | 278 if content == '': |
246 line = EmptyLine() | 279 line = EmptyLine() |
247 elif content.startswith('!'): | 280 elif content.startswith('!'): |
248 line = _parse_comment(content) | 281 line = _parse_comment(content) |
249 elif content.startswith('%') and content.endswith('%'): | 282 elif content.startswith('%') and content.endswith('%'): |
250 line = _parse_instruction(content) | 283 line = _parse_instruction(content) |
251 elif content.startswith('[') and content.endswith(']'): | 284 elif content.startswith('[') and content.endswith(']'): |
252 line = _parse_header(content) | 285 line = _parse_header(content) |
253 else: | 286 else: |
254 line = parse_filter(content) | 287 line = parse_filter(content) |
255 | 288 |
256 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 289 assert line.to_string().replace(' ', '') == content.replace(' ', '') |
257 return line | 290 return line |
258 | 291 |
259 | 292 |
260 def parse_filterlist(lines): | 293 def parse_filterlist(lines): |
261 """Parse filter list from an iterable. | 294 """Parse filter list from an iterable. |
262 | 295 |
263 :param lines: List of strings or file or other iterable. | 296 Parameters |
264 :returns: Iterator over parsed lines. | 297 ---------- |
265 :raises ParseError: Can be thrown during iteration for invalid lines. | 298 lines : iterable of str |
| 299 Lines of the filter list. |
| 300 |
| 301 Returns |
| 302 ------- |
| 303 iterator of namedtuple |
| 304 Parsed lines of the filter list. |
| 305 |
| 306 Raises |
| 307 ------ |
| 308 ParseError |
| 309 Thrown during iteration for invalid filter list lines. |
266 """ | 310 """ |
267 for line in lines: | 311 for line in lines: |
268 yield parse_line(line) | 312 yield parse_line(line) |
OLD | NEW |