Left: | ||
Right: |
OLD | NEW |
---|---|
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
12 # | 12 # |
13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
15 | 15 |
16 """Parser for ABP filterlist format.""" | |
17 | |
16 from __future__ import unicode_literals | 18 from __future__ import unicode_literals |
17 | 19 |
18 import re | 20 import re |
19 from collections import namedtuple | 21 from collections import namedtuple |
20 | 22 |
21 __all__ = [ | 23 __all__ = [ |
22 'FILTER_ACTION', | 24 'FILTER_ACTION', |
23 'FILTER_OPTION', | 25 'FILTER_OPTION', |
26 'SELECTOR_TYPE', | |
24 'ParseError', | 27 'ParseError', |
25 'SELECTOR_TYPE', | |
26 'parse_filterlist', | 28 'parse_filterlist', |
27 'parse_line', | 29 'parse_line', |
28 ] | 30 ] |
29 | 31 |
30 | 32 |
31 class ParseError(Exception): | 33 class ParseError(Exception): |
32 """Exception thrown by the parser when it encounters invalid input. | 34 """Exception thrown by the parser when it encounters invalid input. |
33 | 35 |
34 :param error: Description of the error. | 36 Parameters |
35 :param text: The text which was being parsed when an error occurred. | 37 ---------- |
38 error : str | |
39 Description of the error. | |
40 text : str | |
41 The source text that caused an error. | |
42 | |
Vasily Kuznetsov
2017/10/24 16:11:00
pep8-docstrings demands an empty line at the end o
| |
36 """ | 43 """ |
37 | 44 |
38 def __init__(self, error, text): | 45 def __init__(self, error, text): |
39 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 46 Exception.__init__(self, '{} in "{}"'.format(error, text)) |
40 self.text = text | 47 self.text = text |
41 self.error = error | 48 self.error = error |
42 | 49 |
43 | 50 |
44 # Constants related to filters (see https://adblockplus.org/filters). | 51 # Constants related to filters (see https://adblockplus.org/filters). |
45 class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). | 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). |
46 """Selector types""" | 53 """Selector type constants.""" |
47 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 54 URL_PATTERN = 'url-pattern' # Normal URL patterns. |
48 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 55 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. |
49 CSS = 'css' # CSS selectors for hiding filters. | 56 CSS = 'css' # CSS selectors for hiding filters. |
50 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 57 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). |
51 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 58 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. |
52 | 59 |
53 | 60 |
54 class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). | 61 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). |
55 """Filter actions""" | 62 """Filter action constants.""" |
56 BLOCK = 'block' # Block the request. | 63 BLOCK = 'block' # Block the request. |
57 ALLOW = 'allow' # Allow the request (whitelist). | 64 ALLOW = 'allow' # Allow the request (whitelist). |
58 HIDE = 'hide' # Hide selected element(s). | 65 HIDE = 'hide' # Hide selected element(s). |
59 SHOW = 'show' # Show selected element(s) (whitelist). | 66 SHOW = 'show' # Show selected element(s) (whitelist). |
60 | 67 |
61 | 68 |
62 class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). | 69 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). |
63 """Filter options""" | 70 """Filter option constants.""" |
64 # Resource types. | 71 # Resource types. |
65 OTHER = 'other' | 72 OTHER = 'other' |
66 SCRIPT = 'script' | 73 SCRIPT = 'script' |
67 IMAGE = 'image' | 74 IMAGE = 'image' |
68 STYLESHEET = 'stylesheet' | 75 STYLESHEET = 'stylesheet' |
69 OBJECT = 'object' | 76 OBJECT = 'object' |
70 SUBDOCUMENT = 'subdocument' | 77 SUBDOCUMENT = 'subdocument' |
71 DOCUMENT = 'document' | 78 DOCUMENT = 'document' |
72 WEBSOCKET = 'websocket' | 79 WEBSOCKET = 'websocket' |
73 WEBRTC = 'webrtc' | 80 WEBRTC = 'webrtc' |
(...skipping 17 matching lines...) Expand all Loading... | |
91 DOMAIN = 'domain' | 98 DOMAIN = 'domain' |
92 THIRD_PARTY = 'third-party' | 99 THIRD_PARTY = 'third-party' |
93 COLLAPSE = 'collapse' | 100 COLLAPSE = 'collapse' |
94 SITEKEY = 'sitekey' | 101 SITEKEY = 'sitekey' |
95 DONOTTRACK = 'donottrack' | 102 DONOTTRACK = 'donottrack' |
96 | 103 |
97 | 104 |
98 def _line_type(name, field_names, format_string): | 105 def _line_type(name, field_names, format_string): |
99 """Define a line type. | 106 """Define a line type. |
100 | 107 |
101 :param name: The name of the line type to define. | 108 Parameters |
102 :param field_names: A sequence of field names or one space-separated | 109 ---------- |
103 string that contains all field names. | 110 name : str |
104 :param format_string: A format specifier for converting this line type | 111 The name of the line type to define. |
105 back to string representation. | 112 field_names : str or list |
106 :returns: Class created with `namedtuple` that has `.type` set to | 113 A sequence of field names or one space-separated string that contains |
107 lowercased `name` and supports conversion back to string with | 114 all field names. |
108 `.to_string()` method. | 115 format_string : str |
116 A format specifier for converting this line type back to string | |
117 representation. | |
118 | |
119 Returns | |
120 ------- | |
121 class | |
122 Class created with `namedtuple` that has `.type` set to lowercased | |
123 `name` and supports conversion back to string with `.to_string()` | |
124 method. | |
125 | |
109 """ | 126 """ |
110 lt = namedtuple(name, field_names) | 127 lt = namedtuple(name, field_names) |
111 lt.type = name.lower() | 128 lt.type = name.lower() |
112 lt.to_string = lambda self: format_string.format(self) | 129 lt.to_string = lambda self: format_string.format(self) |
113 return lt | 130 return lt |
114 | 131 |
115 | 132 |
116 Header = _line_type('Header', 'version', '[{.version}]') | 133 Header = _line_type('Header', 'version', '[{.version}]') |
117 EmptyLine = _line_type('EmptyLine', '', '') | 134 EmptyLine = _line_type('EmptyLine', '', '') |
118 Comment = _line_type('Comment', 'text', '! {.text}') | 135 Comment = _line_type('Comment', 'text', '! {.text}') |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
216 if domain: | 233 if domain: |
217 domains = [_parse_option(d) for d in domain.split(',')] | 234 domains = [_parse_option(d) for d in domain.split(',')] |
218 options.append((FILTER_OPTION.DOMAIN, domains)) | 235 options.append((FILTER_OPTION.DOMAIN, domains)) |
219 | 236 |
220 return Filter(text, selector, action, options) | 237 return Filter(text, selector, action, options) |
221 | 238 |
222 | 239 |
223 def parse_filter(text): | 240 def parse_filter(text): |
224 """Parse one filter. | 241 """Parse one filter. |
225 | 242 |
226 :param text: Text representation of a filter. | 243 Parameters |
227 :returns: Filter object. | 244 ---------- |
245 text : str | |
246 Filter to parse in ABP filter list syntax. | |
247 | |
248 Returns | |
249 ------- | |
250 namedtuple | |
251 Parsed filter. | |
252 | |
228 """ | 253 """ |
229 if '#' in text: | 254 if '#' in text: |
230 match = HIDING_FILTER_REGEXP.search(text) | 255 match = HIDING_FILTER_REGEXP.search(text) |
231 if match: | 256 if match: |
232 return _parse_hiding_filter(text, *match.groups()) | 257 return _parse_hiding_filter(text, *match.groups()) |
233 return _parse_blocking_filter(text) | 258 return _parse_blocking_filter(text) |
234 | 259 |
235 | 260 |
236 def parse_line(line_text): | 261 def parse_line(line_text): |
237 """Parse one line of a filter list. | 262 """Parse one line of a filter list. |
238 | 263 |
239 :param line_text: Line of a filter list (must be a unicode string). | 264 Parameters |
240 :returns: Parsed line object (see `_line_type`). | 265 ---------- |
241 :raises ParseError: If the line can't be successfully parsed. | 266 line_text : str |
267 Line of a filter list. | |
268 | |
269 Returns | |
270 ------- | |
271 namedtuple | |
272 Parsed line (see `_line_type`). | |
273 | |
274 Raises | |
275 ------ | |
276 ParseError | |
277 If the line can't be parsed. | |
242 """ | 278 """ |
243 content = line_text.strip() | 279 content = line_text.strip() |
244 | 280 |
245 if content == '': | 281 if content == '': |
246 line = EmptyLine() | 282 line = EmptyLine() |
247 elif content.startswith('!'): | 283 elif content.startswith('!'): |
248 line = _parse_comment(content) | 284 line = _parse_comment(content) |
249 elif content.startswith('%') and content.endswith('%'): | 285 elif content.startswith('%') and content.endswith('%'): |
250 line = _parse_instruction(content) | 286 line = _parse_instruction(content) |
251 elif content.startswith('[') and content.endswith(']'): | 287 elif content.startswith('[') and content.endswith(']'): |
252 line = _parse_header(content) | 288 line = _parse_header(content) |
253 else: | 289 else: |
254 line = parse_filter(content) | 290 line = parse_filter(content) |
255 | 291 |
256 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 292 assert line.to_string().replace(' ', '') == content.replace(' ', '') |
257 return line | 293 return line |
258 | 294 |
259 | 295 |
260 def parse_filterlist(lines): | 296 def parse_filterlist(lines): |
261 """Parse filter list from an iterable. | 297 """Parse filter list from an iterable. |
262 | 298 |
263 :param lines: List of strings or file or other iterable. | 299 Parameters |
264 :returns: Iterator over parsed lines. | 300 ---------- |
265 :raises ParseError: Can be thrown during iteration for invalid lines. | 301 lines : iterable of str |
302 Lines of the filter list. | |
303 | |
304 Returns | |
305 ------- | |
306 iterator of namedtuple | |
307 Parsed lines of the filter list. | |
308 | |
309 Raises | |
310 ------ | |
311 ParseError | |
312 Thrown during iteration for invalid filter list lines. | |
313 TypeError | |
Vasily Kuznetsov
2017/10/24 16:11:00
As suggested by Matze, this error is easy to fores
| |
314 If `lines` is not iterable. | |
315 | |
266 """ | 316 """ |
267 for line in lines: | 317 for line in lines: |
268 yield parse_line(line) | 318 yield parse_line(line) |
OLD | NEW |