Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: abp/filters/parser.py

Issue 29465720: Issue 4970 - Document the library API of python-abp (Closed)
Left Patch Set: Created June 14, 2017, 5:45 p.m.
Right Patch Set: Rebase to match the new master and retouch the docstrings. Created Oct. 24, 2017, 4:06 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Right: Side by side diff | Download
LEFTRIGHT
(no file at all)
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 """Parser for ABP filterlist format."""
17
16 from __future__ import unicode_literals 18 from __future__ import unicode_literals
17 19
18 import re 20 import re
19 from collections import namedtuple 21 from collections import namedtuple
20 22
21 __all__ = [ 23 __all__ = [
22 'FILTER_ACTION', 24 'FILTER_ACTION',
23 'FILTER_OPTION', 25 'FILTER_OPTION',
26 'SELECTOR_TYPE',
24 'ParseError', 27 'ParseError',
25 'SELECTOR_TYPE',
26 'parse_filterlist', 28 'parse_filterlist',
27 'parse_line', 29 'parse_line',
28 ] 30 ]
29 31
30 32
31 class ParseError(Exception): 33 class ParseError(Exception):
32 """Exception thrown by the parser when it encounters invalid input. 34 """Exception thrown by the parser when it encounters invalid input.
33 35
34 :param error: Description of the error. 36 Parameters
35 :param text: The text which was being parsed when an error occurred. 37 ----------
38 error : str
39 Description of the error.
40 text : str
41 The source text that caused an error.
42
Vasily Kuznetsov 2017/10/24 16:11:00 pep8-docstrings demands an empty line at the end o
36 """ 43 """
37 44
38 def __init__(self, error, text): 45 def __init__(self, error, text):
39 Exception.__init__(self, '{} in "{}"'.format(error, text)) 46 Exception.__init__(self, '{} in "{}"'.format(error, text))
40 self.text = text 47 self.text = text
41 self.error = error 48 self.error = error
42 49
43 50
44 # Constants related to filters (see https://adblockplus.org/filters). 51 # Constants related to filters (see https://adblockplus.org/filters).
45 class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants).
46 """Selector types""" 53 """Selector type constants."""
47 URL_PATTERN = 'url-pattern' # Normal URL patterns. 54 URL_PATTERN = 'url-pattern' # Normal URL patterns.
48 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. 55 URL_REGEXP = 'url-regexp' # Regular expressions for URLs.
49 CSS = 'css' # CSS selectors for hiding filters. 56 CSS = 'css' # CSS selectors for hiding filters.
50 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). 57 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4).
51 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. 58 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax.
52 59
53 60
54 class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). 61 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants).
55 """Filter actions""" 62 """Filter action constants."""
56 BLOCK = 'block' # Block the request. 63 BLOCK = 'block' # Block the request.
57 ALLOW = 'allow' # Allow the request (whitelist). 64 ALLOW = 'allow' # Allow the request (whitelist).
58 HIDE = 'hide' # Hide selected element(s). 65 HIDE = 'hide' # Hide selected element(s).
59 SHOW = 'show' # Show selected element(s) (whitelist). 66 SHOW = 'show' # Show selected element(s) (whitelist).
60 67
61 68
62 class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). 69 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants).
63 """Filter options""" 70 """Filter option constants."""
64 # Resource types. 71 # Resource types.
65 OTHER = 'other' 72 OTHER = 'other'
66 SCRIPT = 'script' 73 SCRIPT = 'script'
67 IMAGE = 'image' 74 IMAGE = 'image'
68 STYLESHEET = 'stylesheet' 75 STYLESHEET = 'stylesheet'
69 OBJECT = 'object' 76 OBJECT = 'object'
70 SUBDOCUMENT = 'subdocument' 77 SUBDOCUMENT = 'subdocument'
71 DOCUMENT = 'document' 78 DOCUMENT = 'document'
72 WEBSOCKET = 'websocket' 79 WEBSOCKET = 'websocket'
73 WEBRTC = 'webrtc' 80 WEBRTC = 'webrtc'
(...skipping 17 matching lines...) Expand all
91 DOMAIN = 'domain' 98 DOMAIN = 'domain'
92 THIRD_PARTY = 'third-party' 99 THIRD_PARTY = 'third-party'
93 COLLAPSE = 'collapse' 100 COLLAPSE = 'collapse'
94 SITEKEY = 'sitekey' 101 SITEKEY = 'sitekey'
95 DONOTTRACK = 'donottrack' 102 DONOTTRACK = 'donottrack'
96 103
97 104
98 def _line_type(name, field_names, format_string): 105 def _line_type(name, field_names, format_string):
99 """Define a line type. 106 """Define a line type.
100 107
101 :param name: The name of the line type to define. 108 Parameters
102 :param field_names: A sequence of field names or one space-separated 109 ----------
103 string that contains all field names. 110 name : str
104 :param format_string: A format specifier for converting this line type 111 The name of the line type to define.
105 back to string representation. 112 field_names : str or list
106 :returns: Class created with `namedtuple` that has `.type` set to 113 A sequence of field names or one space-separated string that contains
107 lowercased `name` and supports conversion back to string with 114 all field names.
108 `.to_string()` method. 115 format_string : str
116 A format specifier for converting this line type back to string
117 representation.
118
119 Returns
120 -------
121 class
122 Class created with `namedtuple` that has `.type` set to lowercased
123 `name` and supports conversion back to string with `.to_string()`
124 method.
125
109 """ 126 """
110 lt = namedtuple(name, field_names) 127 lt = namedtuple(name, field_names)
111 lt.type = name.lower() 128 lt.type = name.lower()
112 lt.to_string = lambda self: format_string.format(self) 129 lt.to_string = lambda self: format_string.format(self)
113 return lt 130 return lt
114 131
115 132
116 Header = _line_type('Header', 'version', '[{.version}]') 133 Header = _line_type('Header', 'version', '[{.version}]')
117 EmptyLine = _line_type('EmptyLine', '', '') 134 EmptyLine = _line_type('EmptyLine', '', '')
118 Comment = _line_type('Comment', 'text', '! {.text}') 135 Comment = _line_type('Comment', 'text', '! {.text}')
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 if domain: 233 if domain:
217 domains = [_parse_option(d) for d in domain.split(',')] 234 domains = [_parse_option(d) for d in domain.split(',')]
218 options.append((FILTER_OPTION.DOMAIN, domains)) 235 options.append((FILTER_OPTION.DOMAIN, domains))
219 236
220 return Filter(text, selector, action, options) 237 return Filter(text, selector, action, options)
221 238
222 239
223 def parse_filter(text): 240 def parse_filter(text):
224 """Parse one filter. 241 """Parse one filter.
225 242
226 :param text: Text representation of a filter. 243 Parameters
227 :returns: Filter object. 244 ----------
245 text : str
246 Filter to parse in ABP filter list syntax.
247
248 Returns
249 -------
250 namedtuple
251 Parsed filter.
252
228 """ 253 """
229 if '#' in text: 254 if '#' in text:
230 match = HIDING_FILTER_REGEXP.search(text) 255 match = HIDING_FILTER_REGEXP.search(text)
231 if match: 256 if match:
232 return _parse_hiding_filter(text, *match.groups()) 257 return _parse_hiding_filter(text, *match.groups())
233 return _parse_blocking_filter(text) 258 return _parse_blocking_filter(text)
234 259
235 260
236 def parse_line(line_text): 261 def parse_line(line_text):
237 """Parse one line of a filter list. 262 """Parse one line of a filter list.
238 263
239 :param line_text: Line of a filter list (must be a unicode string). 264 Parameters
240 :returns: Parsed line object (see `_line_type`). 265 ----------
241 :raises ParseError: If the line can't be successfully parsed. 266 line_text : str
267 Line of a filter list.
268
269 Returns
270 -------
271 namedtuple
272 Parsed line (see `_line_type`).
273
274 Raises
275 ------
276 ParseError
277 If the line can't be parsed.
242 """ 278 """
243 content = line_text.strip() 279 content = line_text.strip()
244 280
245 if content == '': 281 if content == '':
246 line = EmptyLine() 282 line = EmptyLine()
247 elif content.startswith('!'): 283 elif content.startswith('!'):
248 line = _parse_comment(content) 284 line = _parse_comment(content)
249 elif content.startswith('%') and content.endswith('%'): 285 elif content.startswith('%') and content.endswith('%'):
250 line = _parse_instruction(content) 286 line = _parse_instruction(content)
251 elif content.startswith('[') and content.endswith(']'): 287 elif content.startswith('[') and content.endswith(']'):
252 line = _parse_header(content) 288 line = _parse_header(content)
253 else: 289 else:
254 line = parse_filter(content) 290 line = parse_filter(content)
255 291
256 assert line.to_string().replace(' ', '') == content.replace(' ', '') 292 assert line.to_string().replace(' ', '') == content.replace(' ', '')
257 return line 293 return line
258 294
259 295
260 def parse_filterlist(lines): 296 def parse_filterlist(lines):
261 """Parse filter list from an iterable. 297 """Parse filter list from an iterable.
262 298
263 :param lines: List of strings or file or other iterable. 299 Parameters
264 :returns: Iterator over parsed lines. 300 ----------
265 :raises ParseError: Can be thrown during iteration for invalid lines. 301 lines : iterable of str
302 Lines of the filter list.
303
304 Returns
305 -------
306 iterator of namedtuple
307 Parsed lines of the filter list.
308
309 Raises
310 ------
311 ParseError
312 Thrown during iteration for invalid filter list lines.
313 TypeError
Vasily Kuznetsov 2017/10/24 16:11:00 As suggested by Matze, this error is easy to fores
314 If `lines` is not iterable.
315
266 """ 316 """
267 for line in lines: 317 for line in lines:
268 yield parse_line(line) 318 yield parse_line(line)
LEFTRIGHT

Powered by Google App Engine
This is Rietveld