Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp/filters/parser.py

Issue 29465720: Issue 4970 - Document the library API of python-abp (Closed)
Patch Set: Improve the docstrings and help() behavior, shorten the README, add Development documentation and a… Created Oct. 10, 2017, 4:25 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « abp/filters/__init__.py ('k') | abp/filters/render_script.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-2017 eyeo GmbH 2 # Copyright (C) 2006-2017 eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 """Parser for ABP filterlist format."""
17
16 from __future__ import unicode_literals 18 from __future__ import unicode_literals
17 19
18 import re 20 import re
19 from collections import namedtuple 21 from collections import namedtuple
20 22
21 __all__ = [ 23 __all__ = [
22 'FILTER_ACTION', 24 'FILTER_ACTION',
23 'FILTER_OPTION', 25 'FILTER_OPTION',
26 'SELECTOR_TYPE',
24 'ParseError', 27 'ParseError',
25 'SELECTOR_TYPE',
26 'parse_filterlist', 28 'parse_filterlist',
27 'parse_line', 29 'parse_line',
28 ] 30 ]
29 31
30 32
31 class ParseError(Exception): 33 class ParseError(Exception):
32 """Exception thrown by the parser when it encounters invalid input. 34 """Exception thrown by the parser when it encounters invalid input.
33 35
34 :param error: Description of the error. 36 Parameters
35 :param text: The text which was being parsed when an error occurred. 37 ----------
38 error : str
39 Description of the error.
40 text : str
41 The source text that caused an error.
36 """ 42 """
37 43
38 def __init__(self, error, text): 44 def __init__(self, error, text):
39 Exception.__init__(self, '{} in "{}"'.format(error, text)) 45 Exception.__init__(self, '{} in "{}"'.format(error, text))
40 self.text = text 46 self.text = text
41 self.error = error 47 self.error = error
42 48
43 49
44 # Constants related to filters (see https://adblockplus.org/filters). 50 # Constants related to filters (see https://adblockplus.org/filters).
45 class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). 51 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants).
46 """Selector types""" 52 """Selector type constants."""
47 URL_PATTERN = 'url-pattern' # Normal URL patterns. 53 URL_PATTERN = 'url-pattern' # Normal URL patterns.
48 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. 54 URL_REGEXP = 'url-regexp' # Regular expressions for URLs.
49 CSS = 'css' # CSS selectors for hiding filters. 55 CSS = 'css' # CSS selectors for hiding filters.
50 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). 56 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4).
51 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. 57 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax.
52 58
53 59
54 class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). 60 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants).
55 """Filter actions""" 61 """Filter action constants."""
56 BLOCK = 'block' # Block the request. 62 BLOCK = 'block' # Block the request.
57 ALLOW = 'allow' # Allow the request (whitelist). 63 ALLOW = 'allow' # Allow the request (whitelist).
58 HIDE = 'hide' # Hide selected element(s). 64 HIDE = 'hide' # Hide selected element(s).
59 SHOW = 'show' # Show selected element(s) (whitelist). 65 SHOW = 'show' # Show selected element(s) (whitelist).
60 66
61 67
62 class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). 68 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants).
63 """Filter options""" 69 """Filter option constants."""
64 # Resource types. 70 # Resource types.
65 OTHER = 'other' 71 OTHER = 'other'
66 SCRIPT = 'script' 72 SCRIPT = 'script'
67 IMAGE = 'image' 73 IMAGE = 'image'
68 STYLESHEET = 'stylesheet' 74 STYLESHEET = 'stylesheet'
69 OBJECT = 'object' 75 OBJECT = 'object'
70 SUBDOCUMENT = 'subdocument' 76 SUBDOCUMENT = 'subdocument'
71 DOCUMENT = 'document' 77 DOCUMENT = 'document'
72 WEBSOCKET = 'websocket' 78 WEBSOCKET = 'websocket'
73 WEBRTC = 'webrtc' 79 WEBRTC = 'webrtc'
(...skipping 17 matching lines...) Expand all
91 DOMAIN = 'domain' 97 DOMAIN = 'domain'
92 THIRD_PARTY = 'third-party' 98 THIRD_PARTY = 'third-party'
93 COLLAPSE = 'collapse' 99 COLLAPSE = 'collapse'
94 SITEKEY = 'sitekey' 100 SITEKEY = 'sitekey'
95 DONOTTRACK = 'donottrack' 101 DONOTTRACK = 'donottrack'
96 102
97 103
98 def _line_type(name, field_names, format_string): 104 def _line_type(name, field_names, format_string):
99 """Define a line type. 105 """Define a line type.
100 106
101 :param name: The name of the line type to define. 107 Parameters
102 :param field_names: A sequence of field names or one space-separated 108 ----------
103 string that contains all field names. 109 name : str
104 :param format_string: A format specifier for converting this line type 110 The name of the line type to define.
105 back to string representation. 111 field_names : str or list
106 :returns: Class created with `namedtuple` that has `.type` set to 112 A sequence of field names or one space-separated string that contains
107 lowercased `name` and supports conversion back to string with 113 all field names.
108 `.to_string()` method. 114 format_string : str
115 A format specifier for converting this line type back to string
116 representation.
117
118 Returns
119 -------
120 class
121 Class created with `namedtuple` that has `.type` set to lowercased
122 `name` and supports conversion back to string with `.to_string()`
123 method.
109 """ 124 """
110 lt = namedtuple(name, field_names) 125 lt = namedtuple(name, field_names)
111 lt.type = name.lower() 126 lt.type = name.lower()
112 lt.to_string = lambda self: format_string.format(self) 127 lt.to_string = lambda self: format_string.format(self)
113 return lt 128 return lt
114 129
115 130
116 Header = _line_type('Header', 'version', '[{.version}]') 131 Header = _line_type('Header', 'version', '[{.version}]')
117 EmptyLine = _line_type('EmptyLine', '', '') 132 EmptyLine = _line_type('EmptyLine', '', '')
118 Comment = _line_type('Comment', 'text', '! {.text}') 133 Comment = _line_type('Comment', 'text', '! {.text}')
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 if domain: 231 if domain:
217 domains = [_parse_option(d) for d in domain.split(',')] 232 domains = [_parse_option(d) for d in domain.split(',')]
218 options.append((FILTER_OPTION.DOMAIN, domains)) 233 options.append((FILTER_OPTION.DOMAIN, domains))
219 234
220 return Filter(text, selector, action, options) 235 return Filter(text, selector, action, options)
221 236
222 237
223 def parse_filter(text): 238 def parse_filter(text):
224 """Parse one filter. 239 """Parse one filter.
225 240
226 :param text: Text representation of a filter. 241 Parameters
227 :returns: Filter object. 242 ----------
243 text : str
244 Filter to parse in ABP filter list syntax.
245
246 Returns
247 -------
248 namedtuple
249 Parsed filter.
228 """ 250 """
229 if '#' in text: 251 if '#' in text:
230 match = HIDING_FILTER_REGEXP.search(text) 252 match = HIDING_FILTER_REGEXP.search(text)
231 if match: 253 if match:
232 return _parse_hiding_filter(text, *match.groups()) 254 return _parse_hiding_filter(text, *match.groups())
233 return _parse_blocking_filter(text) 255 return _parse_blocking_filter(text)
234 256
235 257
236 def parse_line(line_text): 258 def parse_line(line_text):
237 """Parse one line of a filter list. 259 """Parse one line of a filter list.
238 260
239 :param line_text: Line of a filter list (must be a unicode string). 261 Parameters
240 :returns: Parsed line object (see `_line_type`). 262 ----------
241 :raises ParseError: If the line can't be successfully parsed. 263 line_text : str
264 Line of a filter list.
265
266 Returns
267 -------
268 namedtuple
269 Parsed line (see `_line_type`).
270
271 Raises
272 ------
273 ParseError
274 If the line can't be parsed.
242 """ 275 """
243 content = line_text.strip() 276 content = line_text.strip()
244 277
245 if content == '': 278 if content == '':
246 line = EmptyLine() 279 line = EmptyLine()
247 elif content.startswith('!'): 280 elif content.startswith('!'):
248 line = _parse_comment(content) 281 line = _parse_comment(content)
249 elif content.startswith('%') and content.endswith('%'): 282 elif content.startswith('%') and content.endswith('%'):
250 line = _parse_instruction(content) 283 line = _parse_instruction(content)
251 elif content.startswith('[') and content.endswith(']'): 284 elif content.startswith('[') and content.endswith(']'):
252 line = _parse_header(content) 285 line = _parse_header(content)
253 else: 286 else:
254 line = parse_filter(content) 287 line = parse_filter(content)
255 288
256 assert line.to_string().replace(' ', '') == content.replace(' ', '') 289 assert line.to_string().replace(' ', '') == content.replace(' ', '')
257 return line 290 return line
258 291
259 292
260 def parse_filterlist(lines): 293 def parse_filterlist(lines):
261 """Parse filter list from an iterable. 294 """Parse filter list from an iterable.
262 295
263 :param lines: List of strings or file or other iterable. 296 Parameters
264 :returns: Iterator over parsed lines. 297 ----------
265 :raises ParseError: Can be thrown during iteration for invalid lines. 298 lines : iterable of str
299 Lines of the filter list.
300
301 Returns
302 -------
303 iterator of namedtuple
304 Parsed lines of the filter list.
305
306 Raises
307 ------
308 ParseError
309 Thrown during iteration for invalid filter list lines.
266 """ 310 """
267 for line in lines: 311 for line in lines:
268 yield parse_line(line) 312 yield parse_line(line)
OLDNEW
« no previous file with comments | « abp/filters/__init__.py ('k') | abp/filters/render_script.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld