abp/filters/parser.py - Issue 29465720: Issue 4970 - Document the library API of python-abp

Unified Diff: abp/filters/parser.py

Issue 29465720: Issue 4970 - Document the library API of python-abp (Closed)

Patch Set: Improve the docstrings and help() behavior, shorten the README, add Development documentation and a… Created Oct. 10, 2017, 4:25 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: abp/filters/parser.py

===================================================================

--- a/abp/filters/parser.py

+++ b/abp/filters/parser.py

@@ -8,64 +8,70 @@

# Adblock Plus is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

+"""Parser for ABP filterlist format."""

from __future__ import unicode_literals

import re

from collections import namedtuple

__all__ = [

'FILTER_ACTION',

'FILTER_OPTION',

+ 'SELECTOR_TYPE',

'ParseError',

- 'SELECTOR_TYPE',

'parse_filterlist',

'parse_line',

]

class ParseError(Exception):

"""Exception thrown by the parser when it encounters invalid input.

- :param error: Description of the error.

- :param text: The text which was being parsed when an error occurred.

+ Parameters

+ ----------

+ error : str

+ Description of the error.

+ text : str

+ The source text that caused an error.

"""

def __init__(self, error, text):

Exception.__init__(self, '{} in "{}"'.format(error, text))

self.text = text

self.error = error

# Constants related to filters (see https://adblockplus.org/filters).

-class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant).

- """Selector types"""

+class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants).

+ """Selector type constants."""

URL_PATTERN = 'url-pattern' # Normal URL patterns.

URL_REGEXP = 'url-regexp' # Regular expressions for URLs.

CSS = 'css' # CSS selectors for hiding filters.

XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4).

ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax.

-class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant).

- """Filter actions"""

+class FILTER_ACTION: # flake8: noqa (this is a namespace of constants).

+ """Filter action constants."""

BLOCK = 'block' # Block the request.

ALLOW = 'allow' # Allow the request (whitelist).

HIDE = 'hide' # Hide selected element(s).

SHOW = 'show' # Show selected element(s) (whitelist).

-class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant).

- """Filter options"""

+class FILTER_OPTION: # flake8: noqa (this is a namespace of constants).

+ """Filter option constants."""

# Resource types.

OTHER = 'other'

SCRIPT = 'script'

IMAGE = 'image'

STYLESHEET = 'stylesheet'

OBJECT = 'object'

SUBDOCUMENT = 'subdocument'

DOCUMENT = 'document'

@@ -93,24 +99,33 @@

COLLAPSE = 'collapse'

SITEKEY = 'sitekey'

DONOTTRACK = 'donottrack'

def _line_type(name, field_names, format_string):

"""Define a line type.

- :param name: The name of the line type to define.

- :param field_names: A sequence of field names or one space-separated

- string that contains all field names.

- :param format_string: A format specifier for converting this line type

- back to string representation.

- :returns: Class created with `namedtuple` that has `.type` set to

- lowercased `name` and supports conversion back to string with

- `.to_string()` method.

+ Parameters

+ ----------

+ name : str

+ The name of the line type to define.

+ field_names : str or list

+ A sequence of field names or one space-separated string that contains

+ all field names.

+ format_string : str

+ A format specifier for converting this line type back to string

+ representation.

+ Returns

+ -------

+ class

+ Class created with `namedtuple` that has `.type` set to lowercased

+ `name` and supports conversion back to string with `.to_string()`

+ method.

"""

lt = namedtuple(name, field_names)

lt.type = name.lower()

lt.to_string = lambda self: format_string.format(self)

return lt

Header = _line_type('Header', 'version', '[{.version}]')

@@ -218,32 +233,50 @@

options.append((FILTER_OPTION.DOMAIN, domains))

return Filter(text, selector, action, options)

def parse_filter(text):

"""Parse one filter.

- :param text: Text representation of a filter.

- :returns: Filter object.

+ Parameters

+ ----------

+ text : str

+ Filter to parse in ABP filter list syntax.

+ Returns

+ -------

+ namedtuple

+ Parsed filter.

"""

if '#' in text:

match = HIDING_FILTER_REGEXP.search(text)

if match:

return _parse_hiding_filter(text, *match.groups())

return _parse_blocking_filter(text)

def parse_line(line_text):

"""Parse one line of a filter list.

- :param line_text: Line of a filter list (must be a unicode string).

- :returns: Parsed line object (see `_line_type`).

- :raises ParseError: If the line can't be successfully parsed.

+ Parameters

+ ----------

+ line_text : str

+ Line of a filter list.

+ Returns

+ -------

+ namedtuple

+ Parsed line (see `_line_type`).

+ Raises

+ ------

+ ParseError

+ If the line can't be parsed.

"""

content = line_text.strip()

if content == '':

line = EmptyLine()

elif content.startswith('!'):

line = _parse_comment(content)

elif content.startswith('%') and content.endswith('%'):

@@ -255,14 +288,25 @@

assert line.to_string().replace(' ', '') == content.replace(' ', '')

return line

def parse_filterlist(lines):

"""Parse filter list from an iterable.

- :param lines: List of strings or file or other iterable.

- :returns: Iterator over parsed lines.

- :raises ParseError: Can be thrown during iteration for invalid lines.

+ Parameters

+ ----------

+ lines : iterable of str

+ Lines of the filter list.

+ Returns

+ -------

+ iterator of namedtuple

+ Parsed lines of the filter list.

+ Raises

+ ------

+ ParseError

+ Thrown during iteration for invalid filter list lines.

"""

for line in lines:

yield parse_line(line)

« no previous file with comments | « abp/filters/__init__.py ('k') | abp/filters/render_script.py » ('j') | no next file with comments »