| Index: abp/filters/parser.py |
| =================================================================== |
| --- a/abp/filters/parser.py |
| +++ b/abp/filters/parser.py |
| @@ -8,64 +8,70 @@ |
| # Adblock Plus is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| +"""Parser for ABP filterlist format.""" |
| + |
| from __future__ import unicode_literals |
| import re |
| from collections import namedtuple |
| __all__ = [ |
| 'FILTER_ACTION', |
| 'FILTER_OPTION', |
| + 'SELECTOR_TYPE', |
| 'ParseError', |
| - 'SELECTOR_TYPE', |
| 'parse_filterlist', |
| 'parse_line', |
| ] |
| class ParseError(Exception): |
| """Exception thrown by the parser when it encounters invalid input. |
| - :param error: Description of the error. |
| - :param text: The text which was being parsed when an error occurred. |
| + Parameters |
| + ---------- |
| + error : str |
| + Description of the error. |
| + text : str |
| + The source text that caused the error. |
| """ |
| def __init__(self, error, text): |
| Exception.__init__(self, '{} in "{}"'.format(error, text)) |
| self.text = text |
| self.error = error |
| # Constants related to filters (see https://adblockplus.org/filters). |
| -class SELECTOR_TYPE: # flake8: noqa (This class is an enumeration constant). |
| - """Selector types""" |
| +class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). |
| + """Selector type constants.""" |
| URL_PATTERN = 'url-pattern' # Normal URL patterns. |
| URL_REGEXP = 'url-regexp' # Regular expressions for URLs. |
| CSS = 'css' # CSS selectors for hiding filters. |
| XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). |
| ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. |
| -class FILTER_ACTION: # flake8: noqa (This class is an enumeration constant). |
| - """Filter actions""" |
| +class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). |
| + """Filter action constants.""" |
| BLOCK = 'block' # Block the request. |
| ALLOW = 'allow' # Allow the request (whitelist). |
| HIDE = 'hide' # Hide selected element(s). |
| SHOW = 'show' # Show selected element(s) (whitelist). |
| -class FILTER_OPTION: # flake8: noqa (This class is an enumeration constant). |
| - """Filter options""" |
| +class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). |
| + """Filter option constants.""" |
| # Resource types. |
| OTHER = 'other' |
| SCRIPT = 'script' |
| IMAGE = 'image' |
| STYLESHEET = 'stylesheet' |
| OBJECT = 'object' |
| SUBDOCUMENT = 'subdocument' |
| DOCUMENT = 'document' |
| @@ -93,24 +99,33 @@ |
| COLLAPSE = 'collapse' |
| SITEKEY = 'sitekey' |
| DONOTTRACK = 'donottrack' |
| def _line_type(name, field_names, format_string): |
| """Define a line type. |
| - :param name: The name of the line type to define. |
| - :param field_names: A sequence of field names or one space-separated |
| - string that contains all field names. |
| - :param format_string: A format specifier for converting this line type |
| - back to string representation. |
| - :returns: Class created with `namedtuple` that has `.type` set to |
| - lowercased `name` and supports conversion back to string with |
| - `.to_string()` method. |
| + Parameters |
| + ---------- |
| + name : str |
| + The name of the line type to define. |
| + field_names : str or sequence of str |
| + A sequence of field names or one space-separated string that contains |
| + all field names. |
| + format_string : str |
| + A format specifier for converting this line type back to string |
| + representation. |
| + |
| + Returns |
| + ------- |
| + class |
| + Class created with `namedtuple` that has `.type` set to lowercased |
| + `name` and supports conversion back to string with `.to_string()` |
| + method. |
| """ |
| lt = namedtuple(name, field_names) |
| lt.type = name.lower() |
| lt.to_string = lambda self: format_string.format(self) |
| return lt |
| Header = _line_type('Header', 'version', '[{.version}]') |
| @@ -218,32 +233,50 @@ |
| options.append((FILTER_OPTION.DOMAIN, domains)) |
| return Filter(text, selector, action, options) |
| def parse_filter(text): |
| """Parse one filter. |
| - :param text: Text representation of a filter. |
| - :returns: Filter object. |
| + Parameters |
| + ---------- |
| + text : str |
| + Filter to parse in ABP filter list syntax. |
| + |
| + Returns |
| + ------- |
| + namedtuple |
| + Parsed filter. |
| """ |
| if '#' in text: |
| match = HIDING_FILTER_REGEXP.search(text) |
| if match: |
| return _parse_hiding_filter(text, *match.groups()) |
| return _parse_blocking_filter(text) |
| def parse_line(line_text): |
| """Parse one line of a filter list. |
| - :param line_text: Line of a filter list (must be a unicode string). |
| - :returns: Parsed line object (see `_line_type`). |
| - :raises ParseError: If the line can't be successfully parsed. |
| + Parameters |
| + ---------- |
| + line_text : str |
| + Line of a filter list. |
| + |
| + Returns |
| + ------- |
| + namedtuple |
| + Parsed line (see `_line_type`). |
| + |
| + Raises |
| + ------ |
| + ParseError |
| + If the line can't be parsed. |
| """ |
| content = line_text.strip() |
| if content == '': |
| line = EmptyLine() |
| elif content.startswith('!'): |
| line = _parse_comment(content) |
| elif content.startswith('%') and content.endswith('%'): |
| @@ -255,14 +288,25 @@ |
| assert line.to_string().replace(' ', '') == content.replace(' ', '') |
| return line |
| def parse_filterlist(lines): |
| """Parse filter list from an iterable. |
| - :param lines: List of strings or file or other iterable. |
| - :returns: Iterator over parsed lines. |
| - :raises ParseError: Can be thrown during iteration for invalid lines. |
| + Parameters |
| + ---------- |
| + lines : iterable of str |
| + Lines of the filter list. |
| + |
| + Returns |
| + ------- |
| + iterator of namedtuple |
| + Parsed lines of the filter list. |
| + |
| + Raises |
| + ------ |
| + ParseError |
| + Raised during iteration for invalid filter list lines. |
| """ |
| for line in lines: |
| yield parse_line(line) |