OLD | NEW |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-2017 eyeo GmbH | 2 # Copyright (C) 2006-2017 eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 16 matching lines...) Expand all Loading... |
27 :param error: Description of the error. | 27 :param error: Description of the error. |
28 :param text: The text which was being parsed when an error occurred. | 28 :param text: The text which was being parsed when an error occurred. |
29 """ | 29 """ |
30 | 30 |
def __init__(self, error, text):
    """Build the exception message and remember both of its parts.

    :param error: Description of the error.
    :param text: The text which was being parsed when the error occurred.
    """
    # str(exc) reads: <error> in "<text>"
    message = '{} in "{}"'.format(error, text)
    Exception.__init__(self, message)
    # Keep the raw pieces around for callers that want structured access.
    self.error = error
    self.text = text
35 | 35 |
36 | 36 |
def _line_type(name, field_names, format_string):
    """Create a namedtuple-based class that represents one kind of line.

    :param name: The name of the line type to define; it becomes the name
                 of the generated class.
    :param field_names: A sequence of field names or one space-separated
                        string that contains all field names.
    :param format_string: A `str.format` template used to convert a line of
                          this type back to its string representation. The
                          line object is passed as the only positional
                          argument, so fields are referenced as `{.field}`
                          or `{0.field}`.
    :returns: Class created with `namedtuple` that has `.type` set to
              lowercased `name` and supports conversion back to string with
              the `.to_string()` method.
    """
    def to_string(self):
        # The template pulls the fields it needs off the instance itself.
        return format_string.format(self)

    line_class = namedtuple(name, field_names)
    line_class.type = name.lower()
    line_class.to_string = to_string
    return line_class
51 | 53 |
52 | 54 |
# Concrete line types produced by the parser. Each format string is the
# template used by `.to_string()` to turn the parsed line back into text.
Header = _line_type(
    name='Header',
    field_names='version',
    format_string='[{.version}]',
)
EmptyLine = _line_type(
    name='EmptyLine',
    field_names='',
    format_string='',
)
Comment = _line_type(
    name='Comment',
    field_names='text',
    format_string='! {.text}',
)
Metadata = _line_type(
    name='Metadata',
    field_names='key value',
    format_string='! {0.key}: {0.value}',
)
Filter = _line_type(
    name='Filter',
    field_names='expression',
    format_string='{.expression}',
)
Include = _line_type(
    name='Include',
    field_names='target',
    format_string='%include {0.target}%',
)
59 | 61 |
60 | 62 |
# "! Key: value" comment lines; group 1 is the key, group 2 the value.
METADATA_REGEXP = re.compile(r'!\s*(\w+)\s*:\s*(.*)')

# Metadata keys listed by this module.
METADATA_KEYS = {
    'Checksum',
    'Expires',
    'Homepage',
    'Redirect',
    'Title',
    'Version',
}

# "%include <target>%" instructions; group 1 is the include target.
INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')

# "[Adblock]" or "[Adblock Plus <version>]" headers, matched
# case-insensitively; group 1 is the text between the brackets.
HEADER_REGEXP = re.compile(
    r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]',
    flags=re.IGNORECASE,
)
66 | 68 |
67 | 69 |
68 def _parse_comment(text): | 70 def _parse_comment(text): |
(...skipping 14 matching lines...) Expand all Loading... |
83 match = INCLUDE_REGEXP.match(text) | 85 match = INCLUDE_REGEXP.match(text) |
84 if not match: | 86 if not match: |
85 raise ParseError('Unrecognized instruction', text) | 87 raise ParseError('Unrecognized instruction', text) |
86 return Include(match.group(1)) | 88 return Include(match.group(1)) |
87 | 89 |
88 | 90 |
def parse_line(line_text):
    """Parse one line of a filter list.

    The line is stripped of surrounding whitespace and then classified by
    its first (and, for instructions and headers, last) character:
    empty -> `EmptyLine`, ``!...`` -> comment or metadata, ``%...%`` ->
    instruction, ``[...]`` -> header, anything else -> `Filter`.

    :param line_text: Line of a filter list (must be a unicode string).
    :returns: Parsed line object (see `_line_type`).
    :raises ParseError: If the line can't be successfully parsed.
    """
    content = line_text.strip()

    if content == '':
        line = EmptyLine()
    elif content.startswith('!'):
        line = _parse_comment(content)
    elif content.startswith('%') and content.endswith('%'):
        line = _parse_instruction(content)
    elif content.startswith('[') and content.endswith(']'):
        line = _parse_header(content)
    else:
        line = Filter(content)

    # Internal sanity check: converting the parsed line back to a string
    # must reproduce the input, ignoring whitespace. The parsing regexps
    # accept any whitespace character (`\s`), not just U+0020, so all
    # whitespace is dropped before comparing; stripping only spaces made
    # valid input such as '%include\tfoo%' trip the assertion.
    roundtrip = ''.join(line.to_string().split())
    assert roundtrip == ''.join(content.split())
    return line
111 | 113 |
112 | 114 |
def parse_filterlist(lines):
    """Lazily parse a filter list, one line at a time.

    Nothing is read from `lines` until the returned iterator is advanced;
    each step consumes one item and passes it to `parse_line`.

    :param lines: List of strings or file or other iterable.
    :returns: Iterator over parsed lines.
    :raises ParseError: Can be thrown during iteration for invalid lines.
    """
    for raw_line in lines:
        parsed = parse_line(raw_line)
        yield parsed
OLD | NEW |