Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: abp/filters/parser.py

Issue 29465715: Fixes 4969 - Add parsing of filters (Closed)
Left Patch Set: Address review comments on patch set 2 Created July 28, 2017, 6:52 p.m.
Right Patch Set: Rebase to 1f5d7ead9bff Created Oct. 24, 2017, 3:58 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | tests/test_parser.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-2017 eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 from __future__ import unicode_literals 16 from __future__ import unicode_literals
17 17
18 import re 18 import re
19 from collections import namedtuple 19 from collections import namedtuple
20 20
21 __all__ = ['parse_filterlist', 'parse_line', 'ParseError', 21 __all__ = [
22 'SELECTOR_TYPE', 'FILTER_ACTION', 'FILTER_OPTION'] 22 'FILTER_ACTION',
23 'FILTER_OPTION',
24 'ParseError',
25 'SELECTOR_TYPE',
26 'parse_filterlist',
27 'parse_line',
28 ]
23 29
24 30
25 class ParseError(Exception): 31 class ParseError(Exception):
26 """Exception thrown by the parser when it encounters invalid input. 32 """Exception thrown by the parser when it encounters invalid input.
27 33
28 :param error: Description of the error. 34 :param error: Description of the error.
29 :param text: The text which was being parsed when an error occurred. 35 :param text: The text which was being parsed when an error occurred.
30 """ 36 """
31 37
32 def __init__(self, error, text): 38 def __init__(self, error, text):
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
82 88
83 # Other options. 89 # Other options.
84 MATCH_CASE = 'match-case' 90 MATCH_CASE = 'match-case'
85 DOMAIN = 'domain' 91 DOMAIN = 'domain'
86 THIRD_PARTY = 'third-party' 92 THIRD_PARTY = 'third-party'
87 COLLAPSE = 'collapse' 93 COLLAPSE = 'collapse'
88 SITEKEY = 'sitekey' 94 SITEKEY = 'sitekey'
89 DONOTTRACK = 'donottrack' 95 DONOTTRACK = 'donottrack'
90 96
91 97
92 ALL_OPTIONS = {opt for name, opt in vars(FILTER_OPTION).items()
93 if not name.startswith('__')}
94
95
96 def _line_type(name, field_names, format_string): 98 def _line_type(name, field_names, format_string):
97 """Define a line type. 99 """Define a line type.
98 100
99 :param name: The name of the line type to define. 101 :param name: The name of the line type to define.
100 :param field_names: A sequence of field names or one space-separated 102 :param field_names: A sequence of field names or one space-separated
101 string that contains all field names. 103 string that contains all field names.
102 :param format_string: A format specifier for converting this line type 104 :param format_string: A format specifier for converting this line type
103 back to string representation. 105 back to string representation.
104 :returns: Class created with `namedtuple` that has `.type` set to 106 :returns: Class created with `namedtuple` that has `.type` set to
105 lowercased `name` and supports conversion back to string with 107 lowercased `name` and supports conversion back to string with
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
155 if '=' in option: 157 if '=' in option:
156 return option.split('=', 1) 158 return option.split('=', 1)
157 if option.startswith('~'): 159 if option.startswith('~'):
158 return option[1:], False 160 return option[1:], False
159 return option, True 161 return option, True
160 162
161 163
162 def _parse_filter_option(option): 164 def _parse_filter_option(option):
163 name, value = _parse_option(option) 165 name, value = _parse_option(option)
164 166
165 if name not in ALL_OPTIONS:
mathias 2017/08/01 06:31:35 I don't think this part of the code should validat… [comment preview truncated]
Vasily Kuznetsov 2017/08/02 16:21:17 Following our conversation, I agree. Done
166 raise ParseError('Unrecognized option', name)
167
168 # Handle special cases of multivalued options. 167 # Handle special cases of multivalued options.
169 if name == FILTER_OPTION.DOMAIN: 168 if name == FILTER_OPTION.DOMAIN:
170 value = [_parse_option(o) for o in value.split('|')] 169 value = [_parse_option(o) for o in value.split('|')]
171 elif name == FILTER_OPTION.SITEKEY: 170 elif name == FILTER_OPTION.SITEKEY:
172 value = value.split('|') 171 value = value.split('|')
173 172
174 return name, value 173 return name, value
175 174
176 175
177 def _parse_filter_options(options, separator=','): 176 def _parse_filter_options(options):
mathias 2017/08/01 06:31:35 Why is the separator a parameter? The only place w… [comment preview truncated]
Vasily Kuznetsov 2017/08/02 16:21:17 This is left-over from an earlier version that use… [comment preview truncated]
178 return [_parse_filter_option(o) for o in options.split(separator)] 177 return [_parse_filter_option(o) for o in options.split(',')]
179 178
180 179
181 def _parse_blocking_filter(text): 180 def _parse_blocking_filter(text):
182 # Based on RegExpFilter.fromText in lib/filterClasses.js 181 # Based on RegExpFilter.fromText in lib/filterClasses.js
183 # in https://hg.adblockplus.org/adblockpluscore. 182 # in https://hg.adblockplus.org/adblockpluscore.
184 action = FILTER_ACTION.BLOCK 183 action = FILTER_ACTION.BLOCK
185 options = [] 184 options = []
186 selector = text 185 selector = text
187 186
188 if selector.startswith('@@'): 187 if selector.startswith('@@'):
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 259
261 def parse_filterlist(lines): 260 def parse_filterlist(lines):
262 """Parse filter list from an iterable. 261 """Parse filter list from an iterable.
263 262
264 :param lines: List of strings or file or other iterable. 263 :param lines: List of strings or file or other iterable.
265 :returns: Iterator over parsed lines. 264 :returns: Iterator over parsed lines.
266 :raises ParseError: Can be thrown during iteration for invalid lines. 265 :raises ParseError: Can be thrown during iteration for invalid lines.
267 """ 266 """
268 for line in lines: 267 for line in lines:
269 yield parse_line(line) 268 yield parse_line(line)
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld