Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | |
2 # Copyright (C) 2006-present eyeo GmbH | |
3 # | |
4 # Adblock Plus is free software: you can redistribute it and/or modify | |
5 # it under the terms of the GNU General Public License version 3 as | |
6 # published by the Free Software Foundation. | |
7 # | |
8 # Adblock Plus is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public License | |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
15 | |
16 """Extract blocks of filters separated by comments. | |
17 | |
18 Blocks of filters separated by comments are common in real world filter lists | |
19 (e.g. easylist). This structure itself is not documented or standardized but | |
20 it's often useful to be able to parse it. | |
21 | |
22 This module exports one function: to_blocks(), that further processes a filter | |
23 list (after it has been parsed by abp.filters.parser) by splitting it into | |
24 blocks of filters. The comments preceding each block are merged to produce | |
25 the block description. | |
26 | |
27 Some filter lists (e.g. ABP exception list) also make use of variable notation | |
28 ("!:varname=value") to define specific attributes of filter blocks. This | |
29 module supports this notation and will map those variables into attributes | |
30 of the block objects. If variables are present in comments preceding a block, | |
31 only non-variable comments that follow the first variable declaration will be | |
32 included in the block description. | |
33 | |
34 Blocks also provide a method to convert them to dictionaries: .to_dict() -- | |
35 this can be used for JSON conversion. | |
36 | |
37 Example | |
38 ------- | |
39 | |
40 The following code will dump the blocks as dictionaries: | |
41 | |
42 from abp.filters import parse_filterlist | |
43 from abp.filters.blocks import to_blocks | |
44 | |
45 with open(fl_path) as f: | |
46 for block in to_blocks(parse_filterlist(f)): | |
47 print(block.to_dict()) | |
48 | |
49 This will produce output like this: | |
50 | |
51 {'partner_token': 'abc', 'partner_id': '3372', 'type': 'partner', | |
52 'description': 'Some comments', 'filters': [...]} | |
53 | |
54 """ | |
55 | |
56 from __future__ import unicode_literals | |
57 | |
58 import re | |
59 | |
60 from abp.filters.parser import ParseError | |
61 | |
62 __all__ = ['to_blocks'] | |
63 | |
64 VAR_REGEXP = re.compile(r'^:(\w+)=(.*)$') | |
65 | |
66 | |
class FiltersBlock(object):
    """A block of filters (preceded by comments).

    Variables declared in the comments (``:name=value``) become attributes
    of the block.  The remaining comments are joined with newlines into
    ``description`` and the filter lines are stored in ``filters``.
    """

    # Attribute names that a variable must not use: `filters` and
    # `description` are set by __init__, and `to_dict` is a method that an
    # instance attribute of the same name would shadow.
    _RESERVED_NAMES = frozenset(['filters', 'description', 'to_dict'])

    @staticmethod
    def _validate_varname(name, comment):
        """Check that variable name is allowed, otherwise raise.

        Parameters
        ----------
        name : str
            Variable name extracted from the comment.
        comment : object
            The comment line that declares the variable; its `to_string()`
            result is attached to the error.

        Raises
        ------
        ParseError
            If the name starts with an underscore or is one of the reserved
            attribute names of the block.

        """
        if name.startswith('_') or name in FiltersBlock._RESERVED_NAMES:
            raise ParseError('Invalid variable name', comment.to_string())

    def __init__(self, comments, filters):
        """Create a filter block from filters and comments preceding them.

        Parameters
        ----------
        comments : iterable
            Comment lines preceding the filters; each must have a `text`
            attribute (and `to_string()` for error reporting).
        filters : list
            Filter lines that belong to this block.

        Raises
        ------
        ParseError
            If a variable declared in the comments has a disallowed name.

        """
        descr_lines = []
        vars_started = False
        for comment in comments:
            match = VAR_REGEXP.search(comment.text)
            if match:
                name, value = match.groups()
                self._validate_varname(name, comment)
                setattr(self, name, value)
                if not vars_started:
                    # Any comments before the first variable, if any, are not
                    # considered part of the block.
                    descr_lines = []
                    vars_started = True
            else:
                descr_lines.append(comment.text)

        self.description = '\n'.join(descr_lines)
        self.filters = filters

    def to_dict(self):
        """Return the block as a dictionary.

        The result is a copy of the instance attributes (variables,
        `description` and `filters`), with every filter replaced by the
        result of its own `to_dict()`.  The block itself is not modified.
        """
        ret = dict(self.__dict__)
        ret['filters'] = [f.to_dict() for f in ret['filters']]
        return ret
101 | |
102 | |
def to_blocks(parsed_lines):
    """Group parsed filter list lines into blocks of filters.

    A block consists of a run of comment lines followed by a run of filter
    lines.  A comment that appears after at least one filter closes the
    current block and opens the next one.  Lines that are neither comments
    nor filters are skipped, and comments that are not followed by any
    filter do not produce a block.

    Parameters
    ----------
    parsed_lines : iterable of namedtuple
        Parsed filter list (see `parser.py` for details on how it's
        represented).

    Returns
    -------
    blocks : iterable of FiltersBlock.
        Blocks extracted from the parsed filter list.

    """
    pending_comments = []
    pending_filters = []

    for item in parsed_lines:
        kind = item.type
        if kind == 'filter':
            pending_filters.append(item)
            continue
        if kind != 'comment':
            continue

        # A comment after some filters marks the start of the next block.
        if pending_filters:
            yield FiltersBlock(pending_comments, pending_filters)
            pending_comments, pending_filters = [], []
        pending_comments.append(item)

    # Emit the last block, unless the input ended without any filters.
    if pending_filters:
        yield FiltersBlock(pending_comments, pending_filters)
OLD | NEW |