Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
12 # | 12 # |
13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
15 | 15 |
16 """Combine filter list fragments to produce filter lists.""" | 16 """Combine filter list fragments to produce filter lists.""" |
17 | 17 |
18 from __future__ import unicode_literals | 18 from __future__ import unicode_literals |
19 | 19 |
20 import base64 | |
21 import hashlib | |
22 import itertools | 20 import itertools |
23 import logging | 21 import logging |
24 import time | 22 import time |
25 | 23 |
26 from .parser import parse_filterlist, Comment, Metadata | 24 from .parser import parse_filterlist, Comment, Metadata |
27 from .sources import NotFound | 25 from .sources import NotFound |
28 | 26 |
29 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist'] | 27 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist', 'render_diff'] |
Vasily Kuznetsov
2018/08/17 10:30:54
render_diff() should probably become a public expo
rhowell
2018/08/20 18:21:27
Done.
| |
30 | 28 |
31 _logger = logging.getLogger(__name__) | 29 _logger = logging.getLogger(__name__) |
32 | 30 |
33 | 31 |
34 class IncludeError(Exception): | 32 class IncludeError(Exception): |
35 """Error in processing include instruction.""" | 33 """Error in processing include instruction.""" |
36 | 34 |
37 def __init__(self, error, stack): | 35 def __init__(self, error, stack): |
38 stack_str = ' from '.join(map("'{}'".format, reversed(stack))) | 36 stack_str = ' from '.join(map("'{}'".format, reversed(stack))) |
39 if stack_str: | 37 if stack_str: |
(...skipping 79 matching lines...) | |
119 """Insert metadata comment with version (a.k.a. date).""" | 117 """Insert metadata comment with version (a.k.a. date).""" |
120 first_line, rest = _first_and_rest(lines) | 118 first_line, rest = _first_and_rest(lines) |
121 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime())) | 119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime())) |
122 return itertools.chain([first_line, version], rest) | 120 return itertools.chain([first_line, version], rest) |
123 | 121 |
124 | 122 |
125 def _remove_duplicates(lines): | 123 def _remove_duplicates(lines): |
126 """Remove duplicate metadata and headers.""" | 124 """Remove duplicate metadata and headers.""" |
127 # Always remove checksum -- a checksum coming from a fragment | 125 # Always remove checksum -- a checksum coming from a fragment |
128 # will not match for the rendered list. | 126 # will not match for the rendered list. |
129 seen = {'Checksum'} | 127 seen = {'checksum'} |
130 for i, line in enumerate(lines): | 128 for i, line in enumerate(lines): |
131 if line.type == 'metadata': | 129 if line.type == 'metadata': |
132 if line.key not in seen: | 130 key = line.key.lower() |
133 seen.add(line.key) | 131 if key not in seen: |
132 seen.add(key) | |
134 yield line | 133 yield line |
135 elif line.type == 'header': | 134 elif line.type == 'header': |
136 if i == 0: | 135 if i == 0: |
137 yield line | 136 yield line |
138 else: | 137 else: |
139 yield line | 138 yield line |
140 | |
141 | |
142 def _insert_checksum(lines): | |
143 """Add checksum to the filter list. | |
144 | |
145 See https://adblockplus.org/filters#special-comments for description | |
146 of the checksum algorithm. | |
147 """ | |
148 md5sum = hashlib.md5() | |
149 | |
150 for line in lines: | |
151 if line.type != 'emptyline': | |
152 md5sum.update(line.to_string().encode('utf-8') + b'\n') | |
153 yield line | |
154 | |
155 checksum = base64.b64encode(md5sum.digest()).rstrip(b'=') | |
156 yield Metadata('Checksum', checksum.decode('utf-8')) | |
157 | 139 |
158 | 140 |
159 def _validate(lines): | 141 def _validate(lines): |
160 """Validate the final list.""" | 142 """Validate the final list.""" |
161 first_line, rest = _first_and_rest(lines) | 143 first_line, rest = _first_and_rest(lines) |
162 if first_line.type != 'header': | 144 if first_line.type != 'header': |
163 raise MissingHeader('No header found at the beginning of the input.') | 145 raise MissingHeader('No header found at the beginning of the input.') |
164 return itertools.chain([first_line], rest) | 146 return itertools.chain([first_line], rest) |
165 | 147 |
166 | 148 |
(...skipping 22 matching lines...) | |
189 When any of the fragments contain lines that can't be parsed. | 171 When any of the fragments contain lines that can't be parsed. |
190 MissingHeader | 172 MissingHeader |
191 If the top level fragment doesn't start with a valid header. This would | 173 If the top level fragment doesn't start with a valid header. This would |
192 lead to rendering an invalid filter list, so we immediately abort. | 174 lead to rendering an invalid filter list, so we immediately abort. |
193 | 175 |
194 """ | 176 """ |
195 _logger.info('Rendering: %s', name) | 177 _logger.info('Rendering: %s', name) |
196 lines, default_source = _get_and_parse_fragment(name, sources, top_source) | 178 lines, default_source = _get_and_parse_fragment(name, sources, top_source) |
197 lines = _process_includes(sources, default_source, [name], lines) | 179 lines = _process_includes(sources, default_source, [name], lines) |
198 for proc in [_process_timestamps, _insert_version, _remove_duplicates, | 180 for proc in [_process_timestamps, _insert_version, _remove_duplicates, |
199 _insert_checksum, _validate]: | 181 _validate]: |
200 lines = proc(lines) | 182 lines = proc(lines) |
201 return lines | 183 return lines |
202 | 184 |
203 | 185 |
186 def _split_list_for_diff(list_in): | |
187 """Split a filter list into metadata, keys, and rules.""" | |
Vasily Kuznetsov
2018/08/31 12:11:02
Nit: strictly speaking this function now returns m
rhowell
2018/08/31 21:21:02
Ah, good point! I'll update that.
| |
188 metadata = {} | |
189 rules = set() | |
190 for line in parse_filterlist(list_in): | |
191 if line.type == 'metadata': | |
192 metadata[line.key.lower()] = line | |
193 elif line.type == 'filter': | |
194 rules.add(line.to_string()) | |
195 return metadata, rules | |
196 | |
197 | |
204 def render_diff(base, latest): | 198 def render_diff(base, latest): |
Vasily Kuznetsov
2018/08/17 10:30:54
I think it makes more sense to make `base`, `lates
rhowell
2018/08/20 18:21:27
Done.
| |
205 """Return a diff between two filter lists.""" | 199 """Return a diff between two filter lists. |
206 # Collect the special comments | 200 |
207 diff = ['[Adblock Plus Diff]\n'] | 201 Parameters |
208 latest_fl, latest_md, latest_keys = (set() for i in range(3)) | 202 ---------- |
209 base_fl, base_md, base_keys = (set() for i in range(3)) | 203 base : iterator of str |
210 | 204 The base (old) list that we want to update to latest. |
211 for line in parse_filterlist(latest.splitlines()): | 205 latest : iterator of str |
212 if line.type == 'metadata' and 'Checksum' not in line.to_string(): | 206 The latest (most recent) list that we want to update to. |
Vasily Kuznetsov
2018/08/17 10:30:53
Note: If this lands after the checksum patch, the
rhowell
2018/08/20 18:21:27
Are we guaranteed that no filter lists will be enc
Vasily Kuznetsov
2018/08/21 14:59:59
Hm. No, for now we're not guaranteed that there's
Sebastian Noack
2018/08/21 19:42:45
Isn't python-abp stripping checksums when generati
Sebastian Noack
2018/08/22 16:03:29
Moving the discussion from IRC over here:
16:34:4
| |
213 latest_md.add(line.to_string()) | 207 |
214 latest_keys.add(line.key) | 208 Returns |
215 elif line.type == 'filter': | 209 ------- |
216 latest_fl.add(line.to_string()) | 210 iterable of str |
217 | 211 A diff between two lists (https://issues.adblockplus.org/ticket/6685) |
218 # Get the diff between the rest of the lines | 212 |
219 for line in parse_filterlist(base.splitlines()): | 213 """ |
Vasily Kuznetsov
2018/08/17 10:30:53
It would be good to move this repeated piece of co
rhowell
2018/08/20 18:21:27
Done.
| |
220 if line.type == 'metadata' and 'Checksum' not in line.to_string(): | 214 latest_metadata, latest_rules = _split_list_for_diff(latest) |
221 base_md.add(line.to_string()) | 215 base_metadata, base_rules = _split_list_for_diff(base) |
222 base_keys.add(line.key) | 216 |
223 elif line.type == 'filter': | 217 yield '[Adblock Plus Diff]' |
224 base_fl.add(line.to_string()) | 218 for key, latest in latest_metadata.items(): |
225 new_md = latest_md - base_md | 219 base = base_metadata.get(key) |
226 removed_keys = base_keys - latest_keys | 220 if not base or base.value != latest.value: |
227 add_fl = latest_fl - base_fl | 221 yield latest.to_string() |
228 remove_fl = base_fl - latest_fl | 222 for key in set(base_metadata) - set(latest_metadata): |
229 for item in new_md: | 223 yield '! {}:'.format(base_metadata[key].key) |
230 diff.append('{}\n'.format(item)) | 224 for rule in base_rules - latest_rules: |
Vasily Kuznetsov
2018/08/17 10:30:53
If we return an iterable of strings from this func
rhowell
2018/08/20 18:21:27
Done.
| |
231 for key in removed_keys: | 225 yield '- {}'.format(rule) |
232 # If a special comment has been removed, enter it with a blank value | 226 for rule in latest_rules - base_rules: |
233 # so the client will set it back to the default value | 227 yield '+ {}'.format(rule) |
234 diff.append('! {}:\n'.format(key)) | |
235 for item in add_fl: | |
236 diff.append('+ {}\n'.format(item)) | |
237 for item in remove_fl: | |
238 diff.append('- {}\n'.format(item)) | |
239 return ''.join(diff) | |
LEFT | RIGHT |