Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp/filters/renderer.py

Issue 29845767: Issue 6685 - Offer incremental filter list downloads (Closed) Base URL: https://hg.adblockplus.org/python-abp/
Patch Set: Remove metadata_keys, yield deletions first Created Aug. 27, 2018, 10:04 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 """Combine filter list fragments to produce filter lists.""" 16 """Combine filter list fragments to produce filter lists."""
17 17
18 from __future__ import unicode_literals 18 from __future__ import unicode_literals
19 19
20 import itertools 20 import itertools
21 import logging 21 import logging
22 import time 22 import time
23 23
24 from .parser import parse_filterlist, Comment, Metadata 24 from .parser import parse_filterlist, Comment, Metadata
25 from .sources import NotFound 25 from .sources import NotFound
26 26
27 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist'] 27 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist', 'render_diff']
28 28
29 _logger = logging.getLogger(__name__) 29 _logger = logging.getLogger(__name__)
30 30
31 31
32 class IncludeError(Exception): 32 class IncludeError(Exception):
33 """Error in processing include instruction.""" 33 """Error in processing include instruction."""
34 34
35 def __init__(self, error, stack): 35 def __init__(self, error, stack):
36 stack_str = ' from '.join(map("'{}'".format, reversed(stack))) 36 stack_str = ' from '.join(map("'{}'".format, reversed(stack)))
37 if stack_str: 37 if stack_str:
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
117 """Insert metadata comment with version (a.k.a. date).""" 117 """Insert metadata comment with version (a.k.a. date)."""
118 first_line, rest = _first_and_rest(lines) 118 first_line, rest = _first_and_rest(lines)
119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime())) 119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime()))
120 return itertools.chain([first_line, version], rest) 120 return itertools.chain([first_line, version], rest)
121 121
122 122
123 def _remove_duplicates(lines): 123 def _remove_duplicates(lines):
124 """Remove duplicate metadata and headers.""" 124 """Remove duplicate metadata and headers."""
125 # Always remove checksum -- a checksum coming from a fragment 125 # Always remove checksum -- a checksum coming from a fragment
126 # will not match for the rendered list. 126 # will not match for the rendered list.
127 seen = {'Checksum'} 127 seen = {'Checksum'}
Sebastian Noack 2018/08/28 19:52:18 Since we parse metadata with arbitrary keys now, w
rhowell 2018/08/29 21:43:34 Done.
128 for i, line in enumerate(lines): 128 for i, line in enumerate(lines):
129 if line.type == 'metadata': 129 if line.type == 'metadata':
130 if line.key not in seen: 130 if line.key not in seen:
131 seen.add(line.key) 131 seen.add(line.key)
132 yield line 132 yield line
133 elif line.type == 'header': 133 elif line.type == 'header':
134 if i == 0: 134 if i == 0:
135 yield line 135 yield line
136 else: 136 else:
137 yield line 137 yield line
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 lead to rendering an invalid filter list, so we immediately abort. 173 lead to rendering an invalid filter list, so we immediately abort.
174 174
175 """ 175 """
176 _logger.info('Rendering: %s', name) 176 _logger.info('Rendering: %s', name)
177 lines, default_source = _get_and_parse_fragment(name, sources, top_source) 177 lines, default_source = _get_and_parse_fragment(name, sources, top_source)
178 lines = _process_includes(sources, default_source, [name], lines) 178 lines = _process_includes(sources, default_source, [name], lines)
179 for proc in [_process_timestamps, _insert_version, _remove_duplicates, 179 for proc in [_process_timestamps, _insert_version, _remove_duplicates,
180 _validate]: 180 _validate]:
181 lines = proc(lines) 181 lines = proc(lines)
182 return lines 182 return lines
183
184
185 def _split_list_for_diff(list_in):
186 filterlist, metadata, keys = set(), set(), set()
187 for line in parse_filterlist(list_in):
188 if line.type == 'metadata' and 'Checksum' not in line.to_string():
Sebastian Noack 2018/08/28 19:52:18 I think it has been agreed on that this isn't nece
rhowell 2018/08/29 21:43:35 Done.
189 metadata.add(line.to_string())
190 keys.add(line.key)
Sebastian Noack 2018/08/28 19:52:18 This code has to be changed as well, to handle cas
rhowell 2018/08/29 21:43:34 Any reason to use a dict instead of a set? I guess
Sebastian Noack 2018/08/29 21:56:37 The algorithm I suggest here, is the (probably) si
rhowell 2018/08/29 22:52:23 Done.
191 elif line.type == 'filter':
192 filterlist.add(line.to_string())
193 return filterlist, metadata, keys
194
195
196 def render_diff(base, latest):
197 """Return a diff between two filter lists.
198
199 Parameters
200 ----------
201 base : iterator of str
202 The base (old) list that we want to update to latest.
203 latest : iterator of str
204 The latest (most recent) list that we want to update to.
205
206 Returns
207 -------
208 iterable of str
209 A diff between two lists (https://issues.adblockplus.org/ticket/6685)
210
211 """
212 latest_fl, latest_md, latest_keys = _split_list_for_diff(latest)
213 base_fl, base_md, base_keys = _split_list_for_diff(base)
214
215 new_md = latest_md - base_md
216 removed_keys = base_keys - latest_keys
217 add_fl = latest_fl - base_fl
218 remove_fl = base_fl - latest_fl
219
220 yield '[Adblock Plus Diff]'
221 for item in new_md:
222 yield item
223 for key in removed_keys:
224 # If a special comment has been removed, enter it with a blank value
225 # so the client will set it back to the default value
226 yield '! {}:'.format(key)
227 for item in remove_fl:
228 yield '- {}'.format(item)
229 for item in add_fl:
230 yield '+ {}'.format(item)
OLDNEW
« no previous file with comments | « abp/filters/parser.py ('k') | tests/test_differ.py » ('j') | tests/test_parser.py » ('J')

Powered by Google App Engine
This is Rietveld