Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: abp/filters/renderer.py

Issue 29845767: Issue 6685 - Offer incremental filter list downloads (Closed) Base URL: https://hg.adblockplus.org/python-abp/
Left Patch Set: Use namedtuple filter list objects instead of strings Created Aug. 9, 2018, 7:26 p.m.
Right Patch Set: Address comments on PS8 Created Aug. 30, 2018, 5:37 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « abp/filters/parser.py ('k') | tests/test_differ.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 """Combine filter list fragments to produce filter lists.""" 16 """Combine filter list fragments to produce filter lists."""
17 17
18 from __future__ import unicode_literals 18 from __future__ import unicode_literals
19 19
20 import base64
21 import hashlib
22 import itertools 20 import itertools
23 import logging 21 import logging
24 import time 22 import time
25 23
26 from .parser import parse_filterlist, Comment, Metadata 24 from .parser import parse_filterlist, Comment, Metadata
27 from .sources import NotFound 25 from .sources import NotFound
28 26
29 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist'] 27 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist', 'render_diff']
Vasily Kuznetsov 2018/08/17 10:30:54 render_diff() should probably become a public expo
rhowell 2018/08/20 18:21:27 Done.
30 28
31 _logger = logging.getLogger(__name__) 29 _logger = logging.getLogger(__name__)
32 30
33 31
34 class IncludeError(Exception): 32 class IncludeError(Exception):
35 """Error in processing include instruction.""" 33 """Error in processing include instruction."""
36 34
37 def __init__(self, error, stack): 35 def __init__(self, error, stack):
38 stack_str = ' from '.join(map("'{}'".format, reversed(stack))) 36 stack_str = ' from '.join(map("'{}'".format, reversed(stack)))
39 if stack_str: 37 if stack_str:
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 """Insert metadata comment with version (a.k.a. date).""" 117 """Insert metadata comment with version (a.k.a. date)."""
120 first_line, rest = _first_and_rest(lines) 118 first_line, rest = _first_and_rest(lines)
121 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime())) 119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime()))
122 return itertools.chain([first_line, version], rest) 120 return itertools.chain([first_line, version], rest)
123 121
124 122
125 def _remove_duplicates(lines): 123 def _remove_duplicates(lines):
126 """Remove duplicate metadata and headers.""" 124 """Remove duplicate metadata and headers."""
127 # Always remove checksum -- a checksum coming from a fragment 125 # Always remove checksum -- a checksum coming from a fragment
128 # will not match for the rendered list. 126 # will not match for the rendered list.
129 seen = {'Checksum'} 127 seen = {'checksum'}
130 for i, line in enumerate(lines): 128 for i, line in enumerate(lines):
131 if line.type == 'metadata': 129 if line.type == 'metadata':
132 if line.key not in seen: 130 key = line.key.lower()
133 seen.add(line.key) 131 if key not in seen:
132 seen.add(key)
134 yield line 133 yield line
135 elif line.type == 'header': 134 elif line.type == 'header':
136 if i == 0: 135 if i == 0:
137 yield line 136 yield line
138 else: 137 else:
139 yield line 138 yield line
140
141
142 def _insert_checksum(lines):
143 """Add checksum to the filter list.
144
145 See https://adblockplus.org/filters#special-comments for description
146 of the checksum algorithm.
147 """
148 md5sum = hashlib.md5()
149
150 for line in lines:
151 if line.type != 'emptyline':
152 md5sum.update(line.to_string().encode('utf-8') + b'\n')
153 yield line
154
155 checksum = base64.b64encode(md5sum.digest()).rstrip(b'=')
156 yield Metadata('Checksum', checksum.decode('utf-8'))
157 139
158 140
159 def _validate(lines): 141 def _validate(lines):
160 """Validate the final list.""" 142 """Validate the final list."""
161 first_line, rest = _first_and_rest(lines) 143 first_line, rest = _first_and_rest(lines)
162 if first_line.type != 'header': 144 if first_line.type != 'header':
163 raise MissingHeader('No header found at the beginning of the input.') 145 raise MissingHeader('No header found at the beginning of the input.')
164 return itertools.chain([first_line], rest) 146 return itertools.chain([first_line], rest)
165 147
166 148
(...skipping 22 matching lines...) Expand all
189 When any of the fragments contain lines that can't be parsed. 171 When any of the fragments contain lines that can't be parsed.
190 MissingHeader 172 MissingHeader
191 If the top level fragment doesn't start with a valid header. This would 173 If the top level fragment doesn't start with a valid header. This would
192 lead to rendering an invalid filter list, so we immediately abort. 174 lead to rendering an invalid filter list, so we immediately abort.
193 175
194 """ 176 """
195 _logger.info('Rendering: %s', name) 177 _logger.info('Rendering: %s', name)
196 lines, default_source = _get_and_parse_fragment(name, sources, top_source) 178 lines, default_source = _get_and_parse_fragment(name, sources, top_source)
197 lines = _process_includes(sources, default_source, [name], lines) 179 lines = _process_includes(sources, default_source, [name], lines)
198 for proc in [_process_timestamps, _insert_version, _remove_duplicates, 180 for proc in [_process_timestamps, _insert_version, _remove_duplicates,
199 _insert_checksum, _validate]: 181 _validate]:
200 lines = proc(lines) 182 lines = proc(lines)
201 return lines 183 return lines
202 184
203 185
186 def _split_list_for_diff(list_in):
187 """Split a filter list into metadata, keys, and rules."""
Vasily Kuznetsov 2018/08/31 12:11:02 Nit: strictly speaking this function now returns m
rhowell 2018/08/31 21:21:02 Ah, good point! I'll update that.
188 metadata = {}
189 rules = set()
190 for line in parse_filterlist(list_in):
191 if line.type == 'metadata':
192 metadata[line.key.lower()] = line
193 elif line.type == 'filter':
194 rules.add(line.to_string())
195 return metadata, rules
196
197
204 def render_diff(base, latest): 198 def render_diff(base, latest):
Vasily Kuznetsov 2018/08/17 10:30:54 I think it makes more sense to make `base`, `lates
rhowell 2018/08/20 18:21:27 Done.
205 """Return a diff between two filter lists.""" 199 """Return a diff between two filter lists.
206 # Collect the special comments 200
207 diff = ['[Adblock Plus Diff]\n'] 201 Parameters
208 latest_fl, latest_md, latest_keys = (set() for i in range(3)) 202 ----------
209 base_fl, base_md, base_keys = (set() for i in range(3)) 203 base : iterator of str
210 204 The base (old) list that we want to update to latest.
211 for line in parse_filterlist(latest.splitlines()): 205 latest : iterator of str
212 if line.type == 'metadata' and 'Checksum' not in line.to_string(): 206 The latest (most recent) list that we want to update to.
Vasily Kuznetsov 2018/08/17 10:30:53 Note: If this lands after the checksum patch, the
rhowell 2018/08/20 18:21:27 Are we guaranteed that no filter lists will be enc
Vasily Kuznetsov 2018/08/21 14:59:59 Hm. No, for now we're not guaranteed that there's
Sebastian Noack 2018/08/21 19:42:45 Isn't python-abp stripping checksums when generati
Sebastian Noack 2018/08/22 16:03:29 Moving the discussion from IRC over here: 16:34:4
213 latest_md.add(line.to_string()) 207
214 latest_keys.add(line.key) 208 Returns
215 elif line.type == 'filter': 209 -------
216 latest_fl.add(line.to_string()) 210 iterable of str
217 211 A diff between two lists (https://issues.adblockplus.org/ticket/6685)
218 # Get the diff between the rest of the lines 212
219 for line in parse_filterlist(base.splitlines()): 213 """
Vasily Kuznetsov 2018/08/17 10:30:53 It would be good to move this repeated piece of co
rhowell 2018/08/20 18:21:27 Done.
220 if line.type == 'metadata' and 'Checksum' not in line.to_string(): 214 latest_metadata, latest_rules = _split_list_for_diff(latest)
221 base_md.add(line.to_string()) 215 base_metadata, base_rules = _split_list_for_diff(base)
222 base_keys.add(line.key) 216
223 elif line.type == 'filter': 217 yield '[Adblock Plus Diff]'
224 base_fl.add(line.to_string()) 218 for key, latest in latest_metadata.items():
225 new_md = latest_md - base_md 219 base = base_metadata.get(key)
226 removed_keys = base_keys - latest_keys 220 if not base or base.value != latest.value:
227 add_fl = latest_fl - base_fl 221 yield latest.to_string()
228 remove_fl = base_fl - latest_fl 222 for key in set(base_metadata) - set(latest_metadata):
229 for item in new_md: 223 yield '! {}:'.format(base_metadata[key].key)
230 diff.append('{}\n'.format(item)) 224 for rule in base_rules - latest_rules:
Vasily Kuznetsov 2018/08/17 10:30:53 If we return an iterable of strings from this func
rhowell 2018/08/20 18:21:27 Done.
231 for key in removed_keys: 225 yield '- {}'.format(rule)
232 # If a special comment has been removed, enter it with a blank value 226 for rule in latest_rules - base_rules:
233 # so the client will set it back to the default value 227 yield '+ {}'.format(rule)
234 diff.append('! {}:\n'.format(key))
235 for item in add_fl:
236 diff.append('+ {}\n'.format(item))
237 for item in remove_fl:
238 diff.append('- {}\n'.format(item))
239 return ''.join(diff)
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld