abp/filters/renderer.py - Issue 29845767: Issue 6685 - Offer incremental filter list downloads

Side by Side Diff: abp/filters/renderer.py

Issue 29845767: Issue 6685 - Offer incremental filter list downloads (Closed) Base URL: https://hg.adblockplus.org/python-abp/

Patch Set: Store metadata in a dict Created Aug. 29, 2018, 10:51 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>,	1 # This file is part of Adblock Plus <https://adblockplus.org/>,

2 # Copyright (C) 2006-present eyeo GmbH	2 # Copyright (C) 2006-present eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11 # GNU General Public License for more details.	11 # GNU General Public License for more details.

12 #	12 #

13 # You should have received a copy of the GNU General Public License	13 # You should have received a copy of the GNU General Public License

14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

15	15

16 """Combine filter list fragments to produce filter lists."""	16 """Combine filter list fragments to produce filter lists."""

17	17

18 from __future__ import unicode_literals	18 from __future__ import unicode_literals

19	19

20 import itertools	20 import itertools

21 import logging	21 import logging

22 import time	22 import time

23	23

24 from .parser import parse_filterlist, Comment, Metadata	24 from .parser import parse_filterlist, Comment, Metadata

25 from .sources import NotFound	25 from .sources import NotFound

26	26

27 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist']	27 __all__ = ['IncludeError', 'MissingHeader', 'render_filterlist', 'render_diff']

28	28

29 _logger = logging.getLogger(__name__)	29 _logger = logging.getLogger(__name__)

30	30

31	31

32 class IncludeError(Exception):	32 class IncludeError(Exception):

33 """Error in processing include instruction."""	33 """Error in processing include instruction."""

34	34

35 def __init__(self, error, stack):	35 def __init__(self, error, stack):

36 stack_str = ' from '.join(map("'{}'".format, reversed(stack)))	36 stack_str = ' from '.join(map("'{}'".format, reversed(stack)))

37 if stack_str:	37 if stack_str:

(...skipping 79 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
117 """Insert metadata comment with version (a.k.a. date)."""	117 """Insert metadata comment with version (a.k.a. date)."""

118 first_line, rest = _first_and_rest(lines)	118 first_line, rest = _first_and_rest(lines)

119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime()))	119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime()))

120 return itertools.chain([first_line, version], rest)	120 return itertools.chain([first_line, version], rest)

121	121

122	122

123 def _remove_duplicates(lines):	123 def _remove_duplicates(lines):

124 """Remove duplicate metadata and headers."""	124 """Remove duplicate metadata and headers."""

125 # Always remove checksum -- a checksum coming from a fragment	125 # Always remove checksum -- a checksum coming from a fragment

126 # will not match for the rendered list.	126 # will not match for the rendered list.

127 seen = {'Checksum'}	127 seen = {'checksum'}

128 for i, line in enumerate(lines):	128 for i, line in enumerate(lines):

129 if line.type == 'metadata':	129 if line.type == 'metadata':

130 if line.key not in seen:	130 key = line.key.lower()

131 seen.add(line.key)	131 if key not in seen:

	132 seen.add(key)

132 yield line	133 yield line

133 elif line.type == 'header':	134 elif line.type == 'header':

134 if i == 0:	135 if i == 0:

135 yield line	136 yield line

136 else:	137 else:

137 yield line	138 yield line

138	139

139	140

140 def _validate(lines):	141 def _validate(lines):

141 """Validate the final list."""	142 """Validate the final list."""

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
173 lead to rendering an invalid filter list, so we immediately abort.	174 lead to rendering an invalid filter list, so we immediately abort.

174	175

175 """	176 """

176 _logger.info('Rendering: %s', name)	177 _logger.info('Rendering: %s', name)

177 lines, default_source = _get_and_parse_fragment(name, sources, top_source)	178 lines, default_source = _get_and_parse_fragment(name, sources, top_source)

178 lines = _process_includes(sources, default_source, [name], lines)	179 lines = _process_includes(sources, default_source, [name], lines)

179 for proc in [_process_timestamps, _insert_version, _remove_duplicates,	180 for proc in [_process_timestamps, _insert_version, _remove_duplicates,

180 _validate]:	181 _validate]:

181 lines = proc(lines)	182 lines = proc(lines)

182 return lines	183 return lines

	184

	185

	186 def _split_list_for_diff(list_in):

	187 """Split a filter list into metadata, keys, and rules."""

	188 metadata = {}

	189 rules = set()

	190 for line in parse_filterlist(list_in):

	191 if line.type == 'metadata':

	192 metadata[line.key.lower()] = line

	193 elif line.type == 'filter':

	194 rules.add(line.to_string())

	195 return metadata, rules

	196

	197

	198 def render_diff(base, latest):

	199 """Return a diff between two filter lists.

	200

	201 Parameters

	202 ----------

	203 base : iterator of str

	204 The base (old) list that we want to update to latest.

	205 lastest : iterator of str

	206 The latest (most recent) list that we want to update to.

	207

	208 Returns

	209 -------

	210 iterable of str

	211 A diff between two lists (https://issues.adblockplus.org/ticket/6685)

	212

	213 """

	214 latest_md, latest_rules = _split_list_for_diff(latest)

	215 base_md, base_rules = _split_list_for_diff(base)
	Sebastian Noack 2018/08/29 23:46:26 Nit: Is it necessary to abbreviate metadata to md in order to avoid wrapping? If so that is fine, but otherwise, I think it reads better spelled out. rhowell 2018/08/30 16:23:30 On 2018/08/29 23:46:26, Sebastian Noack wrote: > Nit: Is it necessary to abbreviate metadata to md in order to avoid wrapping? If > so that is fine, but otherwise, I think it reads better spelled out. Done.
	216 add_fl = latest_rules - base_rules
	Sebastian Noack 2018/08/29 23:46:25 Nit: These expressions could be inlined below. This would also be consistent with the second loop iterating over removed metadata keys. rhowell 2018/08/30 16:23:30 On 2018/08/29 23:46:25, Sebastian Noack wrote: > Nit: These expressions could be inlined below. This would also be consistent > with the second loop iterating over removed metadata keys. Done.
	217 remove_fl = base_rules - latest_rules

	218

	219 yield '[Adblock Plus Diff]'

	220 for key, latest in latest_md.items():

	221 base = base_md.get(key)

	222 if not base or base.value != latest.value:

	223 yield latest.to_string()

	224 for key in set(base_md) - set(latest_md):

	225 yield '! {}:'.format(base_md[key].key)

	226 for item in remove_fl:

	227 yield '- {}'.format(item)

	228 for item in add_fl:

	229 yield '+ {}'.format(item)

OLD	NEW

« no previous file with comments | « abp/filters/parser.py ('k') | tests/test_differ.py » ('j') | tests/test_differ.py » ('J')