abp/filters/renderer.py - Issue 29879650: Issue 6950 - Don't include headers and metadata from includes into output

Side by Side Diff: abp/filters/renderer.py

Issue 29879650: Issue 6950 - Don't include headers and metadata from includes into output (Closed)

Patch Set: Simplify regexp Created Sept. 14, 2018, 2:35 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>,	1 # This file is part of Adblock Plus <https://adblockplus.org/>,

2 # Copyright (C) 2006-present eyeo GmbH	2 # Copyright (C) 2006-present eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
82	82

83 try:	83 try:

84 included, inherited_source = _get_and_parse_fragment(	84 included, inherited_source = _get_and_parse_fragment(

85 name, sources, default_source, include_stack)	85 name, sources, default_source, include_stack)

86 all_included = _process_includes(	86 all_included = _process_includes(

87 sources, inherited_source, include_stack, included)	87 sources, inherited_source, include_stack, included)

88	88

89 _logger.info('- including: %s', name)	89 _logger.info('- including: %s', name)

90 yield Comment('* {} *'.format(name))	90 yield Comment('* {} *'.format(name))

91 for line in all_included:	91 for line in all_included:

92 yield line	92 if line.type not in {'header', 'metadata'}:

	93 yield line

93 except (NotFound, ValueError) as exc:	94 except (NotFound, ValueError) as exc:

94 raise IncludeError(exc, include_stack)	95 raise IncludeError(exc, include_stack)

95 else:	96 else:

96 yield line	97 yield line

97	98

98	99

99 def _process_timestamps(lines):	100 def _process_timestamps(lines):

100 """Convert timestamp markers into actual timestamps."""	101 """Convert timestamp markers into actual timestamps."""

101 for line in lines:	102 for line in lines:

102 if line.type == 'comment' and '%timestamp%' in line.text:	103 if line.type == 'metadata' and line.value == '%timestamp%':
	Vasily Kuznetsov 2018/09/14 15:09:35 This function now looks better as the result of our greater tolerance to funny metadata keys.
103 timestamp = time.strftime('%d %b %Y %H:%M UTC', time.gmtime())	104 timestamp = time.strftime('%d %b %Y %H:%M UTC', time.gmtime())

104 yield Comment(text=line.text.replace('%timestamp%', timestamp))	105 yield Metadata(line.key, timestamp)

105 else:	106 else:

106 yield line	107 yield line

107	108

108	109

109 def _first_and_rest(iterable):	110 def _first_and_rest(iterable):

110 """Return the first item from the iterable and the rest as an iterator."""	111 """Return the first item from the iterable and the rest as an iterator."""

111 iterator = iter(iterable)	112 iterator = iter(iterable)

112 first_item = next(iterator)	113 first_item = next(iterator)

113 return first_item, iterator	114 return first_item, iterator

114	115

115	116

116 def _insert_version(lines):	117 def _insert_version(lines):

117 """Insert metadata comment with version (a.k.a. date)."""	118 """Insert metadata comment with version (a.k.a. date)."""

118 first_line, rest = _first_and_rest(lines)	119 first_line, rest = _first_and_rest(lines)

119 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime()))	120 version = Metadata('Version', time.strftime('%Y%m%d%H%M', time.gmtime()))

120 return itertools.chain([first_line, version], rest)	121 return itertools.chain([first_line, version], rest)

121	122

122	123

123 def _remove_duplicates(lines):	124 def _remove_checksum(lines):

124 """Remove duplicate metadata and headers."""	125 """Remove metadata comments giving a checksum.

125 # Always remove checksum -- a checksum coming from a fragment	126

126 # will not match for the rendered list.	127 Adblock Plus is no longer verifying checksums, so we don't have to

127 seen = {'checksum'}	128 calculate the checksum for the resulting filter list. But we have

128 for i, line in enumerate(lines):	129 to strip them for compatibility with older versions of Adblock Plus

129 if line.type == 'metadata':	130 and other ad blockers which might still verify a checksum if given.

130 key = line.key.lower()	131 """

131 if key not in seen:	132 for line in lines:

132 seen.add(key)	133 if line.type != 'metadata' or line.key.lower() != 'checksum':

133 yield line

134 elif line.type == 'header':

135 if i == 0:

136 yield line

137 else:

138 yield line	134 yield line

139	135

140	136

141 def _validate(lines):	137 def _validate(lines):

142 """Validate the final list."""	138 """Validate the final list."""

143 first_line, rest = _first_and_rest(lines)	139 first_line, rest = _first_and_rest(lines)

144 if first_line.type != 'header':	140 if first_line.type != 'header':

145 raise MissingHeader('No header found at the beginning of the input.')	141 raise MissingHeader('No header found at the beginning of the input.')

146 return itertools.chain([first_line], rest)	142 return itertools.chain([first_line], rest)

147	143

(...skipping 22 matching lines...) Expand all Loading...
170 ParseError	166 ParseError

171 When any of the fragments contain lines that can't be parsed.	167 When any of the fragments contain lines that can't be parsed.

172 MissingHeader	168 MissingHeader

173 If the top level fragment doesn't start with a valid header. This would	169 If the top level fragment doesn't start with a valid header. This would

174 lead to rendering an invalid filter list, so we immediately abort.	170 lead to rendering an invalid filter list, so we immediately abort.

175	171

176 """	172 """

177 _logger.info('Rendering: %s', name)	173 _logger.info('Rendering: %s', name)

178 lines, default_source = _get_and_parse_fragment(name, sources, top_source)	174 lines, default_source = _get_and_parse_fragment(name, sources, top_source)

179 lines = _process_includes(sources, default_source, [name], lines)	175 lines = _process_includes(sources, default_source, [name], lines)

180 for proc in [_process_timestamps, _insert_version, _remove_duplicates,	176 for proc in [_process_timestamps, _insert_version, _remove_checksum,

181 _validate]:	177 _validate]:

182 lines = proc(lines)	178 lines = proc(lines)

183 return lines	179 return lines

184	180

185	181

186 def _split_list_for_diff(list_in):	182 def _split_list_for_diff(list_in):

187 """Split a filter list into metadata and rules."""	183 """Split a filter list into metadata and rules."""

188 metadata = {}	184 metadata = {}

189 rules = set()	185 rules = set()

190 for line in parse_filterlist(list_in):	186 for line in parse_filterlist(list_in):

(...skipping 27 matching lines...) Expand all Loading...
218 for key, latest in latest_metadata.items():	214 for key, latest in latest_metadata.items():

219 base = base_metadata.get(key)	215 base = base_metadata.get(key)

220 if not base or base.value != latest.value:	216 if not base or base.value != latest.value:

221 yield latest.to_string()	217 yield latest.to_string()

222 for key in set(base_metadata) - set(latest_metadata):	218 for key in set(base_metadata) - set(latest_metadata):

223 yield '! {}:'.format(base_metadata[key].key)	219 yield '! {}:'.format(base_metadata[key].key)

224 for rule in base_rules - latest_rules:	220 for rule in base_rules - latest_rules:

225 yield '- {}'.format(rule)	221 yield '- {}'.format(rule)

226 for rule in latest_rules - base_rules:	222 for rule in latest_rules - base_rules:

227 yield '+ {}'.format(rule)	223 yield '+ {}'.format(rule)

OLD	NEW

« abp/filters/parser.py ('K') | « abp/filters/parser.py ('k') | tests/test_differ.py » ('j') | tests/test_parser.py » ('J')