OLD | NEW |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 | 133 |
134 | 134 |
135 Header = _line_type('Header', 'version', '[{.version}]') | 135 Header = _line_type('Header', 'version', '[{.version}]') |
136 EmptyLine = _line_type('EmptyLine', '', '') | 136 EmptyLine = _line_type('EmptyLine', '', '') |
137 Comment = _line_type('Comment', 'text', '! {.text}') | 137 Comment = _line_type('Comment', 'text', '! {.text}') |
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') |
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') |
140 Include = _line_type('Include', 'target', '%include {0.target}%') | 140 Include = _line_type('Include', 'target', '%include {0.target}%') |
141 | 141 |
142 | 142 |
143 METADATA_REGEXP = re.compile(r'(.*?)\s*:\s*(.*)') | 143 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') |
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 145 HEADER_REGEXP = re.compile(r'\[(?:(Adblock(?:\s*Plus\s*[\d\.]+?)?)|.*)\]$', flags=re.I) |
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
147 FILTER_OPTIONS_REGEXP = re.compile( | 147 FILTER_OPTIONS_REGEXP = re.compile( |
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' |
149 ) | 149 ) |
150 | 150 |
151 | 151 |
152 def _parse_header(text): | |
153 match = HEADER_REGEXP.match(text) | |
154 if not match: | |
155 raise ParseError('Malformed header', text) | |
156 return Header(match.group(1)) | |
157 | |
158 | |
159 def _parse_instruction(text): | 152 def _parse_instruction(text): |
160 match = INCLUDE_REGEXP.match(text) | 153 match = INCLUDE_REGEXP.match(text) |
161 if not match: | 154 if not match: |
162 raise ParseError('Unrecognized instruction', text) | 155 raise ParseError('Unrecognized instruction', text) |
163 return Include(match.group(1)) | 156 return Include(match.group(1)) |
164 | 157 |
165 | 158 |
166 def _parse_option(option): | 159 def _parse_option(option): |
167 if '=' in option: | 160 if '=' in option: |
168 return option.split('=', 1) | 161 return option.split('=', 1) |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 Parsed filter. | 237 Parsed filter. |
245 | 238 |
246 """ | 239 """ |
247 if '#' in text: | 240 if '#' in text: |
248 match = HIDING_FILTER_REGEXP.search(text) | 241 match = HIDING_FILTER_REGEXP.search(text) |
249 if match: | 242 if match: |
250 return _parse_hiding_filter(text, *match.groups()) | 243 return _parse_hiding_filter(text, *match.groups()) |
251 return _parse_blocking_filter(text) | 244 return _parse_blocking_filter(text) |
252 | 245 |
253 | 246 |
| 247 def _decode_if_bytes(s): |
| 248 return s.decode('utf-8') if isinstance(s, type(b'')) else s |
| 249 |
| 250 |
254 def parse_line(line_text): | 251 def parse_line(line_text): |
255 """Parse one line of a filter list. | 252 """Parse one line of a filter list. |
256 | 253 |
257 Note that parse_line() doesn't handle special comments, hence never returns | 254 Note that parse_line() doesn't handle headers and special comments, |
258 a Metadata() object, Adblock Plus only considers metadata when parsing the | 255 hence never returns a Header() or Metadata() object. Adblock Plus only |
259 whole filter list and only if they are given at the top of the filter list. | 256 considers headers and metadata when parsing the whole filter list and |
| 257 only if they are given at the top of the filter list. |
260 | 258 |
261 Parameters | 259 Parameters |
262 ---------- | 260 ---------- |
263 line_text : str | 261 line_text : str |
264 Line of a filter list. | 262 Line of a filter list. |
265 | 263 |
266 Returns | 264 Returns |
267 ------- | 265 ------- |
268 namedtuple | 266 namedtuple |
269 Parsed line (see `_line_type`). | 267 Parsed line (see `_line_type`). |
270 | 268 |
271 Raises | 269 Raises |
272 ------ | 270 ------ |
273 ParseError | 271 ParseError |
274 ParseError: If the line can't be parsed. | 272 ParseError: If the line can't be parsed. |
275 """ | 273 """ |
276 if isinstance(line_text, type(b'')): | 274 content = _decode_if_bytes(line_text).strip() |
277 line_text = line_text.decode('utf-8') | |
278 | |
279 content = line_text.strip() | |
280 | 275 |
281 if content == '': | 276 if content == '': |
282 line = EmptyLine() | 277 line = EmptyLine() |
283 elif content.startswith('!'): | 278 elif content.startswith('!'): |
284 line = Comment(content[1:].lstrip()) | 279 line = Comment(content[1:].lstrip()) |
285 elif content.startswith('%') and content.endswith('%'): | 280 elif content.startswith('%') and content.endswith('%'): |
286 line = _parse_instruction(content) | 281 line = _parse_instruction(content) |
287 elif content.startswith('[') and content.endswith(']'): | |
288 line = _parse_header(content) | |
289 else: | 282 else: |
290 line = parse_filter(content) | 283 line = parse_filter(content) |
291 | 284 |
292 assert line.to_string().replace(' ', '') == content.replace(' ', '') | 285 assert line.to_string().replace(' ', '') == content.replace(' ', '') |
293 return line | 286 return line |
294 | 287 |
295 | 288 |
296 def parse_filterlist(lines): | 289 def parse_filterlist(lines): |
297 """Parse filter list from an iterable. | 290 """Parse filter list from an iterable. |
298 | 291 |
(...skipping 10 matching lines...) Expand all Loading... |
309 Raises | 302 Raises |
310 ------ | 303 ------ |
311 ParseError | 304 ParseError |
312 Thrown during iteration for invalid filter list lines. | 305 Thrown during iteration for invalid filter list lines. |
313 TypeError | 306 TypeError |
314 If `lines` is not iterable. | 307 If `lines` is not iterable. |
315 | 308 |
316 """ | 309 """ |
317 metadata_closed = False | 310 metadata_closed = False |
318 | 311 |
319 for line in lines: | 312 for i, line in enumerate(lines): |
320 result = parse_line(line) | 313 text = _decode_if_bytes(line) |
321 | 314 |
322 if result.type == 'comment': | 315 if i == 0: |
323 match = METADATA_REGEXP.match(result.text) | 316 match = HEADER_REGEXP.match(text) |
324 if match: | 317 if match: |
325 key, value = match.groups() | 318 version = match.group(1) |
| 319 if not version: |
| 320 raise ParseError('Malformed header', text) |
326 | 321 |
327 # Historically, checksums can occur at the bottom of the | 322 yield Header(version) |
328 # filter list. Checksums are no longer used by Adblock Plus, | 323 continue |
329 # but in order to strip them (in abp.filters.renderer), | |
330 # we have to make sure to still parse them regardless of | |
331 # their position in the filter list. | |
332 if not metadata_closed or key.lower() == 'checksum': | |
333 result = Metadata(key, value) | |
334 | 324 |
335 if result.type not in {'header', 'metadata'}: | 325 match = METADATA_REGEXP.match(text) |
336 metadata_closed = True | 326 if match: |
| 327 key, value = match.groups() |
337 | 328 |
338 yield result | 329 # Historically, checksums can occur at the bottom of the |
| 330 # filter list. Checksums are no longer used by Adblock Plus, |
| 331 # but in order to strip them (in abp.filters.renderer), |
| 332 # we have to make sure to still parse them regardless of |
| 333 # their position in the filter list. |
| 334 if not metadata_closed or key.lower() == 'checksum': |
| 335 yield Metadata(key, value) |
| 336 continue |
| 337 |
| 338 metadata_closed = True |
| 339 yield parse_line(text) |
OLD | NEW |