| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
| 2 # Copyright (C) 2006-2017 eyeo GmbH | 2 # Copyright (C) 2006-2017 eyeo GmbH |
| 3 # | 3 # |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
| 7 # | 7 # |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 109 # the document. | 109 # the document. |
| 110 self._append_text(data) | 110 self._append_text(data) |
| 111 | 111 |
| 112 def handle_entityref(self, name): | 112 def handle_entityref(self, name): |
| 113 self._append_text(self.unescape('&{};'.format(name))) | 113 self._append_text(self.unescape('&{};'.format(name))) |
| 114 | 114 |
| 115 def handle_charref(self, name): | 115 def handle_charref(self, name): |
| 116 self._append_text(self.unescape('&#{};'.format(name))) | 116 self._append_text(self.unescape('&#{};'.format(name))) |
| 117 | 117 |
| 118 | 118 |
| 119 def get_page_metadata(page, data): | |
| 120 """Generator which gets per page metadata and corresponding line indices""" | |
| 121 for i, line in enumerate(data.splitlines()): | |
| 122 if not re.search(r'^\s*[\w\-]+\s*=', line): | |
| 123 break | |
| 124 name, value = line.split('=', 1) | |
| 125 value = value.strip() | |
| 126 if value.startswith('[') and value.endswith(']'): | |
| 127 value = [element.strip() for element in value[1:-1].split(',')] | |
| 128 yield name.strip(), value, i | |
| 129 | |
| 130 | |
| 119 class Converter: | 131 class Converter: |
| 120 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} | 132 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
| 121 missing_translations = 0 | 133 missing_translations = 0 |
| 122 total_translations = 0 | 134 total_translations = 0 |
| 123 | 135 |
| 124 def __init__(self, params, key='pagedata'): | 136 def __init__(self, params, key='pagedata'): |
| 125 self._params = params | 137 self._params = params |
| 126 self._key = key | 138 self._key = key |
| 127 self._attribute_parser = AttributeParser(self.whitelist) | 139 self._attribute_parser = AttributeParser(self.whitelist) |
| 128 self._seen_defaults = {} | 140 self._seen_defaults = {} |
| 129 | 141 |
| 130 # Read in any parameters specified at the beginning of the file | 142 # Read in any parameters specified at the beginning of the file |
| 131 data, filename = params[key] | 143 data, filename = params[key] |
| 132 lines = data.splitlines(True) | 144 lines = data.splitlines(True) |
| 133 for i, line in enumerate(lines): | 145 for name, value, i in get_page_metadata(params['page'], data): |
|
Vasily Kuznetsov
2017/06/27 13:32:15
This refactoring moved the code into a separate fu
Jon Sonesen
2017/06/28 14:31:19
The line index is required to strip the metadata f
Jon Sonesen
2017/07/03 09:06:06
Done.
| |
| 134 if not re.search(r'^\s*[\w\-]+\s*=', line): | 146 params[name] = value |
| 135 break | |
| 136 name, value = line.split('=', 1) | |
| 137 params[name.strip()] = value.strip() | |
| 138 lines[i] = '\n' | 147 lines[i] = '\n' |
| 139 params[key] = (''.join(lines), filename) | 148 params[key] = (''.join(lines), filename) |
| 140 | 149 |
| 141 def localize_string( | 150 def localize_string( |
| 142 self, page, name, default, comment, localedata, escapes): | 151 self, page, name, default, comment, localedata, escapes): |
| 143 | 152 |
| 144 def escape(s): | 153 def escape(s): |
| 145 return re.sub(r'.', | 154 return re.sub(r'.', |
| 146 lambda match: escapes.get(match.group(0), | 155 lambda match: escapes.get(match.group(0), |
| 147 match.group(0)), | 156 match.group(0)), |
| (...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 376 filters = { | 385 filters = { |
| 377 'translate': self.translate, | 386 'translate': self.translate, |
| 378 'linkify': self.linkify, | 387 'linkify': self.linkify, |
| 379 'toclist': self.toclist, | 388 'toclist': self.toclist, |
| 380 } | 389 } |
| 381 | 390 |
| 382 globals = { | 391 globals = { |
| 383 'get_string': self.get_string, | 392 'get_string': self.get_string, |
| 384 'has_string': self.has_string, | 393 'has_string': self.has_string, |
| 385 'get_page_content': self.get_page_content, | 394 'get_page_content': self.get_page_content, |
| 395 'get_pages_metadata': self.get_pages_metadata, | |
| 386 } | 396 } |
| 387 | 397 |
| 388 for dirname, dictionary in [('filters', filters), | 398 for dirname, dictionary in [('filters', filters), |
| 389 ('globals', globals)]: | 399 ('globals', globals)]: |
| 390 for filename in self._params['source'].list_files(dirname): | 400 for filename in self._params['source'].list_files(dirname): |
| 391 root, ext = os.path.splitext(filename) | 401 root, ext = os.path.splitext(filename) |
| 392 if ext.lower() != '.py': | 402 if ext.lower() != '.py': |
| 393 continue | 403 continue |
| 394 | 404 |
| 395 path = os.path.join(dirname, filename) | 405 path = os.path.join(dirname, filename) |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 464 locale = self._params['locale'] | 474 locale = self._params['locale'] |
| 465 | 475 |
| 466 locale, url = self._params['source'].resolve_link(page, locale) | 476 locale, url = self._params['source'].resolve_link(page, locale) |
| 467 return jinja2.Markup('<a{}>'.format(''.join( | 477 return jinja2.Markup('<a{}>'.format(''.join( |
| 468 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ | 478 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
| 469 ('href', url), | 479 ('href', url), |
| 470 ('hreflang', locale) | 480 ('hreflang', locale) |
| 471 ] + attrs.items() | 481 ] + attrs.items() |
| 472 ))) | 482 ))) |
| 473 | 483 |
| 484 def get_pages_metadata(self, filters=None): | |
| 485 if not isinstance(filters, dict) and filters: | |
|
Vasily Kuznetsov
2017/06/27 13:32:15
This code seems to allow things like `filters = []
Jon Sonesen
2017/06/28 14:31:19
Acknowledged.
Jon Sonesen
2017/07/03 09:06:06
Done.
| |
| 486 raise TypeError('Filters are not a dictionary') | |
| 487 | |
| 488 return_data = [] | |
| 489 for page_name, _format in self._params['source'].list_pages(): | |
| 490 data, filename = self._params['source'].read_page(page_name, | |
| 491 _format) | |
| 492 page_data = {'page': page_name} | |
| 493 for name, value, i in get_page_metadata(page_name, data): | |
| 494 page_data[name] = value | |
| 495 if self.filter_metadata(filters, page_data) is True: | |
| 496 return_data.append(page_data) | |
| 497 return return_data | |
| 498 | |
| 499 def filter_metadata(self, filters, metadata): | |
| 500 # if only the page key is in the metadata then there | |
| 501 # was no user defined metadata | |
| 502 if metadata.keys() == ['page']: | |
|
Vasily Kuznetsov
2017/06/27 13:32:15
Is this a requirement that such metadata should be
Jon Sonesen
2017/06/28 14:31:19
This is done to keep a uniform API; the previous i
Jon Sonesen
2017/07/03 09:06:06
Still waiting on Julian's opinion; I guess I get wh
juliandoucette
2017/07/03 21:48:51
I don't understand the question. Can you please ex
Vasily Kuznetsov
2017/07/04 07:43:48
The question is: if a page has no explicit metadat
juliandoucette
2017/07/04 09:57:24
Or that I queried by page name?
Vasily Kuznetsov
2017/07/04 10:23:34
The content is not in the dictionary currently. It
| |
| 503 return False | |
| 504 if filters is None: | |
| 505 return True | |
| 506 for filter_name, filter_value in filters.items(): | |
| 507 if filter_name not in metadata: | |
| 508 return False | |
| 509 if isinstance(metadata[filter_name], list): | |
| 510 if isinstance(filter_value, basestring): | |
| 511 filter_value = [filter_value] | |
| 512 for option in filter_value: | |
| 513 if str(option) not in metadata[filter_name]: | |
| 514 return False | |
| 515 elif filter_value != metadata[filter_name]: | |
| 516 return False | |
| 517 return True | |
| 518 | |
| 474 def toclist(self, content): | 519 def toclist(self, content): |
| 475 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' | 520 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
| 476 flat = [] | 521 flat = [] |
| 477 for match in re.finditer(toc_re, content, re.S): | 522 for match in re.finditer(toc_re, content, re.S): |
| 478 flat.append({ | 523 flat.append({ |
| 479 'level': int(match.group(1)), | 524 'level': int(match.group(1)), |
| 480 'anchor': jinja2.Markup(match.group(2)).unescape(), | 525 'anchor': jinja2.Markup(match.group(2)).unescape(), |
| 481 'title': jinja2.Markup(match.group(3)).unescape(), | 526 'title': jinja2.Markup(match.group(3)).unescape(), |
| 482 'subitems': [], | 527 'subitems': [], |
| 483 }) | 528 }) |
| 484 | 529 |
| 485 structured = [] | 530 structured = [] |
| 486 stack = [{'level': 0, 'subitems': structured}] | 531 stack = [{'level': 0, 'subitems': structured}] |
| 487 for item in flat: | 532 for item in flat: |
| 488 while stack[-1]['level'] >= item['level']: | 533 while stack[-1]['level'] >= item['level']: |
| 489 stack.pop() | 534 stack.pop() |
| 490 stack[-1]['subitems'].append(item) | 535 stack[-1]['subitems'].append(item) |
| 491 stack.append(item) | 536 stack.append(item) |
| 492 return structured | 537 return structured |
| 493 | 538 |
| 494 converters = { | 539 converters = { |
| 495 'html': RawConverter, | 540 'html': RawConverter, |
| 496 'md': MarkdownConverter, | 541 'md': MarkdownConverter, |
| 497 'tmpl': TemplateConverter, | 542 'tmpl': TemplateConverter, |
| 498 } | 543 } |
| OLD | NEW |