Left: | ||
Right: |
OLD | NEW |
---|---|
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-2017 eyeo GmbH | 2 # Copyright (C) 2006-2017 eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 98 matching lines...) | |
109 # the document. | 109 # the document. |
110 self._append_text(data) | 110 self._append_text(data) |
111 | 111 |
112 def handle_entityref(self, name): | 112 def handle_entityref(self, name): |
113 self._append_text(self.unescape('&{};'.format(name))) | 113 self._append_text(self.unescape('&{};'.format(name))) |
114 | 114 |
115 def handle_charref(self, name): | 115 def handle_charref(self, name): |
116 self._append_text(self.unescape('&#{};'.format(name))) | 116 self._append_text(self.unescape('&#{};'.format(name))) |
117 | 117 |
118 | 118 |
119 def get_page_metadata(page, data): | |
Vasily Kuznetsov
2017/07/03 17:42:44
Perhaps this function should be renamed now since
juliandoucette
2017/07/03 21:55:54
[`get`, `query`, `pages`, `get_pages`, ...] I lik
Vasily Kuznetsov
2017/07/04 07:43:48
Note that this is not the function that gets expos
juliandoucette
2017/07/04 09:57:25
Oh, sorry. I meant the get_pages_metadata function
Vasily Kuznetsov
2017/07/04 10:23:34
Currently the content is not included in metadata
juliandoucette
2017/07/04 10:42:56
Definitely more confusing.
I don't really care if
| |
120 """Generator which gets per page metadata and cleaned page content""" | |
Vasily Kuznetsov
2017/07/03 17:42:44
Whenever possible, it's best to write docstrings i
Jon Sonesen
2017/07/04 15:02:38
Acknowledged.
| |
121 page_data = {'page': page} | |
122 lines = data.splitlines(True) | |
123 for i, line in enumerate(lines): | |
124 if not re.search(r'^\s*[\w\-]+\s*=', line): | |
125 break | |
126 name, value = line.split('=', 1) | |
127 value = value.strip() | |
128 if value.startswith('[') and value.endswith(']'): | |
129 value = [element.strip() for element in value[1:-1].split(',')] | |
130 lines[i] = '\n' | |
131 page_data[name.strip()] = value | |
132 return page_data, ''.join(lines) | |
133 | |
134 | |
119 class Converter: | 135 class Converter: |
120 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} | 136 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
121 missing_translations = 0 | 137 missing_translations = 0 |
122 total_translations = 0 | 138 total_translations = 0 |
123 | 139 |
124 def __init__(self, params, key='pagedata'): | 140 def __init__(self, params, key='pagedata'): |
125 self._params = params | 141 self._params = params |
126 self._key = key | 142 self._key = key |
127 self._attribute_parser = AttributeParser(self.whitelist) | 143 self._attribute_parser = AttributeParser(self.whitelist) |
128 self._seen_defaults = {} | 144 self._seen_defaults = {} |
129 | 145 |
130 # Read in any parameters specified at the beginning of the file | 146 # Read in any parameters specified at the beginning of the file |
147 # and override converter defaults with page specific params | |
131 data, filename = params[key] | 148 data, filename = params[key] |
132 lines = data.splitlines(True) | 149 page_data, cleaned_page = get_page_metadata(params['page'], data) |
Vasily Kuznetsov
2017/07/03 17:42:44
I think the variable naming is somewhat confusing
Jon Sonesen
2017/07/04 14:58:06
Agree here, ack
| |
133 for i, line in enumerate(lines): | 150 params.update(page_data) |
134 if not re.search(r'^\s*[\w\-]+\s*=', line): | 151 params[key] = (cleaned_page, filename) |
135 break | |
136 name, value = line.split('=', 1) | |
137 params[name.strip()] = value.strip() | |
138 lines[i] = '\n' | |
139 params[key] = (''.join(lines), filename) | |
140 | 152 |
141 def localize_string( | 153 def localize_string( |
142 self, page, name, default, comment, localedata, escapes): | 154 self, page, name, default, comment, localedata, escapes): |
143 | 155 |
144 def escape(s): | 156 def escape(s): |
145 return re.sub(r'.', | 157 return re.sub(r'.', |
146 lambda match: escapes.get(match.group(0), | 158 lambda match: escapes.get(match.group(0), |
147 match.group(0)), | 159 match.group(0)), |
148 s, flags=re.S) | 160 s, flags=re.S) |
149 | 161 |
(...skipping 226 matching lines...) | |
376 filters = { | 388 filters = { |
377 'translate': self.translate, | 389 'translate': self.translate, |
378 'linkify': self.linkify, | 390 'linkify': self.linkify, |
379 'toclist': self.toclist, | 391 'toclist': self.toclist, |
380 } | 392 } |
381 | 393 |
382 globals = { | 394 globals = { |
383 'get_string': self.get_string, | 395 'get_string': self.get_string, |
384 'has_string': self.has_string, | 396 'has_string': self.has_string, |
385 'get_page_content': self.get_page_content, | 397 'get_page_content': self.get_page_content, |
398 'get_pages_metadata': self.get_pages_metadata, | |
386 } | 399 } |
387 | 400 |
388 for dirname, dictionary in [('filters', filters), | 401 for dirname, dictionary in [('filters', filters), |
389 ('globals', globals)]: | 402 ('globals', globals)]: |
390 for filename in self._params['source'].list_files(dirname): | 403 for filename in self._params['source'].list_files(dirname): |
391 root, ext = os.path.splitext(filename) | 404 root, ext = os.path.splitext(filename) |
392 if ext.lower() != '.py': | 405 if ext.lower() != '.py': |
393 continue | 406 continue |
394 | 407 |
395 path = os.path.join(dirname, filename) | 408 path = os.path.join(dirname, filename) |
(...skipping 68 matching lines...) | |
464 locale = self._params['locale'] | 477 locale = self._params['locale'] |
465 | 478 |
466 locale, url = self._params['source'].resolve_link(page, locale) | 479 locale, url = self._params['source'].resolve_link(page, locale) |
467 return jinja2.Markup('<a{}>'.format(''.join( | 480 return jinja2.Markup('<a{}>'.format(''.join( |
468 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ | 481 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
469 ('href', url), | 482 ('href', url), |
470 ('hreflang', locale) | 483 ('hreflang', locale) |
471 ] + attrs.items() | 484 ] + attrs.items() |
472 ))) | 485 ))) |
473 | 486 |
487 def get_pages_metadata(self, filters=None): | |
488 if filters is not None and not isinstance(filters, dict): | |
489 raise TypeError('Filters are not a dictionary') | |
490 | |
491 return_data = [] | |
492 for page_name, _format in self._params['source'].list_pages(): | |
493 data, filename = self._params['source'].read_page(page_name, | |
494 _format) | |
495 page_data, cleaned_page = get_page_metadata(page_name, data) | |
Vasily Kuznetsov
2017/07/03 17:42:44
We can just take the first part of the tuple that
Jon Sonesen
2017/07/04 14:58:06
Acknowledged.
| |
496 if self.filter_metadata(filters, page_data) is True: | |
497 return_data.append(page_data) | |
498 return return_data | |
499 | |
500 def filter_metadata(self, filters, metadata): | |
501 # if only the page key is in the metadata then there | |
502 # was no user defined metadata | |
503 if metadata.keys() == ['page']: | |
504 return False | |
505 if filters is None: | |
506 return True | |
507 for filter_name, filter_value in filters.items(): | |
508 if filter_name not in metadata: | |
509 return False | |
510 if isinstance(metadata[filter_name], list): | |
511 if isinstance(filter_value, basestring): | |
512 filter_value = [filter_value] | |
513 for option in filter_value: | |
514 if str(option) not in metadata[filter_name]: | |
515 return False | |
516 elif filter_value != metadata[filter_name]: | |
517 return False | |
518 return True | |
519 | |
474 def toclist(self, content): | 520 def toclist(self, content): |
475 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' | 521 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
476 flat = [] | 522 flat = [] |
477 for match in re.finditer(toc_re, content, re.S): | 523 for match in re.finditer(toc_re, content, re.S): |
478 flat.append({ | 524 flat.append({ |
479 'level': int(match.group(1)), | 525 'level': int(match.group(1)), |
480 'anchor': jinja2.Markup(match.group(2)).unescape(), | 526 'anchor': jinja2.Markup(match.group(2)).unescape(), |
481 'title': jinja2.Markup(match.group(3)).unescape(), | 527 'title': jinja2.Markup(match.group(3)).unescape(), |
482 'subitems': [], | 528 'subitems': [], |
483 }) | 529 }) |
484 | 530 |
485 structured = [] | 531 structured = [] |
486 stack = [{'level': 0, 'subitems': structured}] | 532 stack = [{'level': 0, 'subitems': structured}] |
487 for item in flat: | 533 for item in flat: |
488 while stack[-1]['level'] >= item['level']: | 534 while stack[-1]['level'] >= item['level']: |
489 stack.pop() | 535 stack.pop() |
490 stack[-1]['subitems'].append(item) | 536 stack[-1]['subitems'].append(item) |
491 stack.append(item) | 537 stack.append(item) |
492 return structured | 538 return structured |
493 | 539 |
494 converters = { | 540 converters = { |
495 'html': RawConverter, | 541 'html': RawConverter, |
496 'md': MarkdownConverter, | 542 'md': MarkdownConverter, |
497 'tmpl': TemplateConverter, | 543 'tmpl': TemplateConverter, |
498 } | 544 } |
OLD | NEW |