Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-2017 eyeo GmbH | 2 # Copyright (C) 2006-2017 eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
109 # the document. | 109 # the document. |
110 self._append_text(data) | 110 self._append_text(data) |
111 | 111 |
112 def handle_entityref(self, name): | 112 def handle_entityref(self, name): |
113 self._append_text(self.unescape('&{};'.format(name))) | 113 self._append_text(self.unescape('&{};'.format(name))) |
114 | 114 |
115 def handle_charref(self, name): | 115 def handle_charref(self, name): |
116 self._append_text(self.unescape('&#{};'.format(name))) | 116 self._append_text(self.unescape('&#{};'.format(name))) |
117 | 117 |
118 | 118 |
119 def get_page_metadata(page, data): | 119 def parse_page_content(page, data): |
120 """Generator which gets per page metadata and corresponding line indices""" | 120 """Separate page content into metadata (dict) and body text (str)""" |
121 for i, line in enumerate(data.splitlines()): | 121 page_data = {'page': page} |
122 lines = data.splitlines(True) | |
123 for i, line in enumerate(lines): | |
122 if not re.search(r'^\s*[\w\-]+\s*=', line): | 124 if not re.search(r'^\s*[\w\-]+\s*=', line): |
123 break | 125 break |
124 name, value = line.split('=', 1) | 126 name, value = line.split('=', 1) |
125 value = value.strip() | 127 value = value.strip() |
126 if value.startswith('[') and value.endswith(']'): | 128 if value.startswith('[') and value.endswith(']'): |
127 value = [element.strip() for element in value[1:-1].split(',')] | 129 value = [element.strip() for element in value[1:-1].split(',')] |
128 yield name.strip(), value, i | 130 lines[i] = '\n' |
131 page_data[name.strip()] = value | |
132 return page_data, ''.join(lines) | |
129 | 133 |
130 | 134 |
131 class Converter: | 135 class Converter: |
132 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} | 136 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
133 missing_translations = 0 | 137 missing_translations = 0 |
134 total_translations = 0 | 138 total_translations = 0 |
135 | 139 |
136 def __init__(self, params, key='pagedata'): | 140 def __init__(self, params, key='pagedata'): |
137 self._params = params | 141 self._params = params |
138 self._key = key | 142 self._key = key |
139 self._attribute_parser = AttributeParser(self.whitelist) | 143 self._attribute_parser = AttributeParser(self.whitelist) |
140 self._seen_defaults = {} | 144 self._seen_defaults = {} |
141 | 145 |
142 # Read in any parameters specified at the beginning of the file | 146 # Read in any parameters specified at the beginning of the file |
147 # and override converter defaults with page specific params | |
143 data, filename = params[key] | 148 data, filename = params[key] |
144 lines = data.splitlines(True) | 149 page_data, body_text = parse_page_content(params['page'], data) |
145 for name, value, i in get_page_metadata(params['page'], data): | 150 params.update(page_data) |
Vasily Kuznetsov
2017/06/27 13:32:15
This refactoring moved the code into a separate fu
Jon Sonesen
2017/06/28 14:31:19
The line index is required to strip the metadata f
Jon Sonesen
2017/07/03 09:06:06
Done.
| |
146 params[name] = value | 151 params[key] = (body_text, filename) |
147 lines[i] = '\n' | |
148 params[key] = (''.join(lines), filename) | |
149 | 152 |
150 def localize_string( | 153 def localize_string( |
151 self, page, name, default, comment, localedata, escapes): | 154 self, page, name, default, comment, localedata, escapes): |
152 | 155 |
153 def escape(s): | 156 def escape(s): |
154 return re.sub(r'.', | 157 return re.sub(r'.', |
155 lambda match: escapes.get(match.group(0), | 158 lambda match: escapes.get(match.group(0), |
156 match.group(0)), | 159 match.group(0)), |
157 s, flags=re.S) | 160 s, flags=re.S) |
158 | 161 |
(...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
475 | 478 |
476 locale, url = self._params['source'].resolve_link(page, locale) | 479 locale, url = self._params['source'].resolve_link(page, locale) |
477 return jinja2.Markup('<a{}>'.format(''.join( | 480 return jinja2.Markup('<a{}>'.format(''.join( |
478 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ | 481 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
479 ('href', url), | 482 ('href', url), |
480 ('hreflang', locale) | 483 ('hreflang', locale) |
481 ] + attrs.items() | 484 ] + attrs.items() |
482 ))) | 485 ))) |
483 | 486 |
484 def get_pages_metadata(self, filters=None): | 487 def get_pages_metadata(self, filters=None): |
485 if not isinstance(filters, dict) and filters: | 488 if filters is not None and not isinstance(filters, dict): |
Vasily Kuznetsov
2017/06/27 13:32:15
This code seems to allow things like `filters = []
Jon Sonesen
2017/06/28 14:31:19
Acknowledged.
Jon Sonesen
2017/07/03 09:06:06
Done.
| |
486 raise TypeError('Filters are not a dictionary') | 489 raise TypeError('Filters are not a dictionary') |
487 | 490 |
488 return_data = [] | 491 return_data = [] |
489 for page_name, _format in self._params['source'].list_pages(): | 492 for page_name, _format in self._params['source'].list_pages(): |
490 data, filename = self._params['source'].read_page(page_name, | 493 data, filename = self._params['source'].read_page(page_name, |
491 _format) | 494 _format) |
492 page_data = {'page': page_name} | 495 page_data = parse_page_content(page_name, data)[0] |
493 for name, value, i in get_page_metadata(page_name, data): | |
494 page_data[name] = value | |
495 if self.filter_metadata(filters, page_data) is True: | 496 if self.filter_metadata(filters, page_data) is True: |
496 return_data.append(page_data) | 497 return_data.append(page_data) |
497 return return_data | 498 return return_data |
498 | 499 |
499 def filter_metadata(self, filters, metadata): | 500 def filter_metadata(self, filters, metadata): |
500 # if only the page key is in the metadata then there | 501 # if only the page key is in the metadata then there |
501 # was no user defined metadata | 502 # was no user defined metadata |
502 if metadata.keys() == ['page']: | 503 if metadata.keys() == ['page']: |
Vasily Kuznetsov
2017/06/27 13:32:15
Is this a requirement that such metadata should be
Jon Sonesen
2017/06/28 14:31:19
This is done to keep a uniform API; the previous i
Jon Sonesen
2017/07/03 09:06:06
Still waiting on Julian's opinion; I guess I get wh
juliandoucette
2017/07/03 21:48:51
I don't understand the question. Can you please ex
Vasily Kuznetsov
2017/07/04 07:43:48
The question is: if a page has no explicit metadat
juliandoucette
2017/07/04 09:57:24
Or that I queried by page name?
Vasily Kuznetsov
2017/07/04 10:23:34
The content is not in the dictionary currently. It
| |
503 return False | 504 return False |
504 if filters is None: | 505 if filters is None: |
505 return True | 506 return True |
506 for filter_name, filter_value in filters.items(): | 507 for filter_name, filter_value in filters.items(): |
507 if filter_name not in metadata: | 508 if filter_name not in metadata: |
508 return False | 509 return False |
509 if isinstance(metadata[filter_name], list): | 510 if isinstance(metadata[filter_name], list): |
510 if isinstance(filter_value, basestring): | 511 if isinstance(filter_value, basestring): |
511 filter_value = [filter_value] | 512 filter_value = [filter_value] |
512 for option in filter_value: | 513 for option in filter_value: |
(...skipping 21 matching lines...) Expand all Loading... | |
534 stack.pop() | 535 stack.pop() |
535 stack[-1]['subitems'].append(item) | 536 stack[-1]['subitems'].append(item) |
536 stack.append(item) | 537 stack.append(item) |
537 return structured | 538 return structured |
538 | 539 |
539 converters = { | 540 converters = { |
540 'html': RawConverter, | 541 'html': RawConverter, |
541 'md': MarkdownConverter, | 542 'md': MarkdownConverter, |
542 'tmpl': TemplateConverter, | 543 'tmpl': TemplateConverter, |
543 } | 544 } |
LEFT | RIGHT |