Left: | ||
Right: |
OLD | NEW |
---|---|
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
12 # | 12 # |
13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
15 | 15 |
16 from __future__ import unicode_literals | 16 from __future__ import unicode_literals |
17 | 17 |
18 import os | 18 import os |
19 import HTMLParser | 19 import HTMLParser |
20 import re | 20 import re |
21 import urlparse | 21 import urlparse |
22 | 22 |
23 import jinja2 | 23 import jinja2 |
24 import markdown | 24 import markdown |
25 | 25 |
26 from cms import utils | |
Vasily Kuznetsov
2017/10/27 18:35:01
It makes more sense for `utils` to be imported int
mathias
2017/10/30 15:37:00
Acknowledged.
| |
26 | 27 |
27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs | 28 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs |
28 # are inserted into the <head> tag | 29 # are inserted into the <head> tag |
29 orig_isBlockLevel = markdown.util.isBlockLevel | 30 orig_isBlockLevel = markdown.util.isBlockLevel |
30 | 31 |
31 | 32 |
32 def isBlockLevel(tag): | 33 def isBlockLevel(tag): |
33 if tag == 'head': | 34 if tag == 'head': |
34 return True | 35 return True |
35 return orig_isBlockLevel(tag) | 36 return orig_isBlockLevel(tag) |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
111 # the document. | 112 # the document. |
112 self._append_text(data) | 113 self._append_text(data) |
113 | 114 |
114 def handle_entityref(self, name): | 115 def handle_entityref(self, name): |
115 self._append_text(self.unescape('&{};'.format(name))) | 116 self._append_text(self.unescape('&{};'.format(name))) |
116 | 117 |
117 def handle_charref(self, name): | 118 def handle_charref(self, name): |
118 self._append_text(self.unescape('&#{};'.format(name))) | 119 self._append_text(self.unescape('&#{};'.format(name))) |
119 | 120 |
120 | 121 |
121 def parse_page_content(page, data): | |
Vasily Kuznetsov
2017/10/27 18:35:01
This function has nothing to do with converters, i
| |
122 """Separate page content into metadata (dict) and body text (str)""" | |
123 page_data = {'page': page} | |
124 lines = data.splitlines(True) | |
125 for i, line in enumerate(lines): | |
126 if line.strip() in {'<!--', '-->'}: | |
127 lines[i] = '' | |
128 continue | |
129 if not re.search(r'^\s*[\w\-]+\s*=', line): | |
130 break | |
131 name, value = line.split('=', 1) | |
132 value = value.strip() | |
133 if value.startswith('[') and value.endswith(']'): | |
134 value = [element.strip() for element in value[1:-1].split(',')] | |
135 lines[i] = '\n' | |
136 page_data[name.strip()] = value | |
137 return page_data, ''.join(lines) | |
138 | |
139 | |
140 class Converter: | 122 class Converter: |
141 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} | 123 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
142 missing_translations = 0 | 124 missing_translations = 0 |
143 total_translations = 0 | 125 total_translations = 0 |
144 | 126 |
145 def __init__(self, params, key='pagedata'): | 127 def __init__(self, data, filename, params): |
Vasily Kuznetsov
2017/10/27 18:35:01
This signature is rather cryptic, what we want to
| |
128 self._data = data | |
129 self._filename = filename | |
146 self._params = params | 130 self._params = params |
147 self._key = key | |
148 self._attribute_parser = AttributeParser(self.whitelist) | 131 self._attribute_parser = AttributeParser(self.whitelist) |
149 self._seen_defaults = {} | 132 self._seen_defaults = {} |
150 | 133 |
151 # Read in any parameters specified at the beginning of the file | |
152 # and override converter defaults with page specific params | |
153 data, filename = params[key] | |
Vasily Kuznetsov
2017/10/27 18:35:01
This is a side effect that is not related to conve
| |
154 page_data, body_text = parse_page_content(params['page'], data) | |
155 params.update(page_data) | |
156 params[key] = (body_text, filename) | |
157 | |
158 def localize_string( | 134 def localize_string( |
159 self, page, name, default, comment, localedata, escapes): | 135 self, page, name, default, comment, localedata, escapes): |
160 | 136 |
161 def escape(s): | 137 def escape(s): |
162 return re.sub(r'.', | 138 return re.sub(r'.', |
163 lambda match: escapes.get(match.group(0), | 139 lambda match: escapes.get(match.group(0), |
164 match.group(0)), | 140 match.group(0)), |
165 s, flags=re.S) | 141 s, flags=re.S) |
166 | 142 |
167 def re_escape(s): | 143 def re_escape(s): |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
288 return text | 264 return text |
289 | 265 |
290 include_start_regex = '<' | 266 include_start_regex = '<' |
291 include_end_regex = '>' | 267 include_end_regex = '>' |
292 | 268 |
293 def resolve_includes(self, text): | 269 def resolve_includes(self, text): |
294 def resolve_include(match): | 270 def resolve_include(match): |
295 name = match.group(1) | 271 name = match.group(1) |
296 for format_, converter_class in converters.iteritems(): | 272 for format_, converter_class in converters.iteritems(): |
297 if self._params['source'].has_include(name, format_): | 273 if self._params['source'].has_include(name, format_): |
298 self._params['includedata'] = ( | 274 data, filename = ( |
Vasily Kuznetsov
2017/10/27 18:35:01
This key is not used anywhere, so we don't need to
| |
299 self._params['source'].read_include(name, format_)) | 275 self._params['source'].read_include(name, format_)) |
300 | 276 |
301 converter = converter_class(self._params, | 277 # XXX: allowing includes to modify params of the whole page |
302 key='includedata') | 278 # seems like a bad idea but we have to support this because |
279 # it's used by www.adblockplus.org. | |
280 metadata, rest = utils.extract_page_metadata(data) | |
Vasily Kuznetsov
2017/10/27 18:35:01
We have to maintain the ability of includes to wri
mathias
2017/10/30 15:37:00
Acknowledged.
| |
281 self._params.update(metadata) | |
282 | |
283 converter = converter_class(rest, filename, self._params) | |
303 result = converter() | 284 result = converter() |
304 self.missing_translations += converter.missing_translations | 285 self.missing_translations += converter.missing_translations |
305 self.total_translations += converter.total_translations | 286 self.total_translations += converter.total_translations |
306 return result | 287 return result |
307 raise Exception('Failed to resolve include {}' | 288 raise Exception('Failed to resolve include {}' |
308 ' on page {}'.format(name, self._params['page'])) | 289 ' on page {}'.format(name, self._params['page'])) |
309 | 290 |
310 return re.sub( | 291 return re.sub( |
311 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( | 292 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( |
312 self.include_start_regex, | 293 self.include_start_regex, |
313 self.include_end_regex | 294 self.include_end_regex |
314 ), | 295 ), |
315 resolve_include, | 296 resolve_include, |
316 text | 297 text |
317 ) | 298 ) |
318 | 299 |
319 def __call__(self): | 300 def __call__(self): |
320 result = self.get_html(*self._params[self._key]) | 301 result = self.get_html(self._data, self._filename) |
321 result = self.resolve_includes(result) | 302 return self.resolve_includes(result) |
322 if self._key == 'pagedata': | |
Vasily Kuznetsov
2017/10/27 18:35:01
The return types of the two branches of if are dif
| |
323 head = [] | |
324 | |
325 def add_to_head(match): | |
326 head.append(match.group(1)) | |
327 return '' | |
328 body = re.sub(r'<head>(.*?)</head>', add_to_head, result, | |
329 flags=re.S) | |
330 return ''.join(head), body | |
331 return result | |
332 | 303 |
333 | 304 |
334 class RawConverter(Converter): | 305 class RawConverter(Converter): |
335 def get_html(self, source, filename): | 306 def get_html(self, source, filename): |
336 result = self.insert_localized_strings(source, html_escapes) | 307 result = self.insert_localized_strings(source, html_escapes) |
337 result = self.process_links(result) | 308 result = self.process_links(result) |
338 return result | 309 return result |
339 | 310 |
340 | 311 |
341 class MarkdownConverter(Converter): | 312 class MarkdownConverter(Converter): |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
465 )) | 436 )) |
466 | 437 |
467 def has_string(self, name, page=None): | 438 def has_string(self, name, page=None): |
468 if page is None: | 439 if page is None: |
469 page = self._params['page'] | 440 page = self._params['page'] |
470 | 441 |
471 localedata = self._get_locale_data(page) | 442 localedata = self._get_locale_data(page) |
472 return name in localedata | 443 return name in localedata |
473 | 444 |
474 def get_page_content(self, page, locale=None): | 445 def get_page_content(self, page, locale=None): |
475 from cms.utils import get_page_params | |
476 | |
477 if locale is None: | 446 if locale is None: |
478 locale = self._params['locale'] | 447 locale = self._params['locale'] |
479 return get_page_params(self._params['source'], locale, page) | 448 return utils.get_page_params(self._params['source'], locale, page) |
480 | 449 |
481 def linkify(self, page, locale=None, **attrs): | 450 def linkify(self, page, locale=None, **attrs): |
482 if locale is None: | 451 if locale is None: |
483 locale = self._params['locale'] | 452 locale = self._params['locale'] |
484 | 453 |
485 locale, url = self._params['source'].resolve_link(page, locale) | 454 locale, url = self._params['source'].resolve_link(page, locale) |
486 return jinja2.Markup('<a{}>'.format(''.join( | 455 return jinja2.Markup('<a{}>'.format(''.join( |
487 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ | 456 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
488 ('href', url), | 457 ('href', url), |
489 ('hreflang', locale) | 458 ('hreflang', locale) |
490 ] + attrs.items() | 459 ] + attrs.items() |
491 ))) | 460 ))) |
492 | 461 |
493 def get_pages_metadata(self, filters=None): | 462 def get_pages_metadata(self, filters=None): |
494 if filters is not None and not isinstance(filters, dict): | 463 if filters is not None and not isinstance(filters, dict): |
495 raise TypeError('Filters are not a dictionary') | 464 raise TypeError('Filters are not a dictionary') |
496 | 465 |
497 return_data = [] | 466 return_data = [] |
498 for page_name, _format in self._params['source'].list_pages(): | 467 for page_name, _format in self._params['source'].list_pages(): |
499 data, filename = self._params['source'].read_page(page_name, | 468 data, filename = self._params['source'].read_page(page_name, |
500 _format) | 469 _format) |
501 page_data = parse_page_content(page_name, data)[0] | 470 page_data = utils.extract_page_metadata(data)[0] |
471 page_data['page'] = page_name | |
mathias
2017/10/30 15:37:00
Shouldn't this use setdefault(), in order to allow
Vasily Kuznetsov
2017/11/07 17:08:29
Yeah, you're right, this would be needed to preser
| |
502 if self.filter_metadata(filters, page_data) is True: | 472 if self.filter_metadata(filters, page_data) is True: |
503 return_data.append(page_data) | 473 return_data.append(page_data) |
504 return return_data | 474 return return_data |
505 | 475 |
506 def filter_metadata(self, filters, metadata): | 476 def filter_metadata(self, filters, metadata): |
507 # if only the page key is in the metadata then there | 477 # if only the page key is in the metadata then there |
508 # was no user defined metadata | 478 # was no user defined metadata |
509 if metadata.keys() == ['page']: | 479 if metadata.keys() == ['page']: |
510 return False | 480 return False |
511 if filters is None: | 481 if filters is None: |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
558 stack[-1]['subitems'].append(item) | 528 stack[-1]['subitems'].append(item) |
559 stack.append(item) | 529 stack.append(item) |
560 return structured | 530 return structured |
561 | 531 |
562 | 532 |
563 converters = { | 533 converters = { |
564 'html': RawConverter, | 534 'html': RawConverter, |
565 'md': MarkdownConverter, | 535 'md': MarkdownConverter, |
566 'tmpl': TemplateConverter, | 536 'tmpl': TemplateConverter, |
567 } | 537 } |
OLD | NEW |