| Index: cms/converters.py |
| =================================================================== |
| --- a/cms/converters.py |
| +++ b/cms/converters.py |
| @@ -18,16 +18,17 @@ |
| import os |
| import HTMLParser |
| import re |
| import urlparse |
| import jinja2 |
| import markdown |
| +from cms import utils |
|
Vasily Kuznetsov
2017/10/27 18:35:01
It makes more sense for `utils` to be imported int
mathias
2017/10/30 15:37:00
Acknowledged.
|
| # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs |
| # are inserted into the <head> tag |
| orig_isBlockLevel = markdown.util.isBlockLevel |
| def isBlockLevel(tag): |
| if tag == 'head': |
| @@ -113,53 +114,28 @@ |
| def handle_entityref(self, name): |
| self._append_text(self.unescape('&{};'.format(name))) |
| def handle_charref(self, name): |
| self._append_text(self.unescape('&#{};'.format(name))) |
| -def parse_page_content(page, data): |
|
Vasily Kuznetsov
2017/10/27 18:35:01
This function has nothing to do with converters, i
|
| - """Separate page content into metadata (dict) and body text (str)""" |
| - page_data = {'page': page} |
| - lines = data.splitlines(True) |
| - for i, line in enumerate(lines): |
| - if line.strip() in {'<!--', '-->'}: |
| - lines[i] = '' |
| - continue |
| - if not re.search(r'^\s*[\w\-]+\s*=', line): |
| - break |
| - name, value = line.split('=', 1) |
| - value = value.strip() |
| - if value.startswith('[') and value.endswith(']'): |
| - value = [element.strip() for element in value[1:-1].split(',')] |
| - lines[i] = '\n' |
| - page_data[name.strip()] = value |
| - return page_data, ''.join(lines) |
| - |
| - |
| class Converter: |
| whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
| missing_translations = 0 |
| total_translations = 0 |
| - def __init__(self, params, key='pagedata'): |
|
Vasily Kuznetsov
2017/10/27 18:35:01
This signature is rather cryptic, what we want to
|
| + def __init__(self, data, filename, params): |
| + self._data = data |
| + self._filename = filename |
| self._params = params |
| - self._key = key |
| self._attribute_parser = AttributeParser(self.whitelist) |
| self._seen_defaults = {} |
| - # Read in any parameters specified at the beginning of the file |
| - # and override converter defaults with page specific params |
| - data, filename = params[key] |
|
Vasily Kuznetsov
2017/10/27 18:35:01
This is a side effect that is not related to conve
|
| - page_data, body_text = parse_page_content(params['page'], data) |
| - params.update(page_data) |
| - params[key] = (body_text, filename) |
| - |
| def localize_string( |
| self, page, name, default, comment, localedata, escapes): |
| def escape(s): |
| return re.sub(r'.', |
| lambda match: escapes.get(match.group(0), |
| match.group(0)), |
| s, flags=re.S) |
| @@ -290,21 +266,26 @@ |
| include_start_regex = '<' |
| include_end_regex = '>' |
| def resolve_includes(self, text): |
| def resolve_include(match): |
| name = match.group(1) |
| for format_, converter_class in converters.iteritems(): |
| if self._params['source'].has_include(name, format_): |
| - self._params['includedata'] = ( |
|
Vasily Kuznetsov
2017/10/27 18:35:01
This key is not used anywhere, so we don't need to
|
| + data, filename = ( |
| self._params['source'].read_include(name, format_)) |
| - converter = converter_class(self._params, |
| - key='includedata') |
| + # XXX: allowing includes to modify params of the whole page |
| + # seems like a bad idea but we have to support this because |
| + # it's used by www.adblockplus.org. |
| + metadata, rest = utils.extract_page_metadata(data) |
|
Vasily Kuznetsov
2017/10/27 18:35:01
We have to maintain the ability of includes to wri
mathias
2017/10/30 15:37:00
Acknowledged.
|
| + self._params.update(metadata) |
| + |
| + converter = converter_class(rest, filename, self._params) |
| result = converter() |
| self.missing_translations += converter.missing_translations |
| self.total_translations += converter.total_translations |
| return result |
| raise Exception('Failed to resolve include {}' |
| ' on page {}'.format(name, self._params['page'])) |
| return re.sub( |
| @@ -312,28 +293,18 @@ |
| self.include_start_regex, |
| self.include_end_regex |
| ), |
| resolve_include, |
| text |
| ) |
| def __call__(self): |
| - result = self.get_html(*self._params[self._key]) |
| - result = self.resolve_includes(result) |
| - if self._key == 'pagedata': |
|
Vasily Kuznetsov
2017/10/27 18:35:01
The return types of the two branches of if are dif
|
| - head = [] |
| - |
| - def add_to_head(match): |
| - head.append(match.group(1)) |
| - return '' |
| - body = re.sub(r'<head>(.*?)</head>', add_to_head, result, |
| - flags=re.S) |
| - return ''.join(head), body |
| - return result |
| + result = self.get_html(self._data, self._filename) |
| + return self.resolve_includes(result) |
| class RawConverter(Converter): |
| def get_html(self, source, filename): |
| result = self.insert_localized_strings(source, html_escapes) |
| result = self.process_links(result) |
| return result |
| @@ -467,21 +438,19 @@ |
| def has_string(self, name, page=None): |
| if page is None: |
| page = self._params['page'] |
| localedata = self._get_locale_data(page) |
| return name in localedata |
| def get_page_content(self, page, locale=None): |
| - from cms.utils import get_page_params |
| - |
| if locale is None: |
| locale = self._params['locale'] |
| - return get_page_params(self._params['source'], locale, page) |
| + return utils.get_page_params(self._params['source'], locale, page) |
| def linkify(self, page, locale=None, **attrs): |
| if locale is None: |
| locale = self._params['locale'] |
| locale, url = self._params['source'].resolve_link(page, locale) |
| return jinja2.Markup('<a{}>'.format(''.join( |
| ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
| @@ -493,17 +462,18 @@ |
| def get_pages_metadata(self, filters=None): |
| if filters is not None and not isinstance(filters, dict): |
| raise TypeError('Filters are not a dictionary') |
| return_data = [] |
| for page_name, _format in self._params['source'].list_pages(): |
| data, filename = self._params['source'].read_page(page_name, |
| _format) |
| - page_data = parse_page_content(page_name, data)[0] |
| + page_data = utils.extract_page_metadata(data)[0] |
| + page_data['page'] = page_name |
|
mathias
2017/10/30 15:37:00
Shouldn't this use setdefault(), in order to allow
Vasily Kuznetsov
2017/11/07 17:08:29
Yeah, you're right, this would be needed to preser
|
| if self.filter_metadata(filters, page_data) is True: |
| return_data.append(page_data) |
| return return_data |
| def filter_metadata(self, filters, metadata): |
| # if only the page key is in the metadata then there |
| # was no user defined metadata |
| if metadata.keys() == ['page']: |