Index: cms/converters.py |
=================================================================== |
--- a/cms/converters.py |
+++ b/cms/converters.py |
@@ -18,16 +18,17 @@ |
import os |
import HTMLParser |
import re |
import urlparse |
import jinja2 |
import markdown |
+from cms import utils |
Vasily Kuznetsov
2017/10/27 18:35:01
It makes more sense for `utils` to be imported int
mathias
2017/10/30 15:37:00
Acknowledged.
|
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs |
# are inserted into the <head> tag |
orig_isBlockLevel = markdown.util.isBlockLevel |
def isBlockLevel(tag): |
if tag == 'head': |
@@ -113,53 +114,28 @@ |
def handle_entityref(self, name): |
self._append_text(self.unescape('&{};'.format(name))) |
def handle_charref(self, name): |
self._append_text(self.unescape('&#{};'.format(name))) |
-def parse_page_content(page, data): |
Vasily Kuznetsov
2017/10/27 18:35:01
This function has nothing to do with converters, i
|
- """Separate page content into metadata (dict) and body text (str)""" |
- page_data = {'page': page} |
- lines = data.splitlines(True) |
- for i, line in enumerate(lines): |
- if line.strip() in {'<!--', '-->'}: |
- lines[i] = '' |
- continue |
- if not re.search(r'^\s*[\w\-]+\s*=', line): |
- break |
- name, value = line.split('=', 1) |
- value = value.strip() |
- if value.startswith('[') and value.endswith(']'): |
- value = [element.strip() for element in value[1:-1].split(',')] |
- lines[i] = '\n' |
- page_data[name.strip()] = value |
- return page_data, ''.join(lines) |
- |
- |
class Converter: |
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
missing_translations = 0 |
total_translations = 0 |
- def __init__(self, params, key='pagedata'): |
Vasily Kuznetsov
2017/10/27 18:35:01
This signature is rather cryptic, what we want to
|
+ def __init__(self, data, filename, params): |
+ self._data = data |
+ self._filename = filename |
self._params = params |
- self._key = key |
self._attribute_parser = AttributeParser(self.whitelist) |
self._seen_defaults = {} |
- # Read in any parameters specified at the beginning of the file |
- # and override converter defaults with page specific params |
- data, filename = params[key] |
Vasily Kuznetsov
2017/10/27 18:35:01
This is a side effect that is not related to conve
|
- page_data, body_text = parse_page_content(params['page'], data) |
- params.update(page_data) |
- params[key] = (body_text, filename) |
- |
def localize_string( |
self, page, name, default, comment, localedata, escapes): |
def escape(s): |
return re.sub(r'.', |
lambda match: escapes.get(match.group(0), |
match.group(0)), |
s, flags=re.S) |
@@ -290,21 +266,26 @@ |
include_start_regex = '<' |
include_end_regex = '>' |
def resolve_includes(self, text): |
def resolve_include(match): |
name = match.group(1) |
for format_, converter_class in converters.iteritems(): |
if self._params['source'].has_include(name, format_): |
- self._params['includedata'] = ( |
Vasily Kuznetsov
2017/10/27 18:35:01
This key is not used anywhere, so we don't need to
|
+ data, filename = ( |
self._params['source'].read_include(name, format_)) |
- converter = converter_class(self._params, |
- key='includedata') |
+ # XXX: allowing includes to modify params of the whole page |
+ # seems like a bad idea but we have to support this because |
+ # it's used by www.adblockplus.org. |
+ metadata, rest = utils.extract_page_metadata(data) |
Vasily Kuznetsov
2017/10/27 18:35:01
We have to maintain the ability of includes to wri
mathias
2017/10/30 15:37:00
Acknowledged.
|
+ self._params.update(metadata) |
+ |
+ converter = converter_class(rest, filename, self._params) |
result = converter() |
self.missing_translations += converter.missing_translations |
self.total_translations += converter.total_translations |
return result |
raise Exception('Failed to resolve include {}' |
' on page {}'.format(name, self._params['page'])) |
return re.sub( |
@@ -312,28 +293,18 @@ |
self.include_start_regex, |
self.include_end_regex |
), |
resolve_include, |
text |
) |
def __call__(self): |
- result = self.get_html(*self._params[self._key]) |
- result = self.resolve_includes(result) |
- if self._key == 'pagedata': |
Vasily Kuznetsov
2017/10/27 18:35:01
The return types of the two branches of if are dif
|
- head = [] |
- |
- def add_to_head(match): |
- head.append(match.group(1)) |
- return '' |
- body = re.sub(r'<head>(.*?)</head>', add_to_head, result, |
- flags=re.S) |
- return ''.join(head), body |
- return result |
+ result = self.get_html(self._data, self._filename) |
+ return self.resolve_includes(result) |
class RawConverter(Converter): |
def get_html(self, source, filename): |
result = self.insert_localized_strings(source, html_escapes) |
result = self.process_links(result) |
return result |
@@ -467,21 +438,19 @@ |
def has_string(self, name, page=None): |
if page is None: |
page = self._params['page'] |
localedata = self._get_locale_data(page) |
return name in localedata |
def get_page_content(self, page, locale=None): |
- from cms.utils import get_page_params |
- |
if locale is None: |
locale = self._params['locale'] |
- return get_page_params(self._params['source'], locale, page) |
+ return utils.get_page_params(self._params['source'], locale, page) |
def linkify(self, page, locale=None, **attrs): |
if locale is None: |
locale = self._params['locale'] |
locale, url = self._params['source'].resolve_link(page, locale) |
return jinja2.Markup('<a{}>'.format(''.join( |
' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
@@ -493,17 +462,18 @@ |
def get_pages_metadata(self, filters=None): |
if filters is not None and not isinstance(filters, dict): |
raise TypeError('Filters are not a dictionary') |
return_data = [] |
for page_name, _format in self._params['source'].list_pages(): |
data, filename = self._params['source'].read_page(page_name, |
_format) |
- page_data = parse_page_content(page_name, data)[0] |
+ page_data = utils.extract_page_metadata(data)[0] |
+ page_data['page'] = page_name |
mathias
2017/10/30 15:37:00
Shouldn't this use setdefault(), in order to allow
Vasily Kuznetsov
2017/11/07 17:08:29
Yeah, you're right, this would be needed to preser
|
if self.filter_metadata(filters, page_data) is True: |
return_data.append(page_data) |
return return_data |
def filter_metadata(self, filters, metadata): |
# if only the page key is in the metadata then there |
# was no user defined metadata |
if metadata.keys() == ['page']: |