Index: cms/converters.py |
=================================================================== |
--- a/cms/converters.py |
+++ b/cms/converters.py |
@@ -16,26 +16,26 @@ |
import os |
import HTMLParser |
import re |
import jinja2 |
import markdown |
-# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are |
-# inserted into the <head> tag |
+# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs |
+# are inserted into the <head> tag |
orig_isBlockLevel = markdown.util.isBlockLevel |
def isBlockLevel(tag): |
if tag == 'head': |
return True |
- else: |
- return orig_isBlockLevel(tag) |
+ return orig_isBlockLevel(tag) |
+ |
markdown.util.isBlockLevel = isBlockLevel |
html_escapes = { |
'<': '&lt;', |
'>': '&gt;', |
'&': '&amp;', |
'"': '&quot;', |
"'": '&#39;', |
@@ -60,59 +60,63 @@ |
self._pagename = pagename |
# Force-escape ampersands, otherwise the parser will autocomplete bogus |
# entities. |
text = re.sub(r'&(?!\S+;)', '&amp;', text) |
try: |
self.feed(text) |
- return ''.join(self._string), self._attrs, [''.join(s) for s in self._fixed_strings] |
+ return (''.join(self._string), |
+ self._attrs, [''.join(s) for s in self._fixed_strings]) |
finally: |
self._string = None |
self._attrs = None |
self._pagename = None |
self._inside_fixed = False |
self._fixed_strings = None |
def handle_starttag(self, tag, attrs): |
if self._inside_fixed: |
- raise Exception("Unexpected HTML tag '%s' inside a fixed string on page %s" % (tag, self._pagename)) |
- elif tag == 'fix': |
+ raise Exception("Unexpected HTML tag '{}' inside a fixed string " |
+ 'on page {}'.format(tag, self._pagename)) |
+ if tag == 'fix': |
self._inside_fixed = True |
self._fixed_strings.append([]) |
elif tag in self._whitelist: |
self._attrs.setdefault(tag, []).append(attrs) |
- self._string.append('<%s>' % tag) |
+ self._string.append('<{}>'.format(tag)) |
else: |
- raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename)) |
+ raise Exception("Unexpected HTML tag '{}' in localizable string " |
+ 'on page {}'.format(tag, self._pagename)) |
def handle_endtag(self, tag): |
if tag == 'fix': |
- self._string.append('{%d}' % len(self._fixed_strings)) |
+ self._string.append('{{{}}}'.format(len(self._fixed_strings))) |
self._inside_fixed = False |
else: |
- self._string.append('</%s>' % tag) |
+ self._string.append('</{}>'.format(tag)) |
def _append_text(self, s): |
if self._inside_fixed: |
self._fixed_strings[-1].append(s) |
else: |
self._string.append(s) |
def handle_data(self, data): |
- # Note: lack of escaping here is intentional. The result is a locale string, |
- # HTML escaping is applied when this string is inserted into the document. |
+ # Note: lack of escaping here is intentional. The result is a locale |
+ # string, HTML escaping is applied when this string is inserted into |
+ # the document. |
self._append_text(data) |
def handle_entityref(self, name): |
- self._append_text(self.unescape('&%s;' % name)) |
+ self._append_text(self.unescape('&{};'.format(name))) |
def handle_charref(self, name): |
- self._append_text(self.unescape('&#%s;' % name)) |
+ self._append_text(self.unescape('&#{};'.format(name))) |
class Converter: |
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
missing_translations = 0 |
total_translations = 0 |
def __init__(self, params, key='pagedata'): |
@@ -127,101 +131,109 @@ |
for i, line in enumerate(lines): |
if not re.search(r'^\s*[\w\-]+\s*=', line): |
break |
name, value = line.split('=', 1) |
params[name.strip()] = value.strip() |
lines[i] = '\n' |
params[key] = (''.join(lines), filename) |
- def localize_string(self, page, name, default, comment, localedata, escapes): |
+ def localize_string( |
+ self, page, name, default, comment, localedata, escapes): |
+ |
def escape(s): |
return re.sub(r'.', |
- lambda match: escapes.get(match.group(0), match.group(0)), |
+ lambda match: escapes.get(match.group(0), |
+ match.group(0)), |
s, flags=re.S) |
def re_escape(s): |
return re.escape(escape(s)) |
# Handle duplicated strings |
if default: |
self._seen_defaults[(page, name)] = (default, comment) |
else: |
try: |
default, comment = self._seen_defaults[(page, name)] |
except KeyError: |
- raise Exception('Text not yet defined for string %s on page %s' % |
- (name, page)) |
+ raise Exception('Text not yet defined for string {} on page ' |
+ '{}'.format(name, page)) |
# Extract tag attributes from default string |
- default, saved_attributes, fixed_strings = self._attribute_parser.parse(default, self._params['page']) |
+ default, saved_attributes, fixed_strings = ( |
+ self._attribute_parser.parse(default, self._params['page'])) |
# Get translation |
locale = self._params['locale'] |
if locale == self._params['defaultlocale']: |
result = default |
elif name in localedata: |
result = localedata[name].strip() |
else: |
result = default |
self.missing_translations += 1 |
self.total_translations += 1 |
- # Perform callback with the string if required, e.g. for the translations script |
+ # Perform callback with the string if required, e.g. for the |
+ # translations script |
callback = self._params['localized_string_callback'] |
if callback: |
callback(page, locale, name, result, comment, fixed_strings) |
# Insert fixed strings |
for i, fixed_string in enumerate(fixed_strings, 1): |
- result = result.replace('{%d}' % i, fixed_string) |
+ result = result.replace('{{{}}}'.format(i), fixed_string) |
# Insert attributes |
result = escape(result) |
def stringify_attribute((name, value)): |
- return '%s="%s"' % ( |
+ return '{}="{}"'.format( |
escape(name), |
escape(self.insert_localized_strings(value, {})) |
) |
for tag in self.whitelist: |
- allowed_contents = '(?:[^<>]|%s)' % '|'.join(( |
- '<(?:%s[^<>]*?|/%s)>' % (t, t) |
+ allowed_contents = '(?:[^<>]|{})'.format('|'.join( |
+ '<(?:{}[^<>]*?|/{})>'.format(t, t) |
for t in map(re.escape, self.whitelist - {tag}) |
)) |
saved = saved_attributes.get(tag, []) |
for attrs in saved: |
attrs = map(stringify_attribute, attrs) |
result = re.sub( |
- r'%s(%s*?)%s' % (re_escape('<%s>' % tag), allowed_contents, |
- re_escape('</%s>' % tag)), |
- lambda match: r'<%s%s>%s</%s>' % ( |
+ r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), |
+ allowed_contents, |
+ re_escape('</{}>'.format(tag))), |
+ lambda match: r'<{}{}>{}</{}>'.format( |
tag, |
' ' + ' '.join(attrs) if attrs else '', |
match.group(1), |
tag |
), |
result, 1, flags=re.S |
) |
result = re.sub( |
- r'%s(%s*?)%s' % (re_escape('<%s>' % tag), allowed_contents, |
- re_escape('</%s>' % tag)), |
- r'<%s>\1</%s>' % (tag, tag), |
+ r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), |
+ allowed_contents, |
+ re_escape('</{}>'.format(tag))), |
+ r'<{}>\1</{}>'.format(tag, tag), |
result, flags=re.S |
) |
return result |
def insert_localized_strings(self, text, escapes, to_html=lambda s: s): |
def lookup_string(match): |
name, comment, default = match.groups() |
if default: |
default = to_html(default).strip() |
return self.localize_string(self._params['page'], name, default, |
- comment, self._params['localedata'], escapes) |
+ comment, self._params['localedata'], |
+ escapes) |
return re.sub( |
r'{{\s*' |
r'([\w\-]+)' # String ID |
r'(?:(?:\[(.*?)\])?' # Optional comment |
r'\s+' |
r'((?:(?!{{).|' # Translatable text |
r'{{(?:(?!}}).)*}}' # Nested translation |
@@ -233,98 +245,106 @@ |
flags=re.S |
) |
def process_links(self, text): |
def process_link(match): |
pre, attr, url, post = match.groups() |
url = jinja2.Markup(url).unescape() |
- locale, new_url = self._params['source'].resolve_link(url, self._params['locale']) |
- if new_url != None: |
+ locale, new_url = ( |
+ self._params['source'] |
+ .resolve_link(url, self._params['locale'])) |
+ |
+ if new_url is not None: |
url = new_url |
if attr == 'href': |
- post += ' hreflang="%s"' % jinja2.Markup.escape(locale) |
+ post += ' hreflang="{}"'\ |
+ .format(jinja2.Markup.escape(locale)) |
return ''.join((pre, jinja2.Markup.escape(url), post)) |
- text = re.sub(r'(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")', process_link, text) |
- text = re.sub(r'(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")', process_link, text) |
+ text = re.sub(r'(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")', |
+ process_link, text) |
+ text = re.sub(r'(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")', |
+ process_link, text) |
return text |
include_start_regex = '<' |
include_end_regex = '>' |
def resolve_includes(self, text): |
def resolve_include(match): |
- global converters |
name = match.group(1) |
- for format, converter_class in converters.iteritems(): |
- if self._params['source'].has_include(name, format): |
- self._params['includedata'] = self._params['source'].read_include(name, format) |
- converter = converter_class(self._params, key='includedata') |
+ for format_, converter_class in converters.iteritems(): |
+ if self._params['source'].has_include(name, format_): |
+ self._params['includedata'] = ( |
+ self._params['source'].read_include(name, format_)) |
+ |
+ converter = converter_class(self._params, |
+ key='includedata') |
result = converter() |
self.missing_translations += converter.missing_translations |
self.total_translations += converter.total_translations |
return result |
- raise Exception('Failed to resolve include %s on page %s' % (name, self._params['page'])) |
+ raise Exception('Failed to resolve include {} ' |
+ 'on page {}'.format(name, self._params['page'])) |
return re.sub( |
- r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( |
+ r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( |
self.include_start_regex, |
self.include_end_regex |
), |
resolve_include, |
text |
) |
def __call__(self): |
result = self.get_html(*self._params[self._key]) |
result = self.resolve_includes(result) |
if self._key == 'pagedata': |
head = [] |
def add_to_head(match): |
head.append(match.group(1)) |
return '' |
- body = re.sub(r'<head>(.*?)</head>', add_to_head, result, flags=re.S) |
+ body = re.sub(r'<head>(.*?)</head>', add_to_head, result, |
+ flags=re.S) |
return ''.join(head), body |
- else: |
- return result |
+ return result |
class RawConverter(Converter): |
def get_html(self, source, filename): |
result = self.insert_localized_strings(source, html_escapes) |
result = self.process_links(result) |
return result |
class MarkdownConverter(Converter): |
- include_start_regex = r'(?:%s|%s)' % ( |
+ include_start_regex = r'(?:{}|{})'.format( |
Converter.include_start_regex, |
re.escape(jinja2.escape(Converter.include_start_regex)) |
) |
- include_end_regex = r'(?:%s|%s)' % ( |
+ include_end_regex = r'(?:{}|{})'.format( |
Converter.include_end_regex, |
re.escape(jinja2.escape(Converter.include_end_regex)) |
) |
def get_html(self, source, filename): |
def remove_unnecessary_entities(match): |
char = unichr(int(match.group(1))) |
if char in html_escapes: |
return match.group(0) |
- else: |
- return char |
+ return char |
escapes = {} |
md = markdown.Markdown(output='html5', extensions=['extra']) |
for char in md.ESCAPED_CHARS: |
- escapes[char] = '&#' + str(ord(char)) + ';' |
+ escapes[char] = '&#{};'.format(str(ord(char))) |
for key, value in html_escapes.iteritems(): |
escapes[key] = value |
md.preprocessors['html_block'].markdown_in_raw = True |
def to_html(s): |
return re.sub(r'</?p>', '', md.convert(s)) |
@@ -357,32 +377,36 @@ |
'toclist': self.toclist, |
} |
globals = { |
'get_string': self.get_string, |
'get_page_content': self.get_page_content, |
} |
- for dirname, dictionary in [('filters', filters), ('globals', globals)]: |
+ for dirname, dictionary in [('filters', filters), |
+ ('globals', globals)]: |
for filename in self._params['source'].list_files(dirname): |
root, ext = os.path.splitext(filename) |
if ext.lower() != '.py': |
continue |
- path = '%s/%s' % (dirname, filename) |
+ path = os.path.join(dirname, filename) |
namespace = self._params['source'].exec_file(path) |
name = os.path.basename(root) |
try: |
dictionary[name] = namespace[name] |
except KeyError: |
- raise Exception('Expected symbol %r not found in %r' % (name, path)) |
+ raise Exception('Expected symbol {!r} not found ' |
+ 'in {!r}'.format(name, path)) |
- self._env = jinja2.Environment(loader=SourceTemplateLoader(self._params['source']), autoescape=True) |
+ self._env = jinja2.Environment( |
+ loader=SourceTemplateLoader(self._params['source']), |
+ autoescape=True) |
self._env.filters.update(filters) |
self._env.globals.update(globals) |
def get_html(self, source, filename): |
env = self._env |
code = env.compile(source, None, filename) |
template = jinja2.Template.from_code(env, code, env.globals) |
@@ -404,17 +428,18 @@ |
self._params['page'], name, default, comment, |
self._params['localedata'], html_escapes |
)) |
def get_string(self, name, page=None): |
if page is None: |
page = self._params['page'] |
- localedata = self._params['source'].read_locale(self._params['locale'], page) |
+ localedata = self._params['source'].read_locale(self._params['locale'], |
+ page) |
default = localedata[name] |
return jinja2.Markup(self.localize_string( |
page, name, default, '', localedata, html_escapes |
)) |
def get_page_content(self, page, locale=None): |
from cms.utils import get_page_params |
@@ -422,26 +447,27 @@ |
locale = self._params['locale'] |
return get_page_params(self._params['source'], locale, page) |
def linkify(self, page, locale=None, **attrs): |
if locale is None: |
locale = self._params['locale'] |
locale, url = self._params['source'].resolve_link(page, locale) |
- return jinja2.Markup('<a%s>' % ''.join( |
- ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [ |
+ return jinja2.Markup('<a{}>'.format(''.join( |
+ ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
('href', url), |
('hreflang', locale) |
] + attrs.items() |
- )) |
+ ))) |
def toclist(self, content): |
+ toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
flat = [] |
- for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S): |
+ for match in re.finditer(toc_re, content, re.S): |
flat.append({ |
'level': int(match.group(1)), |
'anchor': jinja2.Markup(match.group(2)).unescape(), |
'title': jinja2.Markup(match.group(3)).unescape(), |
'subitems': [], |
}) |
structured = [] |