Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: cms/converters.py

Issue 29378736: NoIssue - Improves converters.py PEP8 compliance (Closed) Base URL: https://hg.adblockplus.org/cms
Patch Set: Created March 8, 2017, 6:06 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tox.ini » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: cms/converters.py
===================================================================
--- a/cms/converters.py
+++ b/cms/converters.py
@@ -16,26 +16,26 @@
import os
import HTMLParser
import re
import jinja2
import markdown
-# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are
-# inserted into the <head> tag
+# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
+# are inserted into the <head> tag
orig_isBlockLevel = markdown.util.isBlockLevel
def isBlockLevel(tag):
if tag == 'head':
return True
- else:
- return orig_isBlockLevel(tag)
+ return orig_isBlockLevel(tag)
+
markdown.util.isBlockLevel = isBlockLevel
html_escapes = {
'<': '&lt;',
'>': '&gt;',
'&': '&amp;',
'"': '&quot;',
"'": '&#39;',
@@ -60,59 +60,63 @@
self._pagename = pagename
# Force-escape ampersands, otherwise the parser will autocomplete bogus
# entities.
text = re.sub(r'&(?!\S+;)', '&amp;', text)
try:
self.feed(text)
- return ''.join(self._string), self._attrs, [''.join(s) for s in self._fixed_strings]
+ return (''.join(self._string),
+ self._attrs, [''.join(s) for s in self._fixed_strings])
finally:
self._string = None
self._attrs = None
self._pagename = None
self._inside_fixed = False
self._fixed_strings = None
def handle_starttag(self, tag, attrs):
if self._inside_fixed:
- raise Exception("Unexpected HTML tag '%s' inside a fixed string on page %s" % (tag, self._pagename))
- elif tag == 'fix':
+ raise Exception("Unexpected HTML tag '{}' inside a fixed string"
+ 'on page {}'.format(tag, self._pagename))
+ if tag == 'fix':
self._inside_fixed = True
self._fixed_strings.append([])
- elif tag in self._whitelist:
+ if tag in self._whitelist:
self._attrs.setdefault(tag, []).append(attrs)
- self._string.append('<%s>' % tag)
+ self._string.append('<{}>'.format(tag))
else:
- raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename))
+ raise Exception("Unexpected HTML tag '{}' inside a fixed string"
+ 'on page {}'.format(tag, self._pagename))
def handle_endtag(self, tag):
if tag == 'fix':
- self._string.append('{%d}' % len(self._fixed_strings))
+ self._string.append('{{{}}}'.format(self._fixed_strings))
self._inside_fixed = False
else:
- self._string.append('</%s>' % tag)
+ self._string.append('</{}>'.format(tag))
def _append_text(self, s):
if self._inside_fixed:
self._fixed_strings[-1].append(s)
else:
self._string.append(s)
def handle_data(self, data):
- # Note: lack of escaping here is intentional. The result is a locale string,
- # HTML escaping is applied when this string is inserted into the document.
+ # Note: lack of escaping here is intentional. The result is a locale
+ # string, HTML escaping is applied when this string is inserted into
+ # the document.
self._append_text(data)
def handle_entityref(self, name):
- self._append_text(self.unescape('&%s;' % name))
+ self._append_text(self.unescape('&{};'.format(name)))
def handle_charref(self, name):
- self._append_text(self.unescape('&#%s;' % name))
+ self._append_text(self.unescape('&#{};'.format(name)))
class Converter:
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}
missing_translations = 0
total_translations = 0
def __init__(self, params, key='pagedata'):
@@ -127,101 +131,109 @@
for i, line in enumerate(lines):
if not re.search(r'^\s*[\w\-]+\s*=', line):
break
name, value = line.split('=', 1)
params[name.strip()] = value.strip()
lines[i] = '\n'
params[key] = (''.join(lines), filename)
- def localize_string(self, page, name, default, comment, localedata, escapes):
+ def localize_string(
+ self, page, name, default, comment, localedata, escapes):
+
def escape(s):
return re.sub(r'.',
- lambda match: escapes.get(match.group(0), match.group(0)),
+ lambda match: escapes.get(match.group(0),
+ match.group(0)),
s, flags=re.S)
def re_escape(s):
return re.escape(escape(s))
# Handle duplicated strings
if default:
self._seen_defaults[(page, name)] = (default, comment)
else:
try:
default, comment = self._seen_defaults[(page, name)]
except KeyError:
- raise Exception('Text not yet defined for string %s on page %s' %
- (name, page))
+ raise Exception('Text not yet defined for string {} on page'
+ '{}'.format(name, page))
# Extract tag attributes from default string
- default, saved_attributes, fixed_strings = self._attribute_parser.parse(default, self._params['page'])
+ default, saved_attributes, fixed_strings = (
+ self._attribute_parser.parse(default, self._params['page']))
# Get translation
locale = self._params['locale']
if locale == self._params['defaultlocale']:
result = default
elif name in localedata:
result = localedata[name].strip()
else:
result = default
self.missing_translations += 1
self.total_translations += 1
- # Perform callback with the string if required, e.g. for the translations script
+ # Perform callback with the string if required, e.g. for the
+ # translations script
callback = self._params['localized_string_callback']
if callback:
callback(page, locale, name, result, comment, fixed_strings)
# Insert fixed strings
for i, fixed_string in enumerate(fixed_strings, 1):
- result = result.replace('{%d}' % i, fixed_string)
+ result = result.replace('{{{%d}}}'.format(i), fixed_string)
# Insert attributes
result = escape(result)
def stringify_attribute((name, value)):
- return '%s="%s"' % (
+ return '{}="{}"'.format(
escape(name),
escape(self.insert_localized_strings(value, {}))
)
for tag in self.whitelist:
- allowed_contents = '(?:[^<>]|%s)' % '|'.join((
- '<(?:%s[^<>]*?|/%s)>' % (t, t)
+ allowed_contents = '(?:[^<>]|{})'.format('|').join((
+ '<(?:{}[^<>]*?|/{})>'.format(t, t)
for t in map(re.escape, self.whitelist - {tag})
))
saved = saved_attributes.get(tag, [])
for attrs in saved:
attrs = map(stringify_attribute, attrs)
result = re.sub(
- r'%s(%s*?)%s' % (re_escape('<%s>' % tag), allowed_contents,
- re_escape('</%s>' % tag)),
- lambda match: r'<%s%s>%s</%s>' % (
+ r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)),
+ allowed_contents,
+ re_escape('</{}>'.format(tag))),
+ lambda match: r'<{}{}>{}</{}>'.format(
tag,
' ' + ' '.join(attrs) if attrs else '',
match.group(1),
tag
),
result, 1, flags=re.S
)
result = re.sub(
- r'%s(%s*?)%s' % (re_escape('<%s>' % tag), allowed_contents,
- re_escape('</%s>' % tag)),
- r'<%s>\1</%s>' % (tag, tag),
+ r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)),
+ allowed_contents,
+ re_escape('</{}>'.format(tag))),
+ r'<{}>\1</{}>'.format(tag, tag),
result, flags=re.S
)
return result
def insert_localized_strings(self, text, escapes, to_html=lambda s: s):
def lookup_string(match):
name, comment, default = match.groups()
if default:
default = to_html(default).strip()
return self.localize_string(self._params['page'], name, default,
- comment, self._params['localedata'], escapes)
+ comment, self._params['localedata'],
+ escapes)
return re.sub(
r'{{\s*'
r'([\w\-]+)' # String ID
r'(?:(?:\[(.*?)\])?' # Optional comment
r'\s+'
r'((?:(?!{{).|' # Translatable text
r'{{(?:(?!}}).)*}}' # Nested translation
@@ -233,98 +245,106 @@
flags=re.S
)
def process_links(self, text):
def process_link(match):
pre, attr, url, post = match.groups()
url = jinja2.Markup(url).unescape()
- locale, new_url = self._params['source'].resolve_link(url, self._params['locale'])
- if new_url != None:
+ locale, new_url = (
+ self._params['source']
+ .resolve_link(url, self._params['locale']))
+
+ if new_url is not None:
url = new_url
if attr == 'href':
- post += ' hreflang="%s"' % jinja2.Markup.escape(locale)
+ post += ' hreflang="{}"'\
+ .format(jinja2.Markup.escape(locale))
return ''.join((pre, jinja2.Markup.escape(url), post))
- text = re.sub(r'(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")', process_link, text)
- text = re.sub(r'(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")', process_link, text)
+ text = re.sub(r'(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")',
+ process_link, text)
+ text = re.sub(r'(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")',
+ process_link, text)
return text
include_start_regex = '<'
include_end_regex = '>'
def resolve_includes(self, text):
def resolve_include(match):
- global converters
name = match.group(1)
- for format, converter_class in converters.iteritems():
- if self._params['source'].has_include(name, format):
- self._params['includedata'] = self._params['source'].read_include(name, format)
- converter = converter_class(self._params, key='includedata')
+ for format_, converter_class in converters.iteritems():
+ if self._params['source'].has_include(name, format_):
+ self._params['includedata'] = (
+ self._params['source'].read_include(name, format))
+
+ converter = converter_class(self._params, key='includedata'
Vasily Kuznetsov 2017/03/09 12:33:27 This looks kind of awkward with just the closing p
Jon Sonesen 2017/03/09 14:04:17 Yep, will do
+ )
result = converter()
self.missing_translations += converter.missing_translations
self.total_translations += converter.total_translations
return result
- raise Exception('Failed to resolve include %s on page %s' % (name, self._params['page']))
+ raise Exception('Failed to resolve include {}'
+ 'on page {}'.format(name, self._params['page']))
return re.sub(
- r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (
+ r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format(
self.include_start_regex,
self.include_end_regex
),
resolve_include,
text
)
def __call__(self):
result = self.get_html(*self._params[self._key])
result = self.resolve_includes(result)
if self._key == 'pagedata':
head = []
def add_to_head(match):
head.append(match.group(1))
return ''
- body = re.sub(r'<head>(.*?)</head>', add_to_head, result, flags=re.S)
+ body = re.sub(r'<head>(.*?)</head>', add_to_head, result,
+ flags=re.S)
return ''.join(head), body
- else:
- return result
+ return result
class RawConverter(Converter):
def get_html(self, source, filename):
result = self.insert_localized_strings(source, html_escapes)
result = self.process_links(result)
return result
class MarkdownConverter(Converter):
- include_start_regex = r'(?:%s|%s)' % (
+ include_start_regex = r'(?:{}|{})'.format(
Converter.include_start_regex,
re.escape(jinja2.escape(Converter.include_start_regex))
)
- include_end_regex = r'(?:%s|%s)' % (
+ include_end_regex = r'(?:{}|{})'.format(
Converter.include_end_regex,
re.escape(jinja2.escape(Converter.include_end_regex))
)
def get_html(self, source, filename):
def remove_unnecessary_entities(match):
char = unichr(int(match.group(1)))
if char in html_escapes:
return match.group(0)
- else:
- return char
+ return char
escapes = {}
md = markdown.Markdown(output='html5', extensions=['extra'])
for char in md.ESCAPED_CHARS:
- escapes[char] = '&#' + str(ord(char)) + ';'
+ escapes[char] = '&#{};'.format(str(ord(char)))
for key, value in html_escapes.iteritems():
escapes[key] = value
md.preprocessors['html_block'].markdown_in_raw = True
def to_html(s):
return re.sub(r'</?p>', '', md.convert(s))
@@ -357,32 +377,36 @@
'toclist': self.toclist,
}
globals = {
'get_string': self.get_string,
'get_page_content': self.get_page_content,
}
- for dirname, dictionary in [('filters', filters), ('globals', globals)]:
+ for dirname, dictionary in [('filters', filters),
+ ('globals', globals)]:
for filename in self._params['source'].list_files(dirname):
root, ext = os.path.splitext(filename)
if ext.lower() != '.py':
continue
- path = '%s/%s' % (dirname, filename)
+ path = os.path.join(dirname, filename)
namespace = self._params['source'].exec_file(path)
name = os.path.basename(root)
try:
dictionary[name] = namespace[name]
except KeyError:
- raise Exception('Expected symbol %r not found in %r' % (name, path))
+ raise Exception('Expected symbol {} not found'
+ 'in {}'.format(name, path))
- self._env = jinja2.Environment(loader=SourceTemplateLoader(self._params['source']), autoescape=True)
+ self._env = jinja2.Environment(
+ loader=SourceTemplateLoader(self._params['source']),
+ autoescape=True)
self._env.filters.update(filters)
self._env.globals.update(globals)
def get_html(self, source, filename):
env = self._env
code = env.compile(source, None, filename)
template = jinja2.Template.from_code(env, code, env.globals)
@@ -404,17 +428,18 @@
self._params['page'], name, default, comment,
self._params['localedata'], html_escapes
))
def get_string(self, name, page=None):
if page is None:
page = self._params['page']
- localedata = self._params['source'].read_locale(self._params['locale'], page)
+ localedata = self._params['source'].read_locale(self._params['locale'],
+ page)
default = localedata[name]
return jinja2.Markup(self.localize_string(
page, name, default, '', localedata, html_escapes
))
def get_page_content(self, page, locale=None):
from cms.utils import get_page_params
@@ -422,26 +447,27 @@
locale = self._params['locale']
return get_page_params(self._params['source'], locale, page)
def linkify(self, page, locale=None, **attrs):
if locale is None:
locale = self._params['locale']
locale, url = self._params['source'].resolve_link(page, locale)
- return jinja2.Markup('<a%s>' % ''.join(
- ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [
+ return jinja2.Markup('<a{}>'.format(''.join(
+ ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [
('href', url),
('hreflang', locale)
] + attrs.items()
- ))
+ )))
def toclist(self, content):
+ toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'
flat = []
- for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
+ for match in re.finditer(toc_re, content, re.S):
flat.append({
'level': int(match.group(1)),
'anchor': jinja2.Markup(match.group(2)).unescape(),
'title': jinja2.Markup(match.group(3)).unescape(),
'subitems': [],
})
structured = []
« no previous file with comments | « no previous file | tox.ini » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld