Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: cms/converters.py

Issue 5694103719247872: Issue 2133 - Allow to specify default translation inline in pages rather than in a separate file (Closed)
Patch Set: Created March 12, 2015, 7:34 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: cms/converters.py
===================================================================
--- a/cms/converters.py
+++ b/cms/converters.py
@@ -10,17 +10,24 @@
# Adblock Plus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
-import os, imp, re, jinja2, markdown
+import os
+import HTMLParser
+import imp
+import re
+
+import jinja2
+import markdown
+
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are
# inserted into the <head> tag
orig_isBlockLevel = markdown.util.isBlockLevel
def isBlockLevel(tag):
if tag == "head":
return True
else:
@@ -30,76 +37,121 @@ markdown.util.isBlockLevel = isBlockLeve
html_escapes = {
"<": "&lt;",
">": "&gt;",
"&": "&amp;",
"\"": "&quot;",
"'": "&#39;",
}
+class AttributeParser(HTMLParser.HTMLParser):
Sebastian Noack 2015/03/12 20:33:46 That actually makes sense, using a proper parser i
Wladimir Palant 2015/03/12 20:57:02 I actually tested this - invalid HTML will be norm
+ _string = None
+ _attrs = None
+
+ def __init__(self, whitelist):
+ self._whitelist = whitelist
+
+ def parse(self, text, pagename):
+ self.reset()
+ self._string = ""
Sebastian Noack 2015/03/12 20:33:46 I'd rather use a list here, joining it when done.
Wladimir Palant 2015/03/12 20:57:02 Done.
+ self._attrs = {}
+ self._pagename = pagename
+
+ try:
+ self.feed(text)
+ return self._string, self._attrs
+ finally:
+ self._string = None
+ self._attrs = None
+ self._pagename = None
+
+ def handle_starttag(self, tag, attrs):
+ if tag not in self._whitelist:
+ raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename))
+ self._attrs.setdefault(tag, []).append(attrs)
+ self._string += "<%s>" % tag
+
+ def handle_endtag(self, tag):
+ self._string += "</%s>" % tag
+
+ def handle_data(self, data):
+ # Note: lack of escaping here is intentional. The result is a locale string,
+ # HTML escaping is applied when this string is inserted into the document.
+ self._string += data
+
+ def handle_entityref(self, name):
+ self._string += self.unescape("&%s;" % name)
+
+ def handle_charref(self, name):
+ self._string += self.unescape("&#%s;" % name)
+
class Converter:
+ whitelist = set(["a", "em", "strong"])
+
def __init__(self, params, key="pagedata"):
self._params = params
self._key = key
+ self._attribute_parser = AttributeParser(self.whitelist)
# Read in any parameters specified at the beginning of the file
lines = params[key].splitlines(True)
while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
name, value = lines.pop(0).split("=", 1)
params[name.strip()] = value.strip()
params[key] = "".join(lines)
- def localize_string(self, name, localedata, escapes, links=[]):
+ def localize_string(self, name, default, localedata, escapes):
def escape(s):
return re.sub(r".",
lambda match: escapes.get(match.group(0), match.group(0)),
s, flags=re.S)
def re_escape(s):
return re.escape(escape(s))
- try:
+ # Extract tag attributes from default string
+ default, saved_attributes = self._attribute_parser.parse(default, self._params["page"])
+
+ # Get translation
+ if self._params["locale"] != self._params["defaultlocale"] and name in localedata:
result = localedata[name].strip()
- except KeyError:
- raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))
+ else:
+ result = default
- # Insert links
+ # Insert attributes
result = escape(result)
- while links:
+ for tag in self.whitelist:
+ saved = saved_attributes.get(tag, [])
+ for attrs in saved:
+ attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs)
+ result = re.sub(
+ r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),
+ r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag),
+ result, 1, flags=re.S
+ )
result = re.sub(
- r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")),
- r'<a href="%s">\1</a>' % links.pop(0),
- result, 1, flags=re.S
+ r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),
+ r"<%s>\1</%s>" % (tag, tag),
+ result, flags=re.S
)
-
- # <strong> and <em> tags are allowed
- result = re.sub(
- r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),
- r"<strong>\1</strong>",
- result, flags=re.S
- )
- result = re.sub(
- r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),
- r"<em>\1</em>",
- result, flags=re.S
- )
return result
- def insert_localized_strings(self, text, escapes):
+ def insert_localized_strings(self, text, escapes, to_html=lambda s: s):
def lookup_string(match):
- name, links = match.groups()
- if links:
- links = map(unicode.strip, links.strip("()").split(","))
- else:
- links = []
- return self.localize_string(name, self._params["localedata"], escapes, links)
+ name, comment, default = match.groups()
+ default = to_html(default).strip()
+
+ # Note: We currently ignore the comment, it is only relevant when
+ # generating the master translation.
+ return self.localize_string(name, default, self._params["localedata"], escapes)
return re.sub(
- r"\$([\w\-]+)(\([^()$]+\))?\$",
+ r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}",
lookup_string,
- text
+ text,
+ flags=re.S
)
def process_links(self, text):
def process_link(match):
pre, attr, url, post = match.groups()
url = jinja2.Markup(url).unescape()
locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
@@ -121,17 +173,17 @@ class Converter:
def resolve_include(match):
global converters
name = match.group(1)
for format, converter_class in converters.iteritems():
if self._params["source"].has_include(name, format):
self._params["includedata"] = self._params["source"].read_include(name, format)
converter = converter_class(self._params, key="includedata")
return converter()
- raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))
+ raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"]))
return re.sub(
r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (
self.include_start_regex,
self.include_end_regex
),
resolve_include,
text
@@ -178,17 +230,20 @@ class MarkdownConverter(Converter):
for char in markdown.Markdown.ESCAPED_CHARS:
escapes[char] = "&#" + str(ord(char)) + ";"
for key, value in html_escapes.iteritems():
escapes[key] = value
md = markdown.Markdown(output="html5", extensions=["attr_list"])
md.preprocessors["html_block"].markdown_in_raw = True
- result = self.insert_localized_strings(source, escapes)
+ def to_html(s):
+ return re.sub(r'</?p>', '', md.convert(s))
+
+ result = self.insert_localized_strings(source, escapes, to_html)
result = md.convert(result)
result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
result = self.process_links(result)
return result
class TemplateConverter(Converter):
class _SourceLoader(jinja2.BaseLoader):
def __init__(self, source):
@@ -204,16 +259,20 @@ class TemplateConverter(Converter):
Converter.__init__(self, *args, **kwargs)
filters = {
"translate": self.translate,
"linkify": self.linkify,
"toclist": self.toclist,
}
+ globals = {
+ "get_string": self.get_string,
+ }
+
for filename in self._params["source"].list_files("filters"):
root, ext = os.path.splitext(filename)
if ext.lower() != ".py":
continue
path = "%s/%s" % ("filters", filename)
code = self._params["source"].read_file(path)
module = imp.new_module(root.replace("/", "."))
@@ -222,30 +281,35 @@ class TemplateConverter(Converter):
func = os.path.basename(root)
if not hasattr(module, func):
raise Exception("Expected function %s not found in filter file %s" % (func, filename))
filters[func] = getattr(module, func)
filters[func].module_ref = module # Prevent garbage collection
self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True)
self._env.filters.update(filters)
+ self._env.globals.update(globals)
def get_html(self, source):
template = self._env.from_string(source)
return template.render(self._params)
- def translate(self, name, page=None, links=[]):
- if page == None:
- localedata = self._params["localedata"]
- else:
- localedata = self._params["source"].read_locale(self._params["locale"], page)
- return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links))
+ def translate(self, default, name, comment=None):
+ # Note: We currently ignore the comment, it is only relevant when
+ # generating the master translation.
+ localedata = self._params["localedata"]
+ return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))
+
+ def get_string(self, name, page):
+ localedata = self._params["source"].read_locale(self._params["locale"], page)
+ default = localedata[name]
+ return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))
def linkify(self, page, locale=None, **attrs):
- if locale == None:
+ if locale is None:
locale = self._params["locale"]
locale, url = self._params["source"].resolve_link(page, locale)
return jinja2.Markup('<a%s>' % ''.join(
' %s="%s"' % (name, jinja2.escape(value)) for name, value in [
('href', url),
('hreflang', locale)
] + attrs.items()
« no previous file with comments | « cms/bin/generate_static_pages.py ('k') | cms/sources.py » ('j') | cms/sources.py » ('J')

Powered by Google App Engine
This is Rietveld