Index: cms/converters.py |
=================================================================== |
--- a/cms/converters.py |
+++ b/cms/converters.py |
@@ -10,17 +10,24 @@ |
# Adblock Plus is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# GNU General Public License for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
-import os, imp, re, jinja2, markdown |
+import os |
+import HTMLParser |
+import imp |
+import re |
+ |
+import jinja2 |
+import markdown |
+ |
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are |
# inserted into the <head> tag |
orig_isBlockLevel = markdown.util.isBlockLevel |
def isBlockLevel(tag): |
if tag == "head": |
return True |
else: |
@@ -30,76 +37,121 @@ markdown.util.isBlockLevel = isBlockLeve |
# Maps characters that are special in HTML to the entities that replace them
# when a string is inserted into an HTML document.
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}
class AttributeParser(HTMLParser.HTMLParser):
    """Separate tag attributes from the text of a localizable string.

    parse() feeds a string through the HTML parser and rebuilds it with every
    start tag reduced to its bare form (``<a href="x">`` becomes ``<a>``),
    while the removed attribute lists are collected per tag name. Only tags
    contained in the whitelist passed to the constructor are accepted.
    """

    # Per-parse state; only populated while parse() is running.
    _string = None
    _attrs = None
    _pagename = None

    def __init__(self, whitelist):
        """Create a parser accepting only the tag names in ``whitelist``."""
        # Set up the base parser explicitly rather than depending on the
        # reset() call in parse() to create its internal state.
        HTMLParser.HTMLParser.__init__(self)
        self._whitelist = whitelist

    def parse(self, text, pagename):
        """Parse ``text`` and return ``(string, attrs)``.

        ``string`` is the text with attributes removed from all tags and
        entity/character references replaced by the characters they stand
        for. ``attrs`` maps each tag name to a list with one entry per
        occurrence of that tag, each entry being the attribute list the HTML
        parser reported for it. ``pagename`` is only used in error messages.

        Raises Exception if a tag outside the whitelist is encountered.
        """
        self.reset()
        # Fragments are collected in a list and joined once at the end
        # instead of growing a string piece by piece.
        self._string = []
        self._attrs = {}
        self._pagename = pagename

        try:
            self.feed(text)
            return "".join(self._string), self._attrs
        finally:
            # Drop the per-parse state even if feed() raised.
            self._string = None
            self._attrs = None
            self._pagename = None

    def handle_starttag(self, tag, attrs):
        if tag not in self._whitelist:
            raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename))
        self._attrs.setdefault(tag, []).append(attrs)
        self._string.append("<%s>" % tag)

    def handle_endtag(self, tag):
        self._string.append("</%s>" % tag)

    def handle_data(self, data):
        # Note: lack of escaping here is intentional. The result is a locale
        # string, HTML escaping is applied when this string is inserted into
        # the document.
        self._string.append(data)

    def handle_entityref(self, name):
        self._string.append(self.unescape("&%s;" % name))

    def handle_charref(self, name):
        self._string.append(self.unescape("&#%s;" % name))
+ |
class Converter: |
+ whitelist = set(["a", "em", "strong"]) |
+ |
def __init__(self, params, key="pagedata"):
    """Store the parameters and strip the leading ``name = value`` header.

    ``params[key]`` holds the source text. Consecutive lines at its start
    that look like ``name = value`` are copied into ``params`` (name and
    value stripped of surrounding whitespace) and removed from the text.
    """
    self._params = params
    self._key = key
    self._attribute_parser = AttributeParser(self.whitelist)

    # Read in any parameters specified at the beginning of the file
    source_lines = params[key].splitlines(True)
    consumed = 0
    for line in source_lines:
        if not re.search(r"^\s*[\w\-]+\s*=", line):
            break
        option, _, setting = line.partition("=")
        params[option.strip()] = setting.strip()
        consumed += 1
    params[key] = "".join(source_lines[consumed:])
def localize_string(self, name, default, localedata, escapes):
    """Return the escaped text for string ``name`` with tag attributes restored.

    ``default`` is the string as written in the page source. Its tag
    attributes are split off by the attribute parser; the translation from
    ``localedata`` is used when the current locale differs from the default
    locale and contains ``name``, otherwise the attribute-free default is
    used. The chosen text is escaped according to ``escapes`` (a mapping of
    single characters to replacements) and the whitelisted tags are then
    turned back into real markup, re-attaching the saved attributes to the
    occurrences of each tag in order.
    """
    def escape(s):
        return "".join(escapes.get(char, char) for char in s)

    def format_attribute(attr_name, attr_value):
        # Valueless attributes are passed through with a value of None;
        # emit them as a bare name instead of trying to escape None.
        if attr_value is None:
            return escape(attr_name)
        return '%s="%s"' % (escape(attr_name), escape(attr_value))

    def rewrap(pattern, opening, closing, text, count=0):
        # A function is used as replacement so that backslashes in the
        # attribute values are inserted literally rather than being
        # interpreted as escapes or group references.
        return pattern.sub(
            lambda match: opening + match.group(1) + closing, text, count
        )

    # Extract tag attributes from default string
    default, saved_attributes = self._attribute_parser.parse(default, self._params["page"])

    # Get translation
    if self._params["locale"] != self._params["defaultlocale"] and name in localedata:
        result = localedata[name].strip()
    else:
        result = default

    # Insert attributes
    result = escape(result)
    for tag in self.whitelist:
        # After escaping, the tags appear in their escaped form in the text.
        pattern = re.compile(
            r"%s([^<>]*?)%s" % (
                re.escape(escape("<%s>" % tag)),
                re.escape(escape("</%s>" % tag)),
            ),
            re.S
        )
        closing = "</%s>" % tag
        for attrs in saved_attributes.get(tag, []):
            parts = [tag] + [format_attribute(n, v) for n, v in attrs]
            # Only the first remaining escaped occurrence receives this set
            # of attributes.
            result = rewrap(pattern, "<%s>" % " ".join(parts), closing, result, 1)
        # Any occurrences left over are restored without attributes.
        result = rewrap(pattern, "<%s>" % tag, closing, result)
    return result
def insert_localized_strings(self, text, escapes, to_html=lambda s: s):
    """Replace every ``{{name[comment] default text}}`` marker in ``text``.

    For each marker the default text is passed through ``to_html`` and
    stripped, then handed to localize_string() together with the string
    name, the page's locale data and ``escapes``; the marker is replaced by
    whatever localize_string() returns. The ``[comment]`` part is optional.
    """
    marker = re.compile(r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}", re.S)

    def substitute(found):
        # Note: We currently ignore the comment (group 2), it is only
        # relevant when generating the master translation.
        string_name = found.group(1)
        fallback = to_html(found.group(3)).strip()
        return self.localize_string(
            string_name, fallback, self._params["localedata"], escapes
        )

    return marker.sub(substitute, text)
def process_links(self, text): |
def process_link(match): |
pre, attr, url, post = match.groups() |
url = jinja2.Markup(url).unescape() |
locale, new_url = self._params["source"].resolve_link(url, self._params["locale"]) |
@@ -121,17 +173,17 @@ class Converter: |
def resolve_include(match): |
global converters |
name = match.group(1) |
for format, converter_class in converters.iteritems(): |
if self._params["source"].has_include(name, format): |
self._params["includedata"] = self._params["source"].read_include(name, format) |
converter = converter_class(self._params, key="includedata") |
return converter() |
- raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"])) |
+ raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"])) |
return re.sub( |
r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( |
self.include_start_regex, |
self.include_end_regex |
), |
resolve_include, |
text |
@@ -178,17 +230,20 @@ class MarkdownConverter(Converter): |
for char in markdown.Markdown.ESCAPED_CHARS: |
escapes[char] = "&#" + str(ord(char)) + ";" |
for key, value in html_escapes.iteritems(): |
escapes[key] = value |
md = markdown.Markdown(output="html5", extensions=["attr_list"]) |
md.preprocessors["html_block"].markdown_in_raw = True |
- result = self.insert_localized_strings(source, escapes) |
+ def to_html(s): |
+ return re.sub(r'</?p>', '', md.convert(s)) |
+ |
+ result = self.insert_localized_strings(source, escapes, to_html) |
result = md.convert(result) |
result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) |
result = self.process_links(result) |
return result |
class TemplateConverter(Converter): |
class _SourceLoader(jinja2.BaseLoader): |
def __init__(self, source): |
@@ -204,16 +259,20 @@ class TemplateConverter(Converter): |
Converter.__init__(self, *args, **kwargs) |
filters = { |
"translate": self.translate, |
"linkify": self.linkify, |
"toclist": self.toclist, |
} |
+ globals = { |
+ "get_string": self.get_string, |
+ } |
+ |
for filename in self._params["source"].list_files("filters"): |
root, ext = os.path.splitext(filename) |
if ext.lower() != ".py": |
continue |
path = "%s/%s" % ("filters", filename) |
code = self._params["source"].read_file(path) |
module = imp.new_module(root.replace("/", ".")) |
@@ -222,30 +281,35 @@ class TemplateConverter(Converter): |
func = os.path.basename(root) |
if not hasattr(module, func): |
raise Exception("Expected function %s not found in filter file %s" % (func, filename)) |
filters[func] = getattr(module, func) |
filters[func].module_ref = module # Prevent garbage collection |
self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True) |
self._env.filters.update(filters) |
+ self._env.globals.update(globals) |
def get_html(self, source):
    """Compile ``source`` as a template in the converter's environment and
    return the result of rendering it with the converter's parameters."""
    return self._env.from_string(source).render(self._params)
def translate(self, default, name, comment=None):
    """Template filter: return the localized form of ``default`` as markup.

    ``default`` is the text the filter is applied to and ``name`` the string
    name used to look up its translation in the page's locale data.
    """
    # Note: We currently ignore the comment, it is only relevant when
    # generating the master translation.
    page_strings = self._params["localedata"]
    localized = self.localize_string(name, default, page_strings, html_escapes)
    return jinja2.Markup(localized)
+ |
def get_string(self, name, page):
    """Template global: return string ``name`` of another page as markup.

    The locale data of ``page`` is read for the current locale and the
    string found there is passed through localize_string().

    Raises KeyError, naming both the string and the page, if the locale
    data of ``page`` has no entry for ``name``.
    """
    localedata = self._params["source"].read_locale(self._params["locale"], page)
    try:
        default = localedata[name]
    except KeyError:
        # Keep the exception type but say which lookup failed; a bare
        # KeyError gives no hint about the page involved.
        raise KeyError("Lookup failed for string %s on page %s" % (name, page))
    return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))
def linkify(self, page, locale=None, **attrs): |
- if locale == None: |
+ if locale is None: |
locale = self._params["locale"] |
locale, url = self._params["source"].resolve_link(page, locale) |
return jinja2.Markup('<a%s>' % ''.join( |
' %s="%s"' % (name, jinja2.escape(value)) for name, value in [ |
('href', url), |
('hreflang', locale) |
] + attrs.items() |