cms/converters.py - Issue 5694103719247872: Issue 2133 - Allow to specify default translation inline in pages rather than in a separate file

Unified Diff: cms/converters.py

Issue 5694103719247872: Issue 2133 - Allow to specify default translation inline in pages rather than in a separate file (Closed)

Patch Set: Created March 12, 2015, 7:34 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: cms/converters.py

===================================================================

--- a/cms/converters.py

+++ b/cms/converters.py

@@ -10,17 +10,24 @@

# Adblock Plus is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

-import os, imp, re, jinja2, markdown

+import os

+import HTMLParser

+import imp

+import re

+import jinja2

+import markdown

# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are

# inserted into the <head> tag

orig_isBlockLevel = markdown.util.isBlockLevel

def isBlockLevel(tag):

if tag == "head":

return True

else:

@@ -30,76 +37,121 @@ markdown.util.isBlockLevel = isBlockLeve

html_escapes = {

"<": "&lt;",

">": "&gt;",

"&": "&amp;",

"\"": "&quot;",

"'": "&#39;",

}

+class AttributeParser(HTMLParser.HTMLParser):

Sebastian Noack 2015/03/12 20:33:46 That makes actually sense, using a proper parser i

Wladimir Palant 2015/03/12 20:57:02 I actually tested this - invalid HTML will be norm

+ _string = None

+ _attrs = None

+ def __init__(self, whitelist):

+ self._whitelist = whitelist

+ def parse(self, text, pagename):

+ self.reset()

+ self._string = ""

Sebastian Noack 2015/03/12 20:33:46 I'd rather use a list here, joining it when done.

Wladimir Palant 2015/03/12 20:57:02 Done.

+ self._attrs = {}

+ self._pagename = pagename

+ try:

+ self.feed(text)

+ return self._string, self._attrs

+ finally:

+ self._string = None

+ self._attrs = None

+ self._pagename = None

+ def handle_starttag(self, tag, attrs):

+ if tag not in self._whitelist:

+ raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename))

+ self._attrs.setdefault(tag, []).append(attrs)

+ self._string += "<%s>" % tag

+ def handle_endtag(self, tag):

+ self._string += "</%s>" % tag

+ def handle_data(self, data):

+ # Note: lack of escaping here is intentional. The result is a locale string,

+ # HTML escaping is applied when this string is inserted into the document.

+ self._string += data

+ def handle_entityref(self, name):

+ self._string += self.unescape("&%s;" % name)

+ def handle_charref(self, name):

+ self._string += self.unescape("&#%s;" % name)

class Converter:

+ whitelist = set(["a", "em", "strong"])

def __init__(self, params, key="pagedata"):

self._params = params

self._key = key

+ self._attribute_parser = AttributeParser(self.whitelist)

# Read in any parameters specified at the beginning of the file

lines = params[key].splitlines(True)

while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):

name, value = lines.pop(0).split("=", 1)

params[name.strip()] = value.strip()

params[key] = "".join(lines)

- def localize_string(self, name, localedata, escapes, links=[]):

+ def localize_string(self, name, default, localedata, escapes):

def escape(s):

return re.sub(r".",

lambda match: escapes.get(match.group(0), match.group(0)),

s, flags=re.S)

def re_escape(s):

return re.escape(escape(s))

- try:

+ # Extract tag attributes from default string

+ default, saved_attributes = self._attribute_parser.parse(default, self._params["page"])

+ # Get translation

+ if self._params["locale"] != self._params["defaultlocale"] and name in localedata:

result = localedata[name].strip()

- except KeyError:

- raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

+ else:

+ result = default

- # Insert links

+ # Insert attributes

result = escape(result)

- while links:

+ for tag in self.whitelist:

+ saved = saved_attributes.get(tag, [])

+ for attrs in saved:

+ attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs)

+ result = re.sub(

+ r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),

+ r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag),

+ result, 1, flags=re.S

+ )

result = re.sub(

- r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")),

- r'<a href="%s">\1</a>' % links.pop(0),

- result, 1, flags=re.S

+ r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),

+ r"<%s>\1</%s>" % (tag, tag),

+ result, flags=re.S

)

- # <strong> and <em> tags are allowed

- result = re.sub(

- r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),

- r"<strong>\1</strong>",

- result, flags=re.S

- )

- result = re.sub(

- r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),

- r"<em>\1</em>",

- result, flags=re.S

- )

return result

- def insert_localized_strings(self, text, escapes):

+ def insert_localized_strings(self, text, escapes, to_html=lambda s: s):

def lookup_string(match):

- name, links = match.groups()

- if links:

- links = map(unicode.strip, links.strip("()").split(","))

- else:

- links = []

- return self.localize_string(name, self._params["localedata"], escapes, links)

+ name, comment, default = match.groups()

+ default = to_html(default).strip()

+ # Note: We currently ignore the comment, it is only relevant when

+ # generating the master translation.

+ return self.localize_string(name, default, self._params["localedata"], escapes)

return re.sub(

- r"\$([\w\-]+)(\([^()$]+\))?\$",

+ r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}",

lookup_string,

- text

+ text,

+ flags=re.S

)

def process_links(self, text):

def process_link(match):

pre, attr, url, post = match.groups()

url = jinja2.Markup(url).unescape()

locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])

@@ -121,17 +173,17 @@ class Converter:

def resolve_include(match):

global converters

name = match.group(1)

for format, converter_class in converters.iteritems():

if self._params["source"].has_include(name, format):

self._params["includedata"] = self._params["source"].read_include(name, format)

converter = converter_class(self._params, key="includedata")

return converter()

- raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

+ raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"]))

return re.sub(

r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (

self.include_start_regex,

self.include_end_regex

),

resolve_include,

text

@@ -178,17 +230,20 @@ class MarkdownConverter(Converter):

for char in markdown.Markdown.ESCAPED_CHARS:

escapes[char] = "&#" + str(ord(char)) + ";"

for key, value in html_escapes.iteritems():

escapes[key] = value

md = markdown.Markdown(output="html5", extensions=["attr_list"])

md.preprocessors["html_block"].markdown_in_raw = True

- result = self.insert_localized_strings(source, escapes)

+ def to_html(s):

+ return re.sub(r'</?p>', '', md.convert(s))

+ result = self.insert_localized_strings(source, escapes, to_html)

result = md.convert(result)

result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)

result = self.process_links(result)

return result

class TemplateConverter(Converter):

class _SourceLoader(jinja2.BaseLoader):

def __init__(self, source):

@@ -204,16 +259,20 @@ class TemplateConverter(Converter):

Converter.__init__(self, *args, **kwargs)

filters = {

"translate": self.translate,

"linkify": self.linkify,

"toclist": self.toclist,

}

+ globals = {

+ "get_string": self.get_string,

+ }

for filename in self._params["source"].list_files("filters"):

root, ext = os.path.splitext(filename)

if ext.lower() != ".py":

continue

path = "%s/%s" % ("filters", filename)

code = self._params["source"].read_file(path)

module = imp.new_module(root.replace("/", "."))

@@ -222,30 +281,35 @@ class TemplateConverter(Converter):

func = os.path.basename(root)

if not hasattr(module, func):

raise Exception("Expected function %s not found in filter file %s" % (func, filename))

filters[func] = getattr(module, func)

filters[func].module_ref = module # Prevent garbage collection

self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True)

self._env.filters.update(filters)

+ self._env.globals.update(globals)

def get_html(self, source):

template = self._env.from_string(source)

return template.render(self._params)

- def translate(self, name, page=None, links=[]):

- if page == None:

- localedata = self._params["localedata"]

- else:

- localedata = self._params["source"].read_locale(self._params["locale"], page)

- return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links))

+ def translate(self, default, name, comment=None):

+ # Note: We currently ignore the comment, it is only relevant when

+ # generating the master translation.

+ localedata = self._params["localedata"]

+ return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))

+ def get_string(self, name, page):

+ localedata = self._params["source"].read_locale(self._params["locale"], page)

+ default = localedata[name]

+ return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))

def linkify(self, page, locale=None, **attrs):

- if locale == None:

+ if locale is None:

locale = self._params["locale"]

locale, url = self._params["source"].resolve_link(page, locale)

return jinja2.Markup('<a%s>' % ''.join(

' %s="%s"' % (name, jinja2.escape(value)) for name, value in [

('href', url),

('hreflang', locale)

] + attrs.items()

« no previous file with comments | « cms/bin/generate_static_pages.py ('k') | cms/sources.py » ('j') | cms/sources.py » ('J')