Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/converters.py

Issue 5694103719247872: Issue 2133 - Allow to specify default translation inline in pages rather than in a separate file (Closed)
Patch Set: Created March 12, 2015, 7:34 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2015 Eyeo GmbH 4 # Copyright (C) 2006-2015 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import os, imp, re, jinja2, markdown 18 import os
19 import HTMLParser
20 import imp
21 import re
22
23 import jinja2
24 import markdown
25
19 26
20 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are 27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are
21 # inserted into the <head> tag 28 # inserted into the <head> tag
22 orig_isBlockLevel = markdown.util.isBlockLevel 29 orig_isBlockLevel = markdown.util.isBlockLevel
23 def isBlockLevel(tag): 30 def isBlockLevel(tag):
24 if tag == "head": 31 if tag == "head":
25 return True 32 return True
26 else: 33 else:
27 return orig_isBlockLevel(tag) 34 return orig_isBlockLevel(tag)
28 markdown.util.isBlockLevel = isBlockLevel 35 markdown.util.isBlockLevel = isBlockLevel
29 36
30 html_escapes = { 37 html_escapes = {
31 "<": "&lt;", 38 "<": "&lt;",
32 ">": "&gt;", 39 ">": "&gt;",
33 "&": "&amp;", 40 "&": "&amp;",
34 "\"": "&quot;", 41 "\"": "&quot;",
35 "'": "&#39;", 42 "'": "&#39;",
36 } 43 }
37 44
45 class AttributeParser(HTMLParser.HTMLParser):
Sebastian Noack 2015/03/12 20:33:46 That makes actually sense, using a proper parser i
Wladimir Palant 2015/03/12 20:57:02 I actually tested this - invalid HTML will be norm
46 _string = None
47 _attrs = None
48
49 def __init__(self, whitelist):
50 self._whitelist = whitelist
51
52 def parse(self, text, pagename):
53 self.reset()
54 self._string = ""
Sebastian Noack 2015/03/12 20:33:46 I'd rather use a list here, joining it when done.
Wladimir Palant 2015/03/12 20:57:02 Done.
55 self._attrs = {}
56 self._pagename = pagename
57
58 try:
59 self.feed(text)
60 return self._string, self._attrs
61 finally:
62 self._string = None
63 self._attrs = None
64 self._pagename = None
65
66 def handle_starttag(self, tag, attrs):
67 if tag not in self._whitelist:
68 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s " % (tag, self._pagename))
69 self._attrs.setdefault(tag, []).append(attrs)
70 self._string += "<%s>" % tag
71
72 def handle_endtag(self, tag):
73 self._string += "</%s>" % tag
74
75 def handle_data(self, data):
76 # Note: lack of escaping here is intentional. The result is a locale string,
77 # HTML escaping is applied when this string is inserted into the document.
78 self._string += data
79
80 def handle_entityref(self, name):
81 self._string += self.unescape("&%s;" % name)
82
83 def handle_charref(self, name):
84 self._string += self.unescape("&#%s;" % name)
85
38 class Converter: 86 class Converter:
87 whitelist = set(["a", "em", "strong"])
88
39 def __init__(self, params, key="pagedata"): 89 def __init__(self, params, key="pagedata"):
40 self._params = params 90 self._params = params
41 self._key = key 91 self._key = key
92 self._attribute_parser = AttributeParser(self.whitelist)
42 93
43 # Read in any parameters specified at the beginning of the file 94 # Read in any parameters specified at the beginning of the file
44 lines = params[key].splitlines(True) 95 lines = params[key].splitlines(True)
45 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): 96 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
46 name, value = lines.pop(0).split("=", 1) 97 name, value = lines.pop(0).split("=", 1)
47 params[name.strip()] = value.strip() 98 params[name.strip()] = value.strip()
48 params[key] = "".join(lines) 99 params[key] = "".join(lines)
49 100
50 def localize_string(self, name, localedata, escapes, links=[]): 101 def localize_string(self, name, default, localedata, escapes):
51 def escape(s): 102 def escape(s):
52 return re.sub(r".", 103 return re.sub(r".",
53 lambda match: escapes.get(match.group(0), match.group(0)), 104 lambda match: escapes.get(match.group(0), match.group(0)),
54 s, flags=re.S) 105 s, flags=re.S)
55 def re_escape(s): 106 def re_escape(s):
56 return re.escape(escape(s)) 107 return re.escape(escape(s))
57 108
58 try: 109 # Extract tag attributes from default string
110 default, saved_attributes = self._attribute_parser.parse(default, self._params["page"])
111
112 # Get translation
113 if self._params["locale"] != self._params["defaultlocale"] and name in localedata:
59 result = localedata[name].strip() 114 result = localedata[name].strip()
60 except KeyError: 115 else:
61 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"])) 116 result = default
62 117
63 # Insert links 118 # Insert attributes
64 result = escape(result) 119 result = escape(result)
65 while links: 120 for tag in self.whitelist:
121 saved = saved_attributes.get(tag, [])
122 for attrs in saved:
123 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs)
124 result = re.sub(
125 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),
126 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag),
127 result, 1, flags=re.S
128 )
66 result = re.sub( 129 result = re.sub(
67 r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")), 130 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),
68 r'<a href="%s">\1</a>' % links.pop(0), 131 r"<%s>\1</%s>" % (tag, tag),
69 result, 1, flags=re.S 132 result, flags=re.S
70 ) 133 )
71
72 # <strong> and <em> tags are allowed
73 result = re.sub(
74 r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),
75 r"<strong>\1</strong>",
76 result, flags=re.S
77 )
78 result = re.sub(
79 r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),
80 r"<em>\1</em>",
81 result, flags=re.S
82 )
83 return result 134 return result
84 135
85 def insert_localized_strings(self, text, escapes): 136 def insert_localized_strings(self, text, escapes, to_html=lambda s: s):
86 def lookup_string(match): 137 def lookup_string(match):
87 name, links = match.groups() 138 name, comment, default = match.groups()
88 if links: 139 default = to_html(default).strip()
89 links = map(unicode.strip, links.strip("()").split(",")) 140
90 else: 141 # Note: We currently ignore the comment, it is only relevant when
91 links = [] 142 # generating the master translation.
92 return self.localize_string(name, self._params["localedata"], escapes, links) 143 return self.localize_string(name, default, self._params["localedata"], escapes)
93 144
94 return re.sub( 145 return re.sub(
95 r"\$([\w\-]+)(\([^()$]+\))?\$", 146 r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}",
96 lookup_string, 147 lookup_string,
97 text 148 text,
149 flags=re.S
98 ) 150 )
99 151
100 def process_links(self, text): 152 def process_links(self, text):
101 def process_link(match): 153 def process_link(match):
102 pre, attr, url, post = match.groups() 154 pre, attr, url, post = match.groups()
103 url = jinja2.Markup(url).unescape() 155 url = jinja2.Markup(url).unescape()
104 156
105 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"]) 157 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
106 if new_url != None: 158 if new_url != None:
107 url = new_url 159 url = new_url
(...skipping 11 matching lines...) Expand all
119 171
120 def resolve_includes(self, text): 172 def resolve_includes(self, text):
121 def resolve_include(match): 173 def resolve_include(match):
122 global converters 174 global converters
123 name = match.group(1) 175 name = match.group(1)
124 for format, converter_class in converters.iteritems(): 176 for format, converter_class in converters.iteritems():
125 if self._params["source"].has_include(name, format): 177 if self._params["source"].has_include(name, format):
126 self._params["includedata"] = self._params["source"].read_include(name, format) 178 self._params["includedata"] = self._params["source"].read_include(name, format)
127 converter = converter_class(self._params, key="includedata") 179 converter = converter_class(self._params, key="includedata")
128 return converter() 180 return converter()
129 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"])) 181 raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"]))
130 182
131 return re.sub( 183 return re.sub(
132 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( 184 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (
133 self.include_start_regex, 185 self.include_start_regex,
134 self.include_end_regex 186 self.include_end_regex
135 ), 187 ),
136 resolve_include, 188 resolve_include,
137 text 189 text
138 ) 190 )
139 191
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
176 228
177 escapes = {} 229 escapes = {}
178 for char in markdown.Markdown.ESCAPED_CHARS: 230 for char in markdown.Markdown.ESCAPED_CHARS:
179 escapes[char] = "&#" + str(ord(char)) + ";" 231 escapes[char] = "&#" + str(ord(char)) + ";"
180 for key, value in html_escapes.iteritems(): 232 for key, value in html_escapes.iteritems():
181 escapes[key] = value 233 escapes[key] = value
182 234
183 md = markdown.Markdown(output="html5", extensions=["attr_list"]) 235 md = markdown.Markdown(output="html5", extensions=["attr_list"])
184 md.preprocessors["html_block"].markdown_in_raw = True 236 md.preprocessors["html_block"].markdown_in_raw = True
185 237
186 result = self.insert_localized_strings(source, escapes) 238 def to_html(s):
239 return re.sub(r'</?p>', '', md.convert(s))
240
241 result = self.insert_localized_strings(source, escapes, to_html)
187 result = md.convert(result) 242 result = md.convert(result)
188 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) 243 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
189 result = self.process_links(result) 244 result = self.process_links(result)
190 return result 245 return result
191 246
192 class TemplateConverter(Converter): 247 class TemplateConverter(Converter):
193 class _SourceLoader(jinja2.BaseLoader): 248 class _SourceLoader(jinja2.BaseLoader):
194 def __init__(self, source): 249 def __init__(self, source):
195 self.source = source 250 self.source = source
196 251
197 def get_source(self, environment, template): 252 def get_source(self, environment, template):
198 try: 253 try:
199 return self.source.read_file(template + ".tmpl"), None, None 254 return self.source.read_file(template + ".tmpl"), None, None
200 except Exception: 255 except Exception:
201 raise jinja2.TemplateNotFound(template) 256 raise jinja2.TemplateNotFound(template)
202 257
203 def __init__(self, *args, **kwargs): 258 def __init__(self, *args, **kwargs):
204 Converter.__init__(self, *args, **kwargs) 259 Converter.__init__(self, *args, **kwargs)
205 260
206 filters = { 261 filters = {
207 "translate": self.translate, 262 "translate": self.translate,
208 "linkify": self.linkify, 263 "linkify": self.linkify,
209 "toclist": self.toclist, 264 "toclist": self.toclist,
210 } 265 }
211 266
267 globals = {
268 "get_string": self.get_string,
269 }
270
212 for filename in self._params["source"].list_files("filters"): 271 for filename in self._params["source"].list_files("filters"):
213 root, ext = os.path.splitext(filename) 272 root, ext = os.path.splitext(filename)
214 if ext.lower() != ".py": 273 if ext.lower() != ".py":
215 continue 274 continue
216 275
217 path = "%s/%s" % ("filters", filename) 276 path = "%s/%s" % ("filters", filename)
218 code = self._params["source"].read_file(path) 277 code = self._params["source"].read_file(path)
219 module = imp.new_module(root.replace("/", ".")) 278 module = imp.new_module(root.replace("/", "."))
220 exec code in module.__dict__ 279 exec code in module.__dict__
221 280
222 func = os.path.basename(root) 281 func = os.path.basename(root)
223 if not hasattr(module, func): 282 if not hasattr(module, func):
224 raise Exception("Expected function %s not found in filter file %s" % (func, filename)) 283 raise Exception("Expected function %s not found in filter file %s" % (func, filename))
225 filters[func] = getattr(module, func) 284 filters[func] = getattr(module, func)
226 filters[func].module_ref = module # Prevent garbage collection 285 filters[func].module_ref = module # Prevent garbage collection
227 286
228 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True) 287 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True)
229 self._env.filters.update(filters) 288 self._env.filters.update(filters)
289 self._env.globals.update(globals)
230 290
231 def get_html(self, source): 291 def get_html(self, source):
232 template = self._env.from_string(source) 292 template = self._env.from_string(source)
233 return template.render(self._params) 293 return template.render(self._params)
234 294
235 def translate(self, name, page=None, links=[]): 295 def translate(self, default, name, comment=None):
236 if page == None: 296 # Note: We currently ignore the comment, it is only relevant when
237 localedata = self._params["localedata"] 297 # generating the master translation.
238 else: 298 localedata = self._params["localedata"]
239 localedata = self._params["source"].read_locale(self._params["locale"], page) 299 return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))
240 return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links)) 300
301 def get_string(self, name, page):
302 localedata = self._params["source"].read_locale(self._params["locale"], page)
303 default = localedata[name]
304 return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))
241 305
242 def linkify(self, page, locale=None, **attrs): 306 def linkify(self, page, locale=None, **attrs):
243 if locale == None: 307 if locale is None:
244 locale = self._params["locale"] 308 locale = self._params["locale"]
245 309
246 locale, url = self._params["source"].resolve_link(page, locale) 310 locale, url = self._params["source"].resolve_link(page, locale)
247 return jinja2.Markup('<a%s>' % ''.join( 311 return jinja2.Markup('<a%s>' % ''.join(
248 ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [ 312 ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [
249 ('href', url), 313 ('href', url),
250 ('hreflang', locale) 314 ('hreflang', locale)
251 ] + attrs.items() 315 ] + attrs.items()
252 )) 316 ))
253 317
(...skipping 14 matching lines...) Expand all
268 stack.pop() 332 stack.pop()
269 stack[-1]["subitems"].append(item) 333 stack[-1]["subitems"].append(item)
270 stack.append(item) 334 stack.append(item)
271 return structured 335 return structured
272 336
273 converters = { 337 converters = {
274 "raw": RawConverter, 338 "raw": RawConverter,
275 "md": MarkdownConverter, 339 "md": MarkdownConverter,
276 "tmpl": TemplateConverter, 340 "tmpl": TemplateConverter,
277 } 341 }
OLDNEW
« no previous file with comments | « cms/bin/generate_static_pages.py ('k') | cms/sources.py » ('j') | cms/sources.py » ('J')

Powered by Google App Engine
This is Rietveld