cms/converters.py - Issue 5694103719247872: Issue 2133 - Allow to specify default translation inline in pages rather than in a separate file

Side by Side Diff: cms/converters.py

Issue 5694103719247872: Issue 2133 - Allow to specify default translation inline in pages rather than in a separate file (Closed)

Patch Set: Created March 12, 2015, 7:34 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # coding: utf-8	1 # coding: utf-8

2	2

3 # This file is part of the Adblock Plus web scripts,	3 # This file is part of the Adblock Plus web scripts,

4 # Copyright (C) 2006-2015 Eyeo GmbH	4 # Copyright (C) 2006-2015 Eyeo GmbH

5 #	5 #

6 # Adblock Plus is free software: you can redistribute it and/or modify	6 # Adblock Plus is free software: you can redistribute it and/or modify

7 # it under the terms of the GNU General Public License version 3 as	7 # it under the terms of the GNU General Public License version 3 as

8 # published by the Free Software Foundation.	8 # published by the Free Software Foundation.

9 #	9 #

10 # Adblock Plus is distributed in the hope that it will be useful,	10 # Adblock Plus is distributed in the hope that it will be useful,

11 # but WITHOUT ANY WARRANTY; without even the implied warranty of	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 # GNU General Public License for more details.	13 # GNU General Public License for more details.

14 #	14 #

15 # You should have received a copy of the GNU General Public License	15 # You should have received a copy of the GNU General Public License

16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

17	17

18 import os, imp, re, jinja2, markdown	18 import os

	19 import HTMLParser

	20 import imp

	21 import re

	22

	23 import jinja2

	24 import markdown

	25

19	26

20 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are	27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are

21 # inserted into the <head> tag	28 # inserted into the <head> tag

22 orig_isBlockLevel = markdown.util.isBlockLevel	29 orig_isBlockLevel = markdown.util.isBlockLevel

23 def isBlockLevel(tag):	30 def isBlockLevel(tag):

24 if tag == "head":	31 if tag == "head":

25 return True	32 return True

26 else:	33 else:

27 return orig_isBlockLevel(tag)	34 return orig_isBlockLevel(tag)

28 markdown.util.isBlockLevel = isBlockLevel	35 markdown.util.isBlockLevel = isBlockLevel

29	36

30 html_escapes = {	37 html_escapes = {

31 "<": "&lt;",	38 "<": "&lt;",

32 ">": "&gt;",	39 ">": "&gt;",

33 "&": "&amp;",	40 "&": "&amp;",

34 "\"": "&quot;",	41 "\"": "&quot;",

35 "'": "&#39;",	42 "'": "&#39;",

36 }	43 }

37	44

	45 class AttributeParser(HTMLParser.HTMLParser):
	Sebastian Noack 2015/03/12 20:33:46 That actually makes sense, using a proper parser instead of a regex. I'm just wondering whether it explodes if we get HTML that is not 100% valid, or whether this is a case we have to bother about here. Wladimir Palant 2015/03/12 20:57:02 I actually tested this - invalid HTML will be normalized, which is a nice side-effect.
	46 _string = None

	47 _attrs = None

	48

	49 def __init__(self, whitelist):

	50 self._whitelist = whitelist

	51

	52 def parse(self, text, pagename):

	53 self.reset()

	54 self._string = ""
	Sebastian Noack 2015/03/12 20:33:46 I'd rather use a list here, joining it when done. I know performance doesn't matter here, but Python folks just consider this kind of string concatenation ugly. ;) Wladimir Palant 2015/03/12 20:57:02 Done.
	55 self._attrs = {}

	56 self._pagename = pagename

	57

	58 try:

	59 self.feed(text)

	60 return self._string, self._attrs

	61 finally:

	62 self._string = None

	63 self._attrs = None

	64 self._pagename = None

	65

	66 def handle_starttag(self, tag, attrs):

	67 if tag not in self._whitelist:

	68 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s " % (tag, self._pagename))

	69 self._attrs.setdefault(tag, []).append(attrs)

	70 self._string += "<%s>" % tag

	71

	72 def handle_endtag(self, tag):

	73 self._string += "</%s>" % tag

	74

	75 def handle_data(self, data):

	76 # Note: lack of escaping here is intentional. The result is a locale string,

	77 # HTML escaping is applied when this string is inserted into the document.

	78 self._string += data

	79

	80 def handle_entityref(self, name):

	81 self._string += self.unescape("&%s;" % name)

	82

	83 def handle_charref(self, name):

	84 self._string += self.unescape("&#%s;" % name)

	85

38 class Converter:	86 class Converter:

	87 whitelist = set(["a", "em", "strong"])

	88

39 def __init__(self, params, key="pagedata"):	89 def __init__(self, params, key="pagedata"):

40 self._params = params	90 self._params = params

41 self._key = key	91 self._key = key

	92 self._attribute_parser = AttributeParser(self.whitelist)

42	93

43 # Read in any parameters specified at the beginning of the file	94 # Read in any parameters specified at the beginning of the file

44 lines = params[key].splitlines(True)	95 lines = params[key].splitlines(True)

45 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):	96 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):

46 name, value = lines.pop(0).split("=", 1)	97 name, value = lines.pop(0).split("=", 1)

47 params[name.strip()] = value.strip()	98 params[name.strip()] = value.strip()

48 params[key] = "".join(lines)	99 params[key] = "".join(lines)

49	100

50 def localize_string(self, name, localedata, escapes, links=[]):	101 def localize_string(self, name, default, localedata, escapes):

51 def escape(s):	102 def escape(s):

52 return re.sub(r".",	103 return re.sub(r".",

53 lambda match: escapes.get(match.group(0), match.group(0)),	104 lambda match: escapes.get(match.group(0), match.group(0)),

54 s, flags=re.S)	105 s, flags=re.S)

55 def re_escape(s):	106 def re_escape(s):

56 return re.escape(escape(s))	107 return re.escape(escape(s))

57	108

58 try:	109 # Extract tag attributes from default string

	110 default, saved_attributes = self._attribute_parser.parse(default, self._params["page"])

	111

	112 # Get translation

	113 if self._params["locale"] != self._params["defaultlocale"] and name in localedata:

59 result = localedata[name].strip()	114 result = localedata[name].strip()

60 except KeyError:	115 else:

61 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))	116 result = default

62	117

63 # Insert links	118 # Insert attributes

64 result = escape(result)	119 result = escape(result)

65 while links:	120 for tag in self.whitelist:

	121 saved = saved_attributes.get(tag, [])

	122 for attrs in saved:

	123 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs)

	124 result = re.sub(

	125 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),

	126 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag),

	127 result, 1, flags=re.S

	128 )

66 result = re.sub(	129 result = re.sub(

67 r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")),	130 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)),

68 r'<a href="%s">\1</a>' % links.pop(0),	131 r"<%s>\1</%s>" % (tag, tag),

69 result, 1, flags=re.S	132 result, flags=re.S

70 )	133 )

71

72 # <strong> and <em> tags are allowed

73 result = re.sub(

74 r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),

75 r"<strong>\1</strong>",

76 result, flags=re.S

77 )

78 result = re.sub(

79 r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),

80 r"<em>\1</em>",

81 result, flags=re.S

82 )

83 return result	134 return result

84	135

85 def insert_localized_strings(self, text, escapes):	136 def insert_localized_strings(self, text, escapes, to_html=lambda s: s):

86 def lookup_string(match):	137 def lookup_string(match):

87 name, links = match.groups()	138 name, comment, default = match.groups()

88 if links:	139 default = to_html(default).strip()

89 links = map(unicode.strip, links.strip("()").split(","))	140

90 else:	141 # Note: We currently ignore the comment, it is only relevant when

91 links = []	142 # generating the master translation.

92 return self.localize_string(name, self._params["localedata"], escapes, links)	143 return self.localize_string(name, default, self._params["localedata"], escapes)

93	144

94 return re.sub(	145 return re.sub(

95 r"\$([\w\-]+)(\([^()$]+\))?\$",	146 r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}",

96 lookup_string,	147 lookup_string,

97 text	148 text,

	149 flags=re.S

98 )	150 )

99	151

100 def process_links(self, text):	152 def process_links(self, text):

101 def process_link(match):	153 def process_link(match):

102 pre, attr, url, post = match.groups()	154 pre, attr, url, post = match.groups()

103 url = jinja2.Markup(url).unescape()	155 url = jinja2.Markup(url).unescape()

104	156

105 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])	157 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])

106 if new_url != None:	158 if new_url != None:

107 url = new_url	159 url = new_url

(...skipping 11 matching lines...) Expand all Loading...
119	171

120 def resolve_includes(self, text):	172 def resolve_includes(self, text):

121 def resolve_include(match):	173 def resolve_include(match):

122 global converters	174 global converters

123 name = match.group(1)	175 name = match.group(1)

124 for format, converter_class in converters.iteritems():	176 for format, converter_class in converters.iteritems():

125 if self._params["source"].has_include(name, format):	177 if self._params["source"].has_include(name, format):

126 self._params["includedata"] = self._params["source"].read_include(name, format)	178 self._params["includedata"] = self._params["source"].read_include(name, format)

127 converter = converter_class(self._params, key="includedata")	179 converter = converter_class(self._params, key="includedata")

128 return converter()	180 return converter()

129 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))	181 raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"]))

130	182

131 return re.sub(	183 return re.sub(

132 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (	184 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (

133 self.include_start_regex,	185 self.include_start_regex,

134 self.include_end_regex	186 self.include_end_regex

135 ),	187 ),

136 resolve_include,	188 resolve_include,

137 text	189 text

138 )	190 )

139	191

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
176	228

177 escapes = {}	229 escapes = {}

178 for char in markdown.Markdown.ESCAPED_CHARS:	230 for char in markdown.Markdown.ESCAPED_CHARS:

179 escapes[char] = "&#" + str(ord(char)) + ";"	231 escapes[char] = "&#" + str(ord(char)) + ";"

180 for key, value in html_escapes.iteritems():	232 for key, value in html_escapes.iteritems():

181 escapes[key] = value	233 escapes[key] = value

182	234

183 md = markdown.Markdown(output="html5", extensions=["attr_list"])	235 md = markdown.Markdown(output="html5", extensions=["attr_list"])

184 md.preprocessors["html_block"].markdown_in_raw = True	236 md.preprocessors["html_block"].markdown_in_raw = True

185	237

186 result = self.insert_localized_strings(source, escapes)	238 def to_html(s):

	239 return re.sub(r'</?p>', '', md.convert(s))

	240

	241 result = self.insert_localized_strings(source, escapes, to_html)

187 result = md.convert(result)	242 result = md.convert(result)

188 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)	243 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)

189 result = self.process_links(result)	244 result = self.process_links(result)

190 return result	245 return result

191	246

192 class TemplateConverter(Converter):	247 class TemplateConverter(Converter):

193 class _SourceLoader(jinja2.BaseLoader):	248 class _SourceLoader(jinja2.BaseLoader):

194 def __init__(self, source):	249 def __init__(self, source):

195 self.source = source	250 self.source = source

196	251

197 def get_source(self, environment, template):	252 def get_source(self, environment, template):

198 try:	253 try:

199 return self.source.read_file(template + ".tmpl"), None, None	254 return self.source.read_file(template + ".tmpl"), None, None

200 except Exception:	255 except Exception:

201 raise jinja2.TemplateNotFound(template)	256 raise jinja2.TemplateNotFound(template)

202	257

203 def __init__(self, *args, **kwargs):	258 def __init__(self, *args, **kwargs):

204 Converter.__init__(self, *args, **kwargs)	259 Converter.__init__(self, *args, **kwargs)

205	260

206 filters = {	261 filters = {

207 "translate": self.translate,	262 "translate": self.translate,

208 "linkify": self.linkify,	263 "linkify": self.linkify,

209 "toclist": self.toclist,	264 "toclist": self.toclist,

210 }	265 }

211	266

	267 globals = {

	268 "get_string": self.get_string,

	269 }

	270

212 for filename in self._params["source"].list_files("filters"):	271 for filename in self._params["source"].list_files("filters"):

213 root, ext = os.path.splitext(filename)	272 root, ext = os.path.splitext(filename)

214 if ext.lower() != ".py":	273 if ext.lower() != ".py":

215 continue	274 continue

216	275

217 path = "%s/%s" % ("filters", filename)	276 path = "%s/%s" % ("filters", filename)

218 code = self._params["source"].read_file(path)	277 code = self._params["source"].read_file(path)

219 module = imp.new_module(root.replace("/", "."))	278 module = imp.new_module(root.replace("/", "."))

220 exec code in module.__dict__	279 exec code in module.__dict__

221	280

222 func = os.path.basename(root)	281 func = os.path.basename(root)

223 if not hasattr(module, func):	282 if not hasattr(module, func):

224 raise Exception("Expected function %s not found in filter file %s" % (func, filename))	283 raise Exception("Expected function %s not found in filter file %s" % (func, filename))

225 filters[func] = getattr(module, func)	284 filters[func] = getattr(module, func)

226 filters[func].module_ref = module # Prevent garbage collection	285 filters[func].module_ref = module # Prevent garbage collection

227	286

228 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True)	287 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True)

229 self._env.filters.update(filters)	288 self._env.filters.update(filters)

	289 self._env.globals.update(globals)

230	290

231 def get_html(self, source):	291 def get_html(self, source):

232 template = self._env.from_string(source)	292 template = self._env.from_string(source)

233 return template.render(self._params)	293 return template.render(self._params)

234	294

235 def translate(self, name, page=None, links=[]):	295 def translate(self, default, name, comment=None):

236 if page == None:	296 # Note: We currently ignore the comment, it is only relevant when

237 localedata = self._params["localedata"]	297 # generating the master translation.

238 else:	298 localedata = self._params["localedata"]

239 localedata = self._params["source"].read_locale(self._params["locale"], page)	299 return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))

240 return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links))	300

	301 def get_string(self, name, page):

	302 localedata = self._params["source"].read_locale(self._params["locale"], page)

	303 default = localedata[name]

	304 return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes))

241	305

242 def linkify(self, page, locale=None, **attrs):	306 def linkify(self, page, locale=None, **attrs):

243 if locale == None:	307 if locale is None:

244 locale = self._params["locale"]	308 locale = self._params["locale"]

245	309

246 locale, url = self._params["source"].resolve_link(page, locale)	310 locale, url = self._params["source"].resolve_link(page, locale)

247 return jinja2.Markup('<a%s>' % ''.join(	311 return jinja2.Markup('<a%s>' % ''.join(

248 ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [	312 ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [

249 ('href', url),	313 ('href', url),

250 ('hreflang', locale)	314 ('hreflang', locale)

251 ] + attrs.items()	315 ] + attrs.items()

252 ))	316 ))

253	317

(...skipping 14 matching lines...) Expand all Loading...
268 stack.pop()	332 stack.pop()

269 stack[-1]["subitems"].append(item)	333 stack[-1]["subitems"].append(item)

270 stack.append(item)	334 stack.append(item)

271 return structured	335 return structured

272	336

273 converters = {	337 converters = {

274 "raw": RawConverter,	338 "raw": RawConverter,

275 "md": MarkdownConverter,	339 "md": MarkdownConverter,

276 "tmpl": TemplateConverter,	340 "tmpl": TemplateConverter,

277 }	341 }

OLD	NEW

« no previous file with comments | « cms/bin/generate_static_pages.py ('k') | cms/sources.py » ('j') | cms/sources.py » ('J')