sitescripts/web/converters.py - Issue 17817001: Simple CMS as Anwiki replacement

Delta Between Two Patch Sets: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)

Left Patch Set: Created Oct. 23, 2013, 1:52 p.m.

Right Patch Set: Fixed MIME type Created Nov. 4, 2013, 4:11 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 # coding: utf-8	1 # coding: utf-8

2	2

3 # This file is part of the Adblock Plus web scripts,	3 # This file is part of the Adblock Plus web scripts,

4 # Copyright (C) 2006-2013 Eyeo GmbH	4 # Copyright (C) 2006-2013 Eyeo GmbH

5 #	5 #

6 # Adblock Plus is free software: you can redistribute it and/or modify	6 # Adblock Plus is free software: you can redistribute it and/or modify

7 # it under the terms of the GNU General Public License version 3 as	7 # it under the terms of the GNU General Public License version 3 as

8 # published by the Free Software Foundation.	8 # published by the Free Software Foundation.

9 #	9 #

10 # Adblock Plus is distributed in the hope that it will be useful,	10 # Adblock Plus is distributed in the hope that it will be useful,

11 # but WITHOUT ANY WARRANTY; without even the implied warranty of	11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 # GNU General Public License for more details.	13 # GNU General Public License for more details.

14 #	14 #

15 # You should have received a copy of the GNU General Public License	15 # You should have received a copy of the GNU General Public License

16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

17	17

18 import re, jinja2, markdown	18 import re, jinja2, markdown

19 from sitescripts.utils import cached, setupStderr, get_custom_template_environment	19 from ..utils import get_custom_template_environment

20	20

21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are	21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are

22 # inserted into the <head> tag	22 # inserted into the <head> tag

23 orig_isBlockLevel = markdown.util.isBlockLevel	23 orig_isBlockLevel = markdown.util.isBlockLevel

24 def isBlockLevel(tag):	24 def isBlockLevel(tag):

25 if tag == "head":	25 if tag == "head":

26 return True	26 return True

27 else:	27 else:

28 return orig_isBlockLevel(tag)	28 return orig_isBlockLevel(tag)

29 markdown.util.isBlockLevel = isBlockLevel	29 markdown.util.isBlockLevel = isBlockLevel

30	30

31 def split_head(text):	31 html_escapes = {

32 head = []	32 "<": "&lt;",

33 def add_to_head(match):	33 ">": "&gt;",

34 head.append(match.group(1))	34 "&": "&amp;",

35 return ""	35 "\"": "&quot;",

36 body = re.sub(r"<head>(.*?)</head>",	36 "'": "&#39;",

37 add_to_head, text, flags=re.S)	37 }

38 return "".join(head), body	38

39	39 class Converter:

40 class MarkdownConverter:	40 def __init__(self, params, key="pagedata"):

41 class Localizer(markdown.preprocessors.Preprocessor):	41 self._params = params

42 def __init__(self, params):	42 self._key = key

43 self._params = params	43

44	44 # Read in any parameters specified at the beginning of the file

45 self._escaped = set(markdown.Markdown.ESCAPED_CHARS)	45 lines = params[key].splitlines(True)

46 self._escaped.add("<");	46 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):

47 self._escaped.add(">");	47 name, value = lines.pop(0).split("=", 1)

48 self._escaped.add("&");	48 params[name.strip()] = value.strip()

49 self._escaped.add("\"");	49 params[key] = "".join(lines)

50 self._escaped.add("'");	50

51	51 def localize_string(self, name, localedata, escapes, links=[]):

52 def run(self, lines):	52 def escape(s):

53 new_lines = []	53 return re.sub(r".",

54 for line in lines:	54 lambda match: escapes.get(match.group(0), match.group(0)),

55 # Replace localized strings	55 s, flags=re.S)

56 new_lines.append(re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$",	56 def re_escape(s):

57 lambda match: self.lookup_string(match.group(1), match.group(2)),	57 return re.escape(escape(s))

58 line))	58

59 return new_lines	59 try:

60	60 result = localedata[name].strip()

61 def lookup_string(self, name, links):	61 except KeyError:

62 def escape(char):	62 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

63 if char in self._escaped:	63

64 return "&#" + str(ord(char)) + ";"	64 # Insert links

65 else:	65 result = escape(result)

66 return char	66 while links:

67	67 result = re.sub(

68 try:	68 r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")),

69 result = self._params["localedata"][name].strip()	69 r'<a href="%s">\1</a>' % links.pop(0),

70 except KeyError:	70 result, 1, flags=re.S

71 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))	71 )

72	72

73 result = re.sub(r".", lambda match: escape(match.group(0)), result, flags=re.S)	73 # <strong> and <em> tags are allowed

	74 result = re.sub(

	75 r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),

	76 r"<strong>\1</strong>",

	77 result, flags=re.S

	78 )

	79 result = re.sub(

	80 r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),

	81 r"<em>\1</em>",

	82 result, flags=re.S

	83 )

	84 return result

	85

	86 def insert_localized_strings(self, text, escapes):

	87 def lookup_string(match):

	88 name, links = match.groups()

74 if links:	89 if links:

75 links = map(unicode.strip, links.strip("()").split(","))	90 links = map(unicode.strip, links.strip("()").split(","))

76 while len(links):	91 else:

77 result = re.sub(r"<a>(.*?)</a>", r'<a href="%s">\1</a> ' % links.pop(0), result, 1, flags=re.S)	92 links = []

78 return result	93 return self.localize_string(name, self._params["localedata"], escapes, links)

79	94

80 class Linkifier(markdown.postprocessors.Postprocessor):	95 return re.sub(

81 def __init__(self, params):	96 r"\$([\w\-]+)(\([^()$]+\))?\$",

82 self._params = params	97 lookup_string,

83	98 text

84 def process_link(self, match):	99 )

	100

	101 def process_links(self, text):

	102 def process_link(match):

85 pre, attr, url, post = match.groups()	103 pre, attr, url, post = match.groups()

86 url = jinja2.Markup(url).unescape()	104 url = jinja2.Markup(url).unescape()

87	105

88 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])	106 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])

89 if new_url != None:	107 if new_url != None:

90 url = new_url	108 url = new_url

91 if attr == "href":	109 if attr == "href":

92 post += ' hreflang="%s"' % jinja2.Markup.escape(locale)	110 post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

93	111

94 return "".join((pre, jinja2.Markup.escape(url), post))	112 return "".join((pre, jinja2.Markup.escape(url), post))

95	113

96 def run(self, text):	114 text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)

97 text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link, text)	115 text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)

98 text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link, text)	116 return text

99 return text	117

100	118 def resolve_includes(self, text):

101 def __init__(self, params, key="pagedata"):	119 def resolve_include(match):

102 self._params = params	120 global converters

103 self._splithead = key == "pagedata"	121 name = match.group(1)

104	122 for format, converter_class in converters.iteritems():

105 self._md = markdown.Markdown(output="html5", extensions=["attr_list"])	123 if self._params["source"].has_include(name, format):

106 self._md.preprocessors.add("localizer", self.Localizer(params), "_begin")	124 self._params["includedata"] = self._params["source"].read_include(name, format)

107 self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end")	125 converter = converter_class(self._params, key="includedata")

108	126 return converter()

109 params["pagedata"] = params["pagedata"].decode("utf-8")	127 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

110	128

111 # Read in any parameters specified at the beginning of the file	129 return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)

112 lines = params["pagedata"].splitlines(True)

113 while len(lines) and re.search(r"^\s*[\w\-]+\s*=", lines[0]):

114 key, value = lines.pop(0).split("=", 1)

115 params[key.strip()] = value.strip()

116 params["pagedata"] = "".join(lines)

117	130

118 def __call__(self):	131 def __call__(self):

119 def beautify_entities(match):	132 result = self.get_html(self._params[self._key])

120 escape = {	133 result = self.resolve_includes(result)

121 "<": "&lt;",	134 if self._key == "pagedata":

122 ">": "&gt;",	135 head = []

123 "&": "&amp;",	136 def add_to_head(match):

124 "\"": "&quot;",	137 head.append(match.group(1))

125 "'": "&#39;",	138 return ""

126 }	139 body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)

127 char = chr(int(match.group(1)))	140 return "".join(head), body

128 return escape.get(char, char)

129

130 result = self._md.convert(self._params["pagedata"])

131 result = re.sub(r"&#(\d+);", beautify_entities, result).encode("utf-8")

132

133 if self._splithead:

134 return split_head(result)

135 else:	141 else:

136 return result	142 return result

137	143

138 class TemplateConverter:	144 class RawConverter(Converter):

139 def __init__(self, params, key="pagedata"):	145 def get_html(self, source):

140 self._params = params	146 result = self.insert_localized_strings(source, html_escapes)

141 self._splithead = key == "pagedata"	147 result = self.process_links(result)

	148 return result

	149

	150 class MarkdownConverter(Converter):

	151 def get_html(self, source):

	152 def remove_unnecessary_entities(match):

	153 char = chr(int(match.group(1)))

	154 if char in html_escapes:

	155 return match.group(0)

	156 else:

	157 return char

	158

	159 escapes = {}

	160 for char in markdown.Markdown.ESCAPED_CHARS:

	161 escapes[char] = "&#" + str(ord(char)) + ";"

	162 for key, value in html_escapes.iteritems():

	163 escapes[key] = value

	164

	165 result = self.insert_localized_strings(source, escapes)

	166 result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert(result)

	167 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)

	168 result = self.process_links(result)

	169 return result

	170

	171 class TemplateConverter(Converter):

	172 def __init__(self, *args, **kwargs):

	173 Converter.__init__(self, *args, **kwargs)

	174

142 filters = {	175 filters = {

143 "translate": self.translate,	176 "translate": self.translate,

144 "linkify": self.linkify,	177 "linkify": self.linkify,

145 "toclist": self.toclist,	178 "toclist": self.toclist,

146 }	179 }

147 env = get_custom_template_environment(filters)	180 self._env = get_custom_template_environment(filters)

148 self._template = env.from_string(params[key].decode("utf-8"))	181

149	182 def get_html(self, source):

150 def __call__(self):	183 template = self._env.from_string(source)

151 result = self._template.render(self._params).encode("utf-8")	184 return template.render(self._params)

152 if self._splithead:	185

153 return split_head(result)	186 def translate(self, name, page=None, links=[]):

154 else:

155 return result

156

157 def translate(self, name, page=None):

158 if page == None:	187 if page == None:

159 localedata = self._params["localedata"]	188 localedata = self._params["localedata"]

160 else:	189 else:

161 localedata = self._params["source"].read_locale(self._params["locale"], page)	190 localedata = self._params["source"].read_locale(self._params["locale"], page)

162	191 return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links))

163 try:

164 return localedata[name]

165 except KeyError:

166 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

167	192

168 def linkify(self, page, locale=None):	193 def linkify(self, page, locale=None):

169 if locale == None:	194 if locale == None:

170 locale = self._params["locale"]	195 locale = self._params["locale"]

171	196

172 locale, url = self._params["source"].resolve_link(page, locale)	197 locale, url = self._params["source"].resolve_link(page, locale)

173 return jinja2.Markup('<a href="%s" hreflang="%s">' % (	198 return jinja2.Markup('<a href="%s" hreflang="%s">' % (

174 jinja2.Markup.escape(url),	199 jinja2.Markup.escape(url),

175 jinja2.Markup.escape(locale)	200 jinja2.Markup.escape(locale)

176 ))	201 ))

177	202

178 def toclist(self, content):	203 def toclist(self, content):

179 flat = []	204 flat = []

180 for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):	205 for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):

181 flat.append({	206 flat.append({

182 "level": int(match.group(1)),	207 "level": int(match.group(1)),

183 "anchor": jinja2.Markup(match.group(2)).unescape(),	208 "anchor": jinja2.Markup(match.group(2)).unescape(),

184 "title": jinja2.Markup(match.group(3)).unescape(),	209 "title": jinja2.Markup(match.group(3)).unescape(),

185 "subitems": [],	210 "subitems": [],

186 })	211 })

187	212

188 structured = []	213 structured = []

189 stack = [{"level": 0, "subitems": structured}]	214 stack = [{"level": 0, "subitems": structured}]

190 for item in flat:	215 for item in flat:

191 while stack[-1]["level"] >= item["level"]:	216 while stack[-1]["level"] >= item["level"]:

192 stack.pop()	217 stack.pop()

193 stack[-1]["subitems"].append(item)	218 stack[-1]["subitems"].append(item)

194 stack.append(item)	219 stack.append(item)

195 return structured	220 return structured

196	221

197 converters = {	222 converters = {

	223 "raw": RawConverter,

198 "md": MarkdownConverter,	224 "md": MarkdownConverter,

199 "tmpl": TemplateConverter,	225 "tmpl": TemplateConverter,

200 }	226 }

LEFT	RIGHT