Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)
Left Patch Set: Created Oct. 23, 2013, 1:52 p.m.
Right Patch Set: Fixed MIME type Created Nov. 4, 2013, 4:11 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « sitescripts/web/bin/test_server.py ('k') | sitescripts/web/sources.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 # coding: utf-8 1 # coding: utf-8
2 2
3 # This file is part of the Adblock Plus web scripts, 3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH 4 # Copyright (C) 2006-2013 Eyeo GmbH
5 # 5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify 6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as 7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation. 8 # published by the Free Software Foundation.
9 # 9 #
10 # Adblock Plus is distributed in the hope that it will be useful, 10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details. 13 # GNU General Public License for more details.
14 # 14 #
15 # You should have received a copy of the GNU General Public License 15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17 17
18 import re, jinja2, markdown 18 import re, jinja2, markdown
19 from sitescripts.utils import cached, setupStderr, get_custom_template_environment 19 from ..utils import get_custom_template_environment
20 20
21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are 21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are
22 # inserted into the <head> tag 22 # inserted into the <head> tag
23 orig_isBlockLevel = markdown.util.isBlockLevel 23 orig_isBlockLevel = markdown.util.isBlockLevel
24 def isBlockLevel(tag): 24 def isBlockLevel(tag):
25 if tag == "head": 25 if tag == "head":
26 return True 26 return True
27 else: 27 else:
28 return orig_isBlockLevel(tag) 28 return orig_isBlockLevel(tag)
29 markdown.util.isBlockLevel = isBlockLevel 29 markdown.util.isBlockLevel = isBlockLevel
30 30
31 def split_head(text): 31 html_escapes = {
32 head = [] 32 "<": "&lt;",
33 def add_to_head(match): 33 ">": "&gt;",
34 head.append(match.group(1)) 34 "&": "&amp;",
35 return "" 35 "\"": "&quot;",
36 body = re.sub(r"<head>(.*?)</head>", 36 "'": "&#39;",
37 add_to_head, text, flags=re.S) 37 }
38 return "".join(head), body 38
39 39 class Converter:
40 class MarkdownConverter: 40 def __init__(self, params, key="pagedata"):
41 class Localizer(markdown.preprocessors.Preprocessor): 41 self._params = params
42 def __init__(self, params): 42 self._key = key
43 self._params = params 43
44 44 # Read in any parameters specified at the beginning of the file
45 self._escaped = set(markdown.Markdown.ESCAPED_CHARS) 45 lines = params[key].splitlines(True)
46 self._escaped.add("<"); 46 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
47 self._escaped.add(">"); 47 name, value = lines.pop(0).split("=", 1)
48 self._escaped.add("&"); 48 params[name.strip()] = value.strip()
49 self._escaped.add("\""); 49 params[key] = "".join(lines)
50 self._escaped.add("'"); 50
51 51 def localize_string(self, name, localedata, escapes, links=[]):
52 def run(self, lines): 52 def escape(s):
53 new_lines = [] 53 return re.sub(r".",
54 for line in lines: 54 lambda match: escapes.get(match.group(0), match.group(0)),
55 # Replace localized strings 55 s, flags=re.S)
56 new_lines.append(re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$", 56 def re_escape(s):
57 lambda match: self.lookup_string(match.group(1), match.group(2)), 57 return re.escape(escape(s))
58 line)) 58
59 return new_lines 59 try:
60 60 result = localedata[name].strip()
61 def lookup_string(self, name, links): 61 except KeyError:
62 def escape(char): 62 raise Exception("Lookup failed for string %s used on page %s" % (name, sel f._params["page"]))
63 if char in self._escaped: 63
64 return "&#" + str(ord(char)) + ";" 64 # Insert links
65 else: 65 result = escape(result)
66 return char 66 while links:
67 67 result = re.sub(
68 try: 68 r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")),
69 result = self._params["localedata"][name].strip() 69 r'<a href="%s">\1</a>' % links.pop(0),
70 except KeyError: 70 result, 1, flags=re.S
71 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"])) 71 )
72 72
73 result = re.sub(r".", lambda match: escape(match.group(0)), result, flags=re.S) 73 # <strong> and <em> tags are allowed
74 result = re.sub(
75 r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")),
76 r"<strong>\1</strong>",
77 result, flags=re.S
78 )
79 result = re.sub(
80 r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")),
81 r"<em>\1</em>",
82 result, flags=re.S
83 )
84 return result
85
86 def insert_localized_strings(self, text, escapes):
87 def lookup_string(match):
88 name, links = match.groups()
74 if links: 89 if links:
75 links = map(unicode.strip, links.strip("()").split(",")) 90 links = map(unicode.strip, links.strip("()").split(","))
76 while len(links): 91 else:
77 result = re.sub(r"&#60;a&#62;(.*?)&#60;/a&#62;", r'<a href="%s">\1</a>' % links.pop(0), result, 1, flags=re.S) 92 links = []
78 return result 93 return self.localize_string(name, self._params["localedata"], escapes, links)
79 94
80 class Linkifier(markdown.postprocessors.Postprocessor): 95 return re.sub(
81 def __init__(self, params): 96 r"\$([\w\-]+)(\([^()$]+\))?\$",
82 self._params = params 97 lookup_string,
83 98 text
84 def process_link(self, match): 99 )
100
101 def process_links(self, text):
102 def process_link(match):
85 pre, attr, url, post = match.groups() 103 pre, attr, url, post = match.groups()
86 url = jinja2.Markup(url).unescape() 104 url = jinja2.Markup(url).unescape()
87 105
88 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"]) 106 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
89 if new_url != None: 107 if new_url != None:
90 url = new_url 108 url = new_url
91 if attr == "href": 109 if attr == "href":
92 post += ' hreflang="%s"' % jinja2.Markup.escape(locale) 110 post += ' hreflang="%s"' % jinja2.Markup.escape(locale)
93 111
94 return "".join((pre, jinja2.Markup.escape(url), post)) 112 return "".join((pre, jinja2.Markup.escape(url), post))
95 113
96 def run(self, text): 114 text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
97 text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link, text) 115 text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
98 text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link, text) 116 return text
99 return text 117
100 118 def resolve_includes(self, text):
101 def __init__(self, params, key="pagedata"): 119 def resolve_include(match):
102 self._params = params 120 global converters
103 self._splithead = key == "pagedata" 121 name = match.group(1)
104 122 for format, converter_class in converters.iteritems():
105 self._md = markdown.Markdown(output="html5", extensions=["attr_list"]) 123 if self._params["source"].has_include(name, format):
106 self._md.preprocessors.add("localizer", self.Localizer(params), "_begin") 124 self._params["includedata"] = self._params["source"].read_include(name, format)
107 self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end") 125 converter = converter_class(self._params, key="includedata")
108 126 return converter()
109 params["pagedata"] = params["pagedata"].decode("utf-8") 127 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))
110 128
111 # Read in any parameters specified at the beginning of the file 129 return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)
112 lines = params["pagedata"].splitlines(True)
113 while len(lines) and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
114 key, value = lines.pop(0).split("=", 1)
115 params[key.strip()] = value.strip()
116 params["pagedata"] = "".join(lines)
117 130
118 def __call__(self): 131 def __call__(self):
119 def beautify_entities(match): 132 result = self.get_html(self._params[self._key])
120 escape = { 133 result = self.resolve_includes(result)
121 "<": "&lt;", 134 if self._key == "pagedata":
122 ">": "&gt;", 135 head = []
123 "&": "&amp;", 136 def add_to_head(match):
124 "\"": "&quot;", 137 head.append(match.group(1))
125 "'": "&#39;", 138 return ""
126 } 139 body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
127 char = chr(int(match.group(1))) 140 return "".join(head), body
128 return escape.get(char, char)
129
130 result = self._md.convert(self._params["pagedata"])
131 result = re.sub(r"&#(\d+);", beautify_entities, result).encode("utf-8")
132
133 if self._splithead:
134 return split_head(result)
135 else: 141 else:
136 return result 142 return result
137 143
138 class TemplateConverter: 144 class RawConverter(Converter):
139 def __init__(self, params, key="pagedata"): 145 def get_html(self, source):
140 self._params = params 146 result = self.insert_localized_strings(source, html_escapes)
141 self._splithead = key == "pagedata" 147 result = self.process_links(result)
148 return result
149
150 class MarkdownConverter(Converter):
151 def get_html(self, source):
152 def remove_unnecessary_entities(match):
153 char = chr(int(match.group(1)))
154 if char in html_escapes:
155 return match.group(0)
156 else:
157 return char
158
159 escapes = {}
160 for char in markdown.Markdown.ESCAPED_CHARS:
161 escapes[char] = "&#" + str(ord(char)) + ";"
162 for key, value in html_escapes.iteritems():
163 escapes[key] = value
164
165 result = self.insert_localized_strings(source, escapes)
166 result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert(result)
167 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
168 result = self.process_links(result)
169 return result
170
171 class TemplateConverter(Converter):
172 def __init__(self, *args, **kwargs):
173 Converter.__init__(self, *args, **kwargs)
174
142 filters = { 175 filters = {
143 "translate": self.translate, 176 "translate": self.translate,
144 "linkify": self.linkify, 177 "linkify": self.linkify,
145 "toclist": self.toclist, 178 "toclist": self.toclist,
146 } 179 }
147 env = get_custom_template_environment(filters) 180 self._env = get_custom_template_environment(filters)
148 self._template = env.from_string(params[key].decode("utf-8")) 181
149 182 def get_html(self, source):
150 def __call__(self): 183 template = self._env.from_string(source)
151 result = self._template.render(self._params).encode("utf-8") 184 return template.render(self._params)
152 if self._splithead: 185
153 return split_head(result) 186 def translate(self, name, page=None, links=[]):
154 else:
155 return result
156
157 def translate(self, name, page=None):
158 if page == None: 187 if page == None:
159 localedata = self._params["localedata"] 188 localedata = self._params["localedata"]
160 else: 189 else:
161 localedata = self._params["source"].read_locale(self._params["locale"], page) 190 localedata = self._params["source"].read_locale(self._params["locale"], page)
162 191 return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links))
163 try:
164 return localedata[name]
165 except KeyError:
166 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))
167 192
168 def linkify(self, page, locale=None): 193 def linkify(self, page, locale=None):
169 if locale == None: 194 if locale == None:
170 locale = self._params["locale"] 195 locale = self._params["locale"]
171 196
172 locale, url = self._params["source"].resolve_link(page, locale) 197 locale, url = self._params["source"].resolve_link(page, locale)
173 return jinja2.Markup('<a href="%s" hreflang="%s">' % ( 198 return jinja2.Markup('<a href="%s" hreflang="%s">' % (
174 jinja2.Markup.escape(url), 199 jinja2.Markup.escape(url),
175 jinja2.Markup.escape(locale) 200 jinja2.Markup.escape(locale)
176 )) 201 ))
177 202
178 def toclist(self, content): 203 def toclist(self, content):
179 flat = [] 204 flat = []
180 for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S): 205 for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
181 flat.append({ 206 flat.append({
182 "level": int(match.group(1)), 207 "level": int(match.group(1)),
183 "anchor": jinja2.Markup(match.group(2)).unescape(), 208 "anchor": jinja2.Markup(match.group(2)).unescape(),
184 "title": jinja2.Markup(match.group(3)).unescape(), 209 "title": jinja2.Markup(match.group(3)).unescape(),
185 "subitems": [], 210 "subitems": [],
186 }) 211 })
187 212
188 structured = [] 213 structured = []
189 stack = [{"level": 0, "subitems": structured}] 214 stack = [{"level": 0, "subitems": structured}]
190 for item in flat: 215 for item in flat:
191 while stack[-1]["level"] >= item["level"]: 216 while stack[-1]["level"] >= item["level"]:
192 stack.pop() 217 stack.pop()
193 stack[-1]["subitems"].append(item) 218 stack[-1]["subitems"].append(item)
194 stack.append(item) 219 stack.append(item)
195 return structured 220 return structured
196 221
197 converters = { 222 converters = {
223 "raw": RawConverter,
198 "md": MarkdownConverter, 224 "md": MarkdownConverter,
199 "tmpl": TemplateConverter, 225 "tmpl": TemplateConverter,
200 } 226 }
LEFTRIGHT

Powered by Google App Engine
This is Rietveld