Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)
Patch Set: Completed functionality Created Oct. 24, 2013, 9:32 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # coding: utf-8
2
3 # This file is part of the Adblock Plus web scripts,
4 # Copyright (C) 2006-2013 Eyeo GmbH
5 #
6 # Adblock Plus is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License version 3 as
8 # published by the Free Software Foundation.
9 #
10 # Adblock Plus is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
17
18 import re, jinja2, markdown
19 from ..utils import get_custom_template_environment
20
# Monkey-patch Markdown's isBlockLevel function so that "head" is reported as
# a block-level tag; this ensures that no paragraphs are inserted into the
# <head> tag.
orig_isBlockLevel = markdown.util.isBlockLevel
def isBlockLevel(tag):
    """Return True for "head", otherwise Markdown's original verdict."""
    return tag == "head" or orig_isBlockLevel(tag)
markdown.util.isBlockLevel = isBlockLevel
30
# Maps each character that needs escaping in HTML to its entity.
html_escapes = dict([
    ("<", "&lt;"),
    (">", "&gt;"),
    ("&", "&amp;"),
    ("\"", "&quot;"),
    ("'", "&#39;"),
])
38
class Converter:
    """Base class for converting page or include source text into HTML.

    Subclasses have to provide ``get_html(source)``. Calling an instance runs
    ``get_html`` on ``params[key]`` and then resolves ``<? include name ?>``
    directives in the result.

    ``params`` is shared with the caller and is modified in place: parameters
    parsed from the top of the text are stored in it, the text under ``key``
    is replaced by the remainder, and resolving an include stores the include
    text under ``"includedata"``.
    """

    def __init__(self, params, key="pagedata"):
        """Remember ``params`` and parse leading ``name = value`` lines.

        params -- mapping holding the text under ``key``; depending on the
                  methods used, "localedata", "page", "source" and "locale"
                  are read from it as well
        key    -- name of the entry containing the text to convert
        """
        self._params = params
        self._key = key

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def insert_localized_strings(self, text, escapes):
        """Replace ``$name$`` and ``$name(link, ...)$`` by localized strings.

        Each string is taken from ``params["localedata"]``, stripped, and has
        its characters replaced according to the ``escapes`` mapping. If links
        are given, the n-th (escaped) ``<a>...</a>`` pair in the string is
        turned into a real anchor pointing to the n-th link; surplus links
        are ignored.

        Raises Exception if a referenced string does not exist.
        """
        def escape(s):
            return re.sub(r".",
                          lambda match: escapes.get(match.group(0), match.group(0)),
                          s, flags=re.S)

        # The markers appear in their escaped form inside the escaped string.
        # re.escape() keeps them literal whatever the escapes mapping produces.
        anchor_regexp = re.compile(
            r"%s(.*?)%s" % (re.escape(escape("<a>")), re.escape(escape("</a>"))),
            re.S
        )

        def lookup_string(match):
            name, links = match.groups()
            try:
                result = self._params["localedata"][name].strip()
            except KeyError:
                raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

            result = escape(result)
            if links:
                for link in links.strip("()").split(","):
                    # A function is used as replacement so that backslashes in
                    # the link are not interpreted as group references.
                    result = anchor_regexp.sub(
                        lambda m, href=link.strip(): '<a href="%s">%s</a>' % (href, m.group(1)),
                        result, 1
                    )
            return result

        return re.sub(
            r"\$([\w\-]+)(\([^()$]+\))?\$",
            lookup_string,
            text
        )

    def process_links(self, text):
        """Rewrite ``<a href>`` and ``<img src>`` URLs via the page source.

        Each URL is unescaped and passed to ``params["source"].resolve_link``
        together with ``params["locale"]``. If a new URL is returned it
        replaces the original one, and links additionally get an ``hreflang``
        attribute with the returned locale. The URL is always written back
        escaped.
        """
        def process_link(match):
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
        text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
        return text

    def resolve_includes(self, text):
        """Replace ``<? include name ?>`` directives by the converted include.

        The first format in the module-level ``converters`` mapping for which
        ``params["source"].has_include(name, format)`` is true determines the
        converter class used.

        Raises Exception if no format provides the include.
        """
        def resolve_include(match):
            name = match.group(1)
            for fmt, converter_class in converters.items():
                if self._params["source"].has_include(name, fmt):
                    self._params["includedata"] = self._params["source"].read_include(name, fmt)
                    converter = converter_class(self._params, key="includedata")
                    return converter()
            raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

        return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)

    def __call__(self):
        """Convert the text stored under the configured key.

        For "pagedata" a ``(head, body)`` tuple is returned, where ``head`` is
        the concatenated content of all ``<head>...</head>`` sections and
        ``body`` is the HTML with these sections removed. For any other key
        the resulting HTML is returned as a single string.
        """
        result = self.get_html(self._params[self._key])
        result = self.resolve_includes(result)
        if self._key != "pagedata":
            return result

        head = []
        def add_to_head(match):
            head.append(match.group(1))
            return ""
        body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
        return "".join(head), body
123
class RawConverter(Converter):
    """Converter for sources that already are HTML."""

    def get_html(self, source):
        """Insert localized strings (HTML-escaped), then process the links."""
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)
129
class MarkdownConverter(Converter):
    """Converter for sources written in Markdown."""

    def get_html(self, source):
        """Convert Markdown ``source`` to HTML.

        Localized strings are inserted with both HTML special characters and
        Markdown's escapable characters replaced by entities, so that they
        cannot be interpreted as markup. After the Markdown conversion the
        numeric entities are turned back into plain characters, unless the
        character has to stay escaped in HTML. Finally links are processed.
        """
        def to_char(code):
            # chr() only covers 0-255 on Python 2, so entities such as
            # &#8212; need unichr(); that name doesn't exist on Python 3.
            try:
                return unichr(code)
            except NameError:
                return chr(code)

        def remove_unnecessary_entities(match):
            try:
                char = to_char(int(match.group(1)))
            except (ValueError, OverflowError):
                # Not a valid code point, leave the entity untouched
                return match.group(0)
            if char in html_escapes:
                return match.group(0)
            return char

        escapes = dict(
            (char, "&#" + str(ord(char)) + ";")
            for char in markdown.Markdown.ESCAPED_CHARS
        )
        # HTML escapes take precedence over the numeric entities
        escapes.update(html_escapes)

        result = self.insert_localized_strings(source, escapes)
        # NOTE(review): Python-Markdown documents this option as
        # "output_format" - confirm that "output" is actually honoured.
        result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert(result)
        result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
        result = self.process_links(result)
        return result
150
class TemplateConverter(Converter):
    """Converter rendering the source as a template.

    Templates are rendered through the environment returned by
    ``get_custom_template_environment``, with ``translate``, ``linkify`` and
    ``toclist`` registered as filters.
    """

    def __init__(self, *args, **kwargs):
        """Set up the converter, arguments are the same as for Converter."""
        Converter.__init__(self, *args, **kwargs)

        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }
        self._env = get_custom_template_environment(filters)

    def get_html(self, source):
        """Render ``source`` as a template with the parameters as variables."""
        template = self._env.from_string(source)
        return template.render(self._params)

    def translate(self, name, page=None):
        """Return the localized string ``name``.

        The string is looked up in ``params["localedata"]``, or, if ``page``
        is given, in the data that ``params["source"].read_locale`` returns
        for that page and the current locale.

        Raises Exception if the string does not exist.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)

        try:
            return localedata[name]
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

    def linkify(self, page, locale=None):
        """Return the opening ``<a>`` tag of a link to ``page``.

        ``locale`` defaults to ``params["locale"]``; URL and ``hreflang`` are
        whatever ``params["source"].resolve_link`` returns for the two.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
            jinja2.Markup.escape(url),
            jinja2.Markup.escape(locale)
        ))

    def toclist(self, content):
        """Build a nested table of contents from the headings in ``content``.

        Only headings with an ``id`` attribute are considered. Returns a list
        of dicts with the keys "level", "anchor", "title" and "subitems",
        where "subitems" lists the following headings of a deeper level.
        """
        flat = []
        # \s rather than a literal space: the tag name may be followed by any
        # whitespace, e.g. a line break.
        for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Never pop the root entry, otherwise a (bogus) <h0> heading
            # would leave the stack empty.
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured
205
# Maps each supported source format to the converter class handling it.
converters = dict(
    raw=RawConverter,
    md=MarkdownConverter,
    tmpl=TemplateConverter,
)
OLDNEW

Powered by Google App Engine
This is Rietveld