sitescripts/cms/converters.py - Issue 4921490642305024: Issue 2129 - Remove CMS implementation from sitescripts repository

Side by Side Diff: sitescripts/cms/converters.py

Issue 4921490642305024: Issue 2129 - Remove CMS implementation from sitescripts repository (Closed)

Patch Set: Created March 13, 2015, 5:05 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 # coding: utf-8

2

3 # This file is part of the Adblock Plus web scripts,

4 # Copyright (C) 2006-2015 Eyeo GmbH

5 #

6 # Adblock Plus is free software: you can redistribute it and/or modify

7 # it under the terms of the GNU General Public License version 3 as

8 # published by the Free Software Foundation.

9 #

10 # Adblock Plus is distributed in the hope that it will be useful,

11 # but WITHOUT ANY WARRANTY; without even the implied warranty of

12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

13 # GNU General Public License for more details.

14 #

15 # You should have received a copy of the GNU General Public License

16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

17

18 import os, imp, re, jinja2, markdown

19 from ..utils import get_custom_template_environment

20

# Markdown wraps anything it does not consider block-level in <p>. Replace its
# isBlockLevel check with a wrapper that also reports "head" as block-level so
# that no paragraphs are inserted into the <head> tag.
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Return True for "head"; defer to Markdown's own check otherwise."""
    return True if tag == "head" else orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel

30

# Characters that must never appear verbatim in generated HTML, mapped to the
# entity that replaces them. Converters pass this table as the ``escapes``
# argument when localizing strings.
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}

38

class Converter(object):
    """Base class turning page or include source text into HTML.

    Subclasses implement ``get_html(source)``. Calling an instance runs
    ``get_html`` on ``params[key]``, resolves ``<? include name ?>``
    directives and, for page data, separates ``<head>`` content from the body.
    """

    # Regex fragments matching the delimiters of an include directive.
    include_start_regex = '<'
    include_end_regex = '>'

    def __init__(self, params, key="pagedata"):
        """Store *params* and extract leading ``name = value`` lines.

        params -- dict holding the source text under *key*; it is modified in
                  place: every leading ``name = value`` line is removed from
                  ``params[key]`` and stored as ``params[name] = value``.
        key    -- name of the entry of *params* that holds the source text.
        """
        self._params = params
        self._key = key

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def get_html(self, source):
        """Convert *source* to HTML; must be provided by subclasses."""
        raise NotImplementedError("Converter subclasses must implement get_html()")

    def localize_string(self, name, localedata, escapes, links=None):
        """Return the escaped translation of string *name*.

        name       -- key to look up in *localedata*.
        localedata -- mapping of string names to translated text.
        escapes    -- mapping of single characters to their replacement.
        links      -- URLs assigned, in order, to the ``<a>...</a>`` pairs
                      found in the translation. The list is not modified.

        The whole text is escaped first; afterwards the escaped forms of
        ``<a>``, ``<strong>`` and ``<em>`` pairs are turned back into markup.
        Raises Exception if *name* is not present in *localedata*.
        """
        def escape(s):
            return "".join(escapes.get(char, char) for char in s)

        def tag_pattern(tag):
            # Matches the *escaped* form of <tag>...</tag> with no nested
            # markup in between.
            return r"%s([^<>]*?)%s" % (
                re.escape(escape("<%s>" % tag)),
                re.escape(escape("</%s>" % tag)),
            )

        try:
            result = localedata[name].strip()
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

        # Work on a copy so the caller's list is left untouched.
        pending_links = list(links) if links else []

        # Insert links
        result = escape(result)
        for link in pending_links:
            # A callable replacement keeps backslashes in the URL literal
            # instead of treating them as regex template escapes.
            result = re.sub(
                tag_pattern("a"),
                lambda match, link=link: '<a href="%s">%s</a>' % (link, match.group(1)),
                result, count=1, flags=re.S
            )

        # <strong> and <em> tags are allowed
        for tag in ("strong", "em"):
            result = re.sub(
                tag_pattern(tag),
                r"<%s>\1</%s>" % (tag, tag),
                result, flags=re.S
            )
        return result

    def insert_localized_strings(self, text, escapes):
        """Replace ``$name$`` / ``$name(link, ...)$`` markers in *text*.

        Each marker is replaced by ``localize_string`` applied to *name* with
        ``self._params["localedata"]``; the optional parenthesized part is a
        comma-separated list of link URLs.
        """
        def lookup_string(match):
            name, links = match.groups()
            if links:
                links = [link.strip() for link in links.strip("()").split(",")]
            else:
                links = []
            return self.localize_string(name, self._params["localedata"], escapes, links)

        return re.sub(
            r"\$([\w\-]+)(\([^()$]+\))?\$",
            lookup_string,
            text
        )

    def process_links(self, text):
        """Rewrite ``<a href>`` and ``<img src>`` URLs through the source.

        Each URL is passed to ``source.resolve_link(url, locale)``, which
        returns a ``(locale, new_url)`` pair. When ``new_url`` is not None it
        replaces the URL, and links additionally receive a ``hreflang``
        attribute with the returned locale.
        """
        def process_link(match):
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            # NOTE(review): nesting reconstructed - hreflang is presumably only
            # added when the link was actually resolved; confirm.
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
        text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
        return text

    def resolve_includes(self, text):
        """Replace every ``<? include name ?>`` directive by the include's HTML.

        The module-level ``converters`` registry is searched for a format in
        which the source has the include; that format's converter renders it.
        Raises Exception if no format provides the include.
        """
        def resolve_include(match):
            name = match.group(1)
            for fmt, converter_class in converters.items():
                if self._params["source"].has_include(name, fmt):
                    self._params["includedata"] = self._params["source"].read_include(name, fmt)
                    converter = converter_class(self._params, key="includedata")
                    return converter()
            raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

        return re.sub(
            r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (
                self.include_start_regex,
                self.include_end_regex
            ),
            resolve_include,
            text
        )

    def __call__(self):
        """Render the source text.

        Returns a ``(head, body)`` tuple when converting page data - the
        contents of all ``<head>`` elements are concatenated and removed from
        the body - and the plain HTML string for any other key.
        """
        result = self.get_html(self._params[self._key])
        result = self.resolve_includes(result)
        if self._key == "pagedata":
            head = []

            def add_to_head(match):
                head.append(match.group(1))
                return ""

            body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
            return "".join(head), body
        else:
            return result

153

class RawConverter(Converter):
    """Converter for sources that already are HTML."""

    def get_html(self, source):
        """Insert localized strings into *source*, then rewrite its links."""
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)

159

class MarkdownConverter(Converter):
    """Converter rendering Markdown sources to HTML."""

    # Accept the include delimiters either verbatim or in their HTML-escaped
    # form (presumably because the Markdown pass may escape them - confirm).
    include_start_regex = r'(?:%s|%s)' % (
        Converter.include_start_regex,
        re.escape(jinja2.escape(Converter.include_start_regex))
    )
    include_end_regex = r'(?:%s|%s)' % (
        Converter.include_end_regex,
        re.escape(jinja2.escape(Converter.include_end_regex))
    )

    def get_html(self, source):
        """Return the HTML produced from the Markdown text *source*.

        Localized strings are inserted with every Markdown special character
        replaced by a numeric entity, the result is run through Markdown, the
        numeric entities that do not stand for an HTML-special character are
        turned back into plain characters, and finally links are rewritten.
        """
        def remove_unnecessary_entities(match):
            char = unichr(int(match.group(1)))
            if char in html_escapes:
                # Still needs escaping in HTML, keep the entity.
                return match.group(0)
            else:
                return char

        # Markdown's special characters become numeric entities; the regular
        # HTML escapes take precedence where both tables define a character.
        escapes = {}
        for char in markdown.Markdown.ESCAPED_CHARS:
            escapes[char] = "&#" + str(ord(char)) + ";"
        escapes.update(html_escapes)

        # NOTE(review): Python-Markdown documents this option as
        # "output_format"; left as "output" to keep behavior unchanged.
        md = markdown.Markdown(output="html5", extensions=["attr_list"])
        md.preprocessors["html_block"].markdown_in_raw = True

        result = self.insert_localized_strings(source, escapes)
        result = md.convert(result)
        result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
        result = self.process_links(result)
        return result

192

class TemplateConverter(Converter):
    """Converter rendering Jinja2 templates.

    Provides the ``translate``, ``linkify`` and ``toclist`` filters plus any
    filter found in the ``filters`` directory of the source.
    """

    class _SourceLoader(jinja2.BaseLoader):
        """Jinja2 loader reading ``<template>.tmpl`` files from the source."""

        def __init__(self, source):
            self.source = source

        def get_source(self, environment, template):
            try:
                return self.source.read_file(template + ".tmpl"), None, None
            except Exception:
                raise jinja2.TemplateNotFound(template)

    def __init__(self, *args, **kwargs):
        """Initialize like ``Converter`` and set up the template environment.

        Every ``.py`` file listed by ``source.list_files("filters")`` is
        executed as a module and must define a function named like the file;
        that function is registered as a template filter.
        Raises Exception if a filter file lacks the expected function.
        """
        Converter.__init__(self, *args, **kwargs)

        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }

        for filename in self._params["source"].list_files("filters"):
            root, ext = os.path.splitext(filename)
            if ext.lower() != ".py":
                continue

            path = "%s/%s" % ("filters", filename)
            code = self._params["source"].read_file(path)
            module = imp.new_module(root.replace("/", "."))
            # SECURITY: filter files are executed with full privileges; the
            # source must be trusted.
            exec(code, module.__dict__)

            func = os.path.basename(root)
            if not hasattr(module, func):
                raise Exception("Expected function %s not found in filter file %s" % (func, filename))
            filters[func] = getattr(module, func)
            filters[func].module_ref = module  # Prevent garbage collection

        self._env = get_custom_template_environment(filters, self._SourceLoader(self._params["source"]))

    def get_html(self, source):
        """Render the template text *source* with the parameters as context."""
        template = self._env.from_string(source)
        return template.render(self._params)

    def translate(self, name, page=None, links=None):
        """Template filter returning the localized string *name* as markup.

        page  -- page whose locale data is read via ``source.read_locale``;
                 None uses the current ``localedata`` parameter.
        links -- URLs for the ``<a>`` pairs of the string; not modified.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)
        link_list = list(links) if links else []
        return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=link_list))

    def linkify(self, page, locale=None, **attrs):
        """Template filter returning an opening ``<a>`` tag linking to *page*.

        locale -- locale to resolve the link for; None uses the current one.
        attrs  -- additional attributes written after href and hreflang.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        attributes = [
            ('href', url),
            ('hreflang', locale)
        ] + list(attrs.items())
        return jinja2.Markup('<a%s>' % ''.join(
            ' %s="%s"' % (name, jinja2.escape(value)) for name, value in attributes
        ))

    def toclist(self, content):
        """Template filter building a nested table of contents from *content*.

        Every heading carrying an ``id`` attribute becomes a dict with the
        keys ``level``, ``anchor``, ``title`` and ``subitems``; a heading is
        placed in the ``subitems`` of the closest preceding heading with a
        lower level. Returns the list of top-most items.
        """
        flat = []
        for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        # The stack holds the chain of currently open headings; the level-0
        # sentinel at the bottom collects the top-most items.
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            while stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured

272

# Registry of source formats and the converter class handling each of them.
# Converter.resolve_includes walks this mapping to find the format in which an
# include exists.
converters = {
    "raw": RawConverter,
    "md": MarkdownConverter,
    "tmpl": TemplateConverter,
}

OLD	NEW

« no previous file with comments | « sitescripts/cms/bin/test_server.py ('k') | sitescripts/cms/runserver.spec » ('j') | no next file with comments »