sitescripts/web/converters.py - Issue 17817001: Simple CMS as Anwiki replacement

Unified Diff: sitescripts/web/converters.py

Issue 17817001: Simple CMS as Anwiki replacement (Closed)

Patch Set: Fixed MIME type Created Nov. 4, 2013, 4:11 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: sitescripts/web/converters.py

===================================================================

new file mode 100644

--- /dev/null

+++ b/sitescripts/web/converters.py

@@ -0,0 +1,226 @@

+# coding: utf-8

+# This file is part of the Adblock Plus web scripts,

+# Adblock Plus is free software: you can redistribute it and/or modify

+# it under the terms of the GNU General Public License version 3 as

+# published by the Free Software Foundation.

+# Adblock Plus is distributed in the hope that it will be useful,

+# but WITHOUT ANY WARRANTY; without even the implied warranty of

+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

+# GNU General Public License for more details.

+# You should have received a copy of the GNU General Public License

+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

+import re, jinja2, markdown

+from ..utils import get_custom_template_environment

# Markdown decides via markdown.util.isBlockLevel which tags are block-level.
# Wrap that check so <head> counts as block-level too; this ensures that no
# paragraphs are inserted into the <head> tag.
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Report "head" as block-level; defer to Markdown's check otherwise."""
    return True if tag == "head" else orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel

# Characters that must be replaced by an entity when text is inserted into
# HTML. MarkdownConverter also uses the key set to decide which numeric
# entities have to stay encoded in its output.
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}

class Converter(object):
    """Base class that turns page or include source text into HTML.

    Subclasses supply ``get_html(source)``. Calling an instance converts
    ``params[key]`` and then resolves ``<? include name ?>`` directives.
    """

    def __init__(self, params, key="pagedata"):
        """Remember *params* and extract leading ``name = value`` lines.

        Every leading line of ``params[key]`` that looks like ``name = value``
        is removed from the text and stored as ``params[name]`` (both sides
        stripped of whitespace). *params* is modified in place.
        """
        self._params = params
        self._key = key

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def localize_string(self, name, localedata, escapes, links=()):
        """Return the escaped translation of *name* with allowed markup restored.

        The string is looked up in *localedata*, stripped, and every character
        found in *escapes* is replaced by its mapped value. Afterwards each
        ``<a>...</a>`` pair is given the next URL from *links* (in order), and
        ``<strong>``/``<em>`` pairs are turned back into real tags.

        *links* is only read; the caller's sequence is left untouched.

        Raises:
            Exception: if *name* is not present in *localedata*.
        """
        def escape(s):
            return "".join(escapes.get(char, char) for char in s)

        def re_escape(s):
            return re.escape(escape(s))

        def tag_pattern(tag):
            # Matches an escaped <tag>...</tag> pair without nested markup.
            return r"%s([^<>]*?)%s" % (
                re_escape("<%s>" % tag), re_escape("</%s>" % tag)
            )

        try:
            result = localedata[name].strip()
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

        result = escape(result)

        # Insert links. A function is used as the replacement so that
        # backslashes in a URL are never interpreted as regex escapes.
        # NOTE(review): the URL is inserted as given, without HTML escaping.
        link_pattern = tag_pattern("a")
        for link in list(links):
            result = re.sub(
                link_pattern,
                lambda match, link=link: '<a href="%s">%s</a>' % (link, match.group(1)),
                result, count=1, flags=re.S
            )

        # <strong> and <em> tags are allowed
        for tag in ("strong", "em"):
            result = re.sub(
                tag_pattern(tag),
                r"<%s>\1</%s>" % (tag, tag),
                result, flags=re.S
            )
        return result

    def insert_localized_strings(self, text, escapes):
        """Replace ``$name$`` and ``$name(link, ...)$`` placeholders in *text*.

        Each placeholder is replaced by ``localize_string`` applied to *name*
        with ``params["localedata"]``; the optional parenthesised,
        comma-separated list provides the link URLs.
        """
        def lookup_string(match):
            name, links = match.groups()
            if links:
                links = [link.strip() for link in links.strip("()").split(",")]
            else:
                links = []
            return self.localize_string(name, self._params["localedata"], escapes, links)

        return re.sub(
            r"\$([\w\-]+)(\([^()$]+\))?\$",
            lookup_string,
            text
        )

    def process_links(self, text):
        """Rewrite ``<a href>`` and ``<img src>`` URLs through the page source.

        Each URL is unescaped and handed to ``params["source"].resolve_link``
        together with ``params["locale"]``. When a replacement URL comes back
        it is used instead, and anchors additionally get an ``hreflang``
        attribute with the returned locale.
        """
        def process_link(match):
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            # NOTE(review): nesting reconstructed from a flattened diff;
            # hreflang is assumed to apply only to resolved links - confirm.
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
        text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
        return text

    def resolve_includes(self, text):
        """Expand every ``<? include name ?>`` directive in *text*.

        The first format in the module-level ``converters`` mapping for which
        the source reports an include named *name* is used: the include is
        read into ``params["includedata"]`` and converted with that format's
        converter class.

        Raises:
            Exception: if no format provides the include.
        """
        def resolve_include(match):
            name = match.group(1)
            source = self._params["source"]
            for fmt, converter_class in converters.items():
                if source.has_include(name, fmt):
                    self._params["includedata"] = source.read_include(name, fmt)
                    converter = converter_class(self._params, key="includedata")
                    return converter()
            raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

        return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)

    def __call__(self):
        """Convert ``params[key]`` to HTML.

        Returns:
            For the ``"pagedata"`` key a ``(head, body)`` tuple, where *head*
            is the concatenated content of all ``<head>`` elements and *body*
            is the HTML with those elements removed. For any other key the
            HTML string itself.
        """
        result = self.get_html(self._params[self._key])
        result = self.resolve_includes(result)
        if self._key != "pagedata":
            return result

        head = []

        def add_to_head(match):
            head.append(match.group(1))
            return ""

        body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
        return "".join(head), body

class RawConverter(Converter):
    """Converter for sources that already are HTML."""

    def get_html(self, source):
        """Fill in localized strings (HTML-escaped), then rewrite links."""
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)

class MarkdownConverter(Converter):
    """Converter for Markdown sources."""

    def get_html(self, source):
        """Render Markdown *source* to HTML with localized strings inserted.

        Localized strings are inserted before rendering. Characters that
        Markdown treats as escapable are encoded as numeric entities first so
        that translations cannot introduce Markdown syntax; entities that are
        not needed in HTML are decoded again after rendering.
        """
        try:
            to_char = unichr  # Python 2: chr() fails for code points > 255
        except NameError:
            to_char = chr

        def remove_unnecessary_entities(match):
            char = to_char(int(match.group(1)))
            # Characters with HTML meaning have to stay encoded.
            if char in html_escapes:
                return match.group(0)
            return char

        escapes = {
            char: "&#" + str(ord(char)) + ";"
            for char in markdown.Markdown.ESCAPED_CHARS
        }
        # HTML escapes take precedence over the numeric Markdown escapes.
        escapes.update(html_escapes)

        result = self.insert_localized_strings(source, escapes)
        # NOTE(review): Python-Markdown documents this option as
        # "output_format"; confirm whether "output" has any effect here.
        result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert(result)
        result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
        result = self.process_links(result)
        return result

class TemplateConverter(Converter):
    """Converter for template sources.

    Templates are rendered in the environment returned by
    ``get_custom_template_environment`` with the ``translate``, ``linkify``
    and ``toclist`` filters registered.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base converter and build the template environment."""
        Converter.__init__(self, *args, **kwargs)

        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }
        self._env = get_custom_template_environment(filters)

    def get_html(self, source):
        """Render *source* as a template with the page parameters as context."""
        template = self._env.from_string(source)
        return template.render(self._params)

    def translate(self, name, page=None, links=None):
        """Template filter: return the localized string *name* as markup.

        Strings come from ``params["localedata"]`` unless *page* is given, in
        which case that page's locale data for the current locale is read from
        the source. *links* supplies the URLs for ``<a>`` pairs in the string.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)
        # Hand over a copy so the caller's list is never consumed.
        links = [] if links is None else list(links)
        return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links))

    def linkify(self, page, locale=None):
        """Template filter: return an opening ``<a>`` tag pointing at *page*.

        *locale* defaults to the current page locale. The URL and the locale
        written to ``hreflang`` are those returned by the source's
        ``resolve_link``.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
            jinja2.Markup.escape(url),
            jinja2.Markup.escape(locale)
        ))

    def toclist(self, content):
        """Template filter: build a nested table of contents from headings.

        Every ``<hN ... id="...">`` element in *content* becomes a dict with
        ``level``, ``anchor``, ``title`` and ``subitems``; a heading is nested
        under the closest preceding heading with a smaller level.

        Returns:
            The list of top-level items.
        """
        flat = []
        for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Never pop the root entry, even for a (bogus) level-0 heading.
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured

# Maps a source format name to the converter class handling it;
# Converter.resolve_includes iterates it to find an include's format.
converters = {
    "raw": RawConverter,
    "md": MarkdownConverter,
    "tmpl": TemplateConverter,
}

« no previous file with comments | « sitescripts/web/bin/test_server.py ('k') | sitescripts/web/sources.py » ('j') | no next file with comments »