Index: sitescripts/web/converters.py |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/sitescripts/web/converters.py |
@@ -0,0 +1,200 @@ |
+# coding: utf-8 |
+ |
+# This file is part of the Adblock Plus web scripts, |
+# Copyright (C) 2006-2013 Eyeo GmbH |
+# |
+# Adblock Plus is free software: you can redistribute it and/or modify |
+# it under the terms of the GNU General Public License version 3 as |
+# published by the Free Software Foundation. |
+# |
+# Adblock Plus is distributed in the hope that it will be useful, |
+# but WITHOUT ANY WARRANTY; without even the implied warranty of |
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+# GNU General Public License for more details. |
+# |
+# You should have received a copy of the GNU General Public License |
+# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ |
+import re, jinja2, markdown |
+from sitescripts.utils import cached, setupStderr, get_custom_template_environment |
+ |
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
# are inserted into the <head> tag.
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Report ``head`` as a block-level tag and defer to Markdown otherwise.

    Every tag other than ``"head"`` is passed through to the original
    ``markdown.util.isBlockLevel`` saved in ``orig_isBlockLevel``.
    """
    if tag == "head":
        return True
    return orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel
+ |
def split_head(text):
    """Separate the contents of all ``<head>`` elements from the rest of text.

    Every ``<head>...</head>`` span (matched non-greedily, across newlines) is
    cut out of ``text``.

    Returns a ``(head, body)`` tuple: ``head`` is the concatenation of the
    inner contents of all removed spans in document order, ``body`` is
    ``text`` with those spans (tags included) removed.
    """
    head_parts = []

    def collect(match):
        # Remember the inner markup and drop the whole element from the body.
        head_parts.append(match.group(1))
        return ""

    body = re.sub(r"<head>(.*?)</head>", collect, text, flags=re.S)
    return "".join(head_parts), body
+ |
class MarkdownConverter:
    """Convert a Markdown page into UTF-8 encoded HTML.

    ``params`` is a dict shared with the caller. The entry ``params[key]``
    holds the UTF-8 encoded page source; ``params["localedata"]``,
    ``params["page"]``, ``params["source"]`` and ``params["locale"]`` are read
    while resolving localized strings and links. Leading ``name = value``
    lines of the page source are stripped and stored in ``params``.
    """

    # Characters that must stay HTML-escaped when a numeric entity is turned
    # back into text by _beautify_entity().
    _HTML_ESCAPES = {
        "<": "&lt;",
        ">": "&gt;",
        "&": "&amp;",
        "\"": "&quot;",
        "'": "&#39;",
    }

    class Localizer(markdown.preprocessors.Preprocessor):
        """Preprocessor replacing ``$name$`` / ``$name(link, ...)$`` markers
        with the corresponding entry of ``params["localedata"]``."""

        def __init__(self, params):
            self._params = params

            # Markdown's own special characters plus the HTML-sensitive ones;
            # all of them are emitted as numeric entities in lookup_string().
            self._escaped = set(markdown.Markdown.ESCAPED_CHARS)
            self._escaped.update("<>&\"'")

        def run(self, lines):
            """Return ``lines`` with all localized string markers replaced."""
            return [
                re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$", self._replace_marker, line)
                for line in lines
            ]

        def _replace_marker(self, match):
            # Group 1 is the string name, group 2 the optional "(link, ...)".
            return self.lookup_string(match.group(1), match.group(2))

        def lookup_string(self, name, links):
            """Return the localized string ``name`` in escaped form.

            Special characters are replaced by numeric entities. ``links`` is
            either a false value or a parenthesized, comma-separated list of
            link targets; the n-th ``<a>...</a>`` pair of the string is turned
            into a link to the n-th target.

            Raises ``Exception`` if ``name`` is not in the locale data.
            """
            try:
                result = self._params["localedata"][name].strip()
            except KeyError:
                raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

            result = "".join(
                "&#" + str(ord(char)) + ";" if char in self._escaped else char
                for char in result
            )
            if links:
                for link in links.strip("()").split(","):
                    opening = '<a href="%s">' % link.strip()
                    # "<" and ">" were turned into entities above, so the
                    # placeholder tags have to be matched in escaped form. A
                    # function replacement keeps backslashes in the link from
                    # being read as regex template escapes.
                    result = re.sub(
                        r"&#60;a&#62;(.*?)&#60;/a&#62;",
                        lambda match, opening=opening: opening + match.group(1) + "</a>",
                        result, count=1, flags=re.S)
            return result

    class Linkifier(markdown.postprocessors.Postprocessor):
        """Postprocessor resolving ``<a href>`` and ``<img src>`` targets via
        ``params["source"].resolve_link()``."""

        def __init__(self, params):
            self._params = params

        def process_link(self, match):
            """Return the matched attribute with its URL resolved.

            When the link resolves, the URL is replaced and, for ``href``
            attributes, an ``hreflang`` attribute is appended. Unresolved
            URLs are re-escaped and kept.
            """
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        def run(self, text):
            """Return ``text`` with all link and image targets processed."""
            text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link, text)
            text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link, text)
            return text

    def __init__(self, params, key="pagedata"):
        """Set up the Markdown pipeline and preprocess ``params[key]``.

        ``params[key]`` is decoded from UTF-8 and replaced in place; any
        leading ``name = value`` lines are removed from it and stored as
        ``params[name]``. Head splitting is only enabled for the default key.
        """
        self._params = params
        self._key = key
        self._splithead = key == "pagedata"

        # NOTE(review): Python-Markdown documents this option as
        # "output_format"; confirm that "output" has the intended effect.
        self._md = markdown.Markdown(output="html5", extensions=["attr_list"])
        self._md.preprocessors.add("localizer", self.Localizer(params), "_begin")
        self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end")

        params[key] = params[key].decode("utf-8")

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def _beautify_entity(self, match):
        """Turn one numeric entity back into its character where possible.

        HTML-sensitive characters are returned as their HTML escapes; code
        points the interpreter cannot represent are left as the entity.
        """
        code = int(match.group(1))
        try:
            try:
                char = unichr(code)
            except NameError:
                # No unichr() builtin: chr() already covers all of Unicode.
                char = chr(code)
        except (ValueError, OverflowError):
            return match.group(0)
        return self._HTML_ESCAPES.get(char, char)

    def __call__(self):
        """Render the page.

        Returns the UTF-8 encoded HTML, or the ``(head, body)`` tuple
        produced by ``split_head()`` when head splitting is enabled.
        """
        result = self._md.convert(self._params[self._key])
        result = re.sub(r"&#(\d+);", self._beautify_entity, result).encode("utf-8")

        if self._splithead:
            return split_head(result)
        return result
+ |
class TemplateConverter:
    """Render a template page into UTF-8 encoded output.

    ``params`` is the dict passed to the template on rendering;
    ``params[key]`` holds the UTF-8 encoded template source. The template
    environment is given the ``translate``, ``linkify`` and ``toclist``
    filters implemented by this class.
    """

    def __init__(self, params, key="pagedata"):
        """Compile ``params[key]``; head splitting is only enabled for the
        default key."""
        self._params = params
        self._splithead = key == "pagedata"
        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }
        env = get_custom_template_environment(filters)
        self._template = env.from_string(params[key].decode("utf-8"))

    def __call__(self):
        """Render the template.

        Returns the UTF-8 encoded result, or the ``(head, body)`` tuple
        produced by ``split_head()`` when head splitting is enabled.
        """
        result = self._template.render(self._params).encode("utf-8")
        if self._splithead:
            return split_head(result)
        return result

    def translate(self, name, page=None):
        """Return the localized string ``name``.

        The string is looked up in ``params["localedata"]``, or, when
        ``page`` is given, in the locale data read for that page via
        ``params["source"].read_locale()``.

        Raises ``Exception`` if the string does not exist.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)

        try:
            return localedata[name]
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

    def linkify(self, page, locale=None):
        """Return the opening ``<a>`` tag (as markup) linking to ``page``.

        ``locale`` defaults to ``params["locale"]``; the URL and the
        ``hreflang`` value are whatever ``resolve_link()`` returns.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
            jinja2.Markup.escape(url),
            jinja2.Markup.escape(locale)
        ))

    def toclist(self, content):
        """Build a nested table of contents from the headings in ``content``.

        Only ``<hN>`` elements carrying an ``id`` attribute are considered.
        Returns a list of dicts with the keys ``level``, ``anchor``,
        ``title`` and ``subitems``; each heading is placed in the
        ``subitems`` of the closest preceding heading with a lower level.
        """
        flat = []
        for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Never pop the root entry: the pattern also matches "<h0 ...>",
            # which used to empty the stack and raise IndexError.
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured
+ |
# Converter classes by page format name (presumably the source file
# extension; confirm against the code that looks entries up).
converters = {
    "md": MarkdownConverter,
    "tmpl": TemplateConverter,
}