OLD | NEW |
(Empty) | |
| 1 # coding: utf-8 |
| 2 |
| 3 # This file is part of the Adblock Plus web scripts, |
| 4 # Copyright (C) 2006-2013 Eyeo GmbH |
| 5 # |
| 6 # Adblock Plus is free software: you can redistribute it and/or modify |
| 7 # it under the terms of the GNU General Public License version 3 as |
| 8 # published by the Free Software Foundation. |
| 9 # |
| 10 # Adblock Plus is distributed in the hope that it will be useful, |
| 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 # GNU General Public License for more details. |
| 14 # |
| 15 # You should have received a copy of the GNU General Public License |
| 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 17 |
| 18 import re, jinja2, markdown |
| 19 from sitescripts.utils import cached, setupStderr, get_custom_template_environme
nt |
| 20 |
# Markdown decides via isBlockLevel() whether an element's content may be
# wrapped in paragraphs. Wrap the stock implementation so that <head> is
# always reported as block-level, which keeps <p> tags out of the <head> tag.
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Report "head" as block-level; defer to Markdown's own check otherwise."""
    return tag == "head" or orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel
| 30 |
def split_head(text):
    """Separate the contents of all ``<head>...</head>`` sections from a page.

    Every ``<head>`` element is removed from *text*; the text found between
    the tags is collected in document order.

    ``text`` may be a text string or an encoded byte string (the converters
    in this module pass UTF-8 encoded output). The pattern and the returned
    values use the same type as the input, so byte strings no longer fail
    with ``TypeError`` on Python 3.

    Returns a ``(head, body)`` tuple: the concatenated contents of all
    ``<head>`` elements, and *text* with those elements removed.
    """
    if isinstance(text, bytes):
        pattern, empty = br"<head>(.*?)</head>", b""
    else:
        pattern, empty = r"<head>(.*?)</head>", ""

    head = []

    def add_to_head(match):
        # Remember the element's content and drop the element from the body.
        head.append(match.group(1))
        return empty

    # re.S lets "." span newlines, as <head> sections are usually multi-line.
    body = re.sub(pattern, add_to_head, text, flags=re.S)
    return empty.join(head), body
| 39 |
class MarkdownConverter:
    """Callable that renders ``params["pagedata"]`` from Markdown to HTML.

    Construction decodes ``params["pagedata"]`` from UTF-8, moves any leading
    ``name = value`` lines into ``params`` and installs two Markdown
    processors: ``Localizer`` (expands ``$string$`` placeholders before
    parsing) and ``Linkifier`` (rewrites link and image URLs afterwards).

    Calling the instance returns the UTF-8 encoded HTML; when ``key`` is
    ``"pagedata"`` the result is passed through ``split_head()`` and a
    ``(head, body)`` tuple is returned instead.
    """

    class Localizer(markdown.preprocessors.Preprocessor):
        """Preprocessor replacing ``$name$`` and ``$name(link, ...)$`` with
        the localized string ``params["localedata"][name]``."""

        def __init__(self, params):
            self._params = params

            # Characters that must not reach Markdown/HTML verbatim from a
            # translation: Markdown's own escapable characters plus the HTML
            # special characters.
            self._escaped = set(markdown.Markdown.ESCAPED_CHARS)
            self._escaped.update(("<", ">", "&", "\"", "'"))

        def run(self, lines):
            """Return *lines* with every localization placeholder expanded."""
            def replace(match):
                return self.lookup_string(match.group(1), match.group(2))

            return [
                re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$", replace, line)
                for line in lines
            ]

        def lookup_string(self, name, links):
            """Return the escaped localized string *name*.

            ``links`` is the raw ``(link1, link2, ...)`` group of the
            placeholder or ``None``. Each link becomes the ``href`` of the
            next ``<a>...</a>`` pair found in the translation, in order.

            Raises ``Exception`` if *name* is missing from the locale data.
            """
            def escape(match):
                char = match.group(0)
                if char in self._escaped:
                    return "&#" + str(ord(char)) + ";"
                return char

            try:
                result = self._params["localedata"][name].strip()
            except KeyError:
                raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

            # Escape character by character (re.S so newlines are visited too).
            result = re.sub(r".", escape, result, flags=re.S)
            if links:
                for link in links.strip("()").split(","):
                    # The escaping above has turned the translation's
                    # "<a>"/"</a>" markers into numeric entities, so that is
                    # the form to look for (assumes neither "a" nor "/" is in
                    # Markdown's ESCAPED_CHARS). A function replacement is
                    # used so that backslashes in the link are not
                    # interpreted as regex replacement escapes.
                    result = re.sub(
                        r"&#60;a&#62;(.*?)&#60;/a&#62;",
                        lambda match, href=link.strip():
                            '<a href="%s">%s</a>' % (href, match.group(1)),
                        result, 1, flags=re.S)
            return result

    class Linkifier(markdown.postprocessors.Postprocessor):
        """Postprocessor passing ``<a href>`` and ``<img src>`` URLs through
        ``params["source"].resolve_link()``."""

        def __init__(self, params):
            self._params = params

        def process_link(self, match):
            """Return the matched attribute with its URL resolved.

            The match groups are: text up to and including the opening
            quote, attribute name, attribute value, closing quote.
            """
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            if new_url is not None:
                url = new_url
                # NOTE(review): nesting reconstructed - hreflang is only
                # added when the link was resolved, otherwise ``locale``
                # would presumably be meaningless. Confirm against
                # resolve_link().
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        def run(self, text):
            """Return *text* with all link and image URLs processed."""
            text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link, text)
            text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link, text)
            return text

    def __init__(self, params, key="pagedata"):
        self._params = params
        # Only full pages are split into head and body on output.
        self._splithead = key == "pagedata"

        # NOTE(review): Python-Markdown documents this option as
        # ``output_format``; confirm that ``output`` is actually honoured.
        self._md = markdown.Markdown(output="html5", extensions=["attr_list"])
        self._md.preprocessors.add("localizer", self.Localizer(params), "_begin")
        self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end")

        # NOTE(review): the data is always read from params["pagedata"],
        # whatever ``key`` is - confirm this is intended.
        params["pagedata"] = params["pagedata"].decode("utf-8")

        # Read in any parameters specified at the beginning of the file:
        # consecutive leading "name = value" lines are stored in params and
        # removed from the page data.
        lines = params["pagedata"].splitlines(True)
        body_start = 0
        for line in lines:
            if not re.search(r"^\s*[\w\-]+\s*=", line):
                break
            name, value = line.split("=", 1)
            params[name.strip()] = value.strip()
            body_start += 1
        params["pagedata"] = "".join(lines[body_start:])

    def __call__(self):
        """Convert the page data and return it UTF-8 encoded.

        Returns a ``(head, body)`` tuple from ``split_head()`` for full
        pages, the encoded HTML otherwise.
        """
        # HTML special characters stay escaped, as named or numeric entities.
        escape = {
            "<": "&lt;",
            ">": "&gt;",
            "&": "&amp;",
            "\"": "&quot;",
            "'": "&#39;",
        }
        try:
            to_char = unichr  # Python 2: chr() only covers code points < 256
        except NameError:
            to_char = chr

        def beautify_entities(match):
            # Turn a decimal entity back into the character it stands for,
            # unless that character has to remain escaped in HTML.
            try:
                char = to_char(int(match.group(1)))
            except (ValueError, OverflowError):
                # Not a representable code point, leave the entity alone.
                return match.group(0)
            return escape.get(char, char)

        result = self._md.convert(self._params["pagedata"])
        result = re.sub(r"&#(\d+);", beautify_entities, result).encode("utf-8")

        if self._splithead:
            return split_head(result)
        return result
| 137 |
class TemplateConverter:
    """Callable that renders ``params[key]`` as a template.

    The template is created from the UTF-8 decoded ``params[key]`` in the
    environment returned by ``get_custom_template_environment()``, with the
    ``translate``, ``linkify`` and ``toclist`` filters made available.

    Calling the instance renders the template with ``params`` as variables
    and returns the UTF-8 encoded result; when ``key`` is ``"pagedata"`` the
    result is passed through ``split_head()`` and a ``(head, body)`` tuple
    is returned instead.
    """

    def __init__(self, params, key="pagedata"):
        self._params = params
        # Only full pages are split into head and body on output.
        self._splithead = key == "pagedata"
        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }
        env = get_custom_template_environment(filters)
        self._template = env.from_string(params[key].decode("utf-8"))

    def __call__(self):
        """Render the template and return the UTF-8 encoded output."""
        result = self._template.render(self._params).encode("utf-8")
        if self._splithead:
            return split_head(result)
        return result

    def translate(self, name, page=None):
        """Return the localized string *name*.

        The string is looked up in ``params["localedata"]``, or, if *page*
        is given, in the data returned by
        ``params["source"].read_locale(params["locale"], page)``.

        Raises ``Exception`` if the string does not exist.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)

        try:
            return localedata[name]
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

    def linkify(self, page, locale=None):
        """Return an opening ``<a>`` tag pointing to *page*.

        *locale* defaults to ``params["locale"]``. The ``href`` and
        ``hreflang`` values are whatever ``resolve_link()`` returns, escaped.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        # NOTE(review): if resolve_link() can return None for the URL here,
        # this emits href="None" - confirm whether that case should raise.
        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
            jinja2.Markup.escape(url),
            jinja2.Markup.escape(locale)
        ))

    def toclist(self, content):
        """Build a nested table of contents from the headings in *content*.

        Only headings carrying an ``id`` attribute are considered. Returns a
        list of dicts with the keys ``level``, ``anchor``, ``title`` and
        ``subitems``; ``subitems`` holds the deeper headings that follow an
        entry, in the same format.
        """
        flat = []
        for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        # The bottom entry is a sentinel that owns the top-level list.
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Drop entries that are not parents of this heading. The sentinel
            # must stay even for a (bogus) <h0>, otherwise the lookup below
            # would fail on an empty stack.
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured
| 196 |
# Converter classes by name ("md" for Markdown, "tmpl" for templates).
converters = dict(
    md=MarkdownConverter,
    tmpl=TemplateConverter,
)
OLD | NEW |