| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # coding: utf-8 | |
| 2 | |
| 3 # This file is part of the Adblock Plus web scripts, | |
| 4 # Copyright (C) 2006-2013 Eyeo GmbH | |
| 5 # | |
| 6 # Adblock Plus is free software: you can redistribute it and/or modify | |
| 7 # it under the terms of the GNU General Public License version 3 as | |
| 8 # published by the Free Software Foundation. | |
| 9 # | |
| 10 # Adblock Plus is distributed in the hope that it will be useful, | |
| 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 13 # GNU General Public License for more details. | |
| 14 # | |
| 15 # You should have received a copy of the GNU General Public License | |
| 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
| 17 | |
| 18 import re, jinja2, markdown | |
| 19 from ..utils import get_custom_template_environment | |
| 20 | |
# Python-Markdown's isBlockLevel function is replaced below so that "head" is
# treated as a block-level tag and no paragraphs are inserted into <head>
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Report "head" as block-level; defer to Markdown for every other tag."""
    return True if tag == "head" else orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel
| 30 | |
# Characters that must never appear literally in inserted text, mapped to the
# HTML entity that replaces them
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}
| 38 | |
class Converter:
    """Base class that turns the source stored in a params dict into HTML.

    Subclasses implement ``get_html(source)``. Calling an instance runs
    ``get_html`` on ``params[key]`` and then resolves ``<? include ?>``
    directives in the result.
    """

    def __init__(self, params, key="pagedata"):
        """Remember *params* and consume leading ``name = value`` lines.

        Every line at the start of ``params[key]`` that looks like
        ``name = value`` is removed from the source and stored as
        ``params[name] = value`` (both stripped). *params* is modified in
        place.
        """
        self._params = params
        self._key = key

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def insert_localized_strings(self, text, escapes):
        """Replace ``$name$`` / ``$name(link, ...)$`` markers in *text*.

        Each marker is replaced by the stripped string
        ``params["localedata"][name]``, with every character found in
        *escapes* replaced by its mapped value. When links are given, the
        escaped ``<a>...</a>`` pairs in the string are turned into real
        anchors, one link per pair, in order.

        Raises Exception if *name* is missing from the locale data.
        """
        def escape(s):
            return re.sub(
                r".",
                lambda match: escapes.get(match.group(0), match.group(0)),
                s, flags=re.S
            )

        def lookup_string(match):
            name, links = match.groups()
            try:
                result = self._params["localedata"][name].strip()
            except KeyError:
                raise Exception(
                    "Lookup failed for string %s used on page %s"
                    % (name, self._params["page"])
                )

            result = escape(result)
            if links:
                # The string was escaped above, so the placeholder tags have
                # to be searched for in their escaped form
                anchor = r"%s(.*?)%s" % (escape("<a>"), escape("</a>"))
                for link in links.strip("()").split(","):
                    result = re.sub(
                        anchor,
                        r'<a href="%s">\1</a>' % link.strip(),
                        result, count=1, flags=re.S
                    )
            return result

        return re.sub(
            r"\$([\w\-]+)(\([^()$]+\))?\$",
            lookup_string,
            text
        )

    def process_links(self, text):
        """Rewrite ``<a href>`` and ``<img src>`` URLs via the source.

        Each URL is passed to ``params["source"].resolve_link`` together with
        ``params["locale"]``. If a new URL is returned it replaces the
        original one, and anchors additionally get a ``hreflang`` attribute
        with the returned locale.
        """
        def process_link(match):
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(
                url, self._params["locale"]
            )
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
        text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
        return text

    def resolve_includes(self, text):
        """Replace every ``<? include name ?>`` in *text* by the include's HTML.

        The first format in the module-level ``converters`` mapping for which
        the source has the include is used to convert it.

        Raises Exception if no format provides the include.
        """
        def resolve_include(match):
            name = match.group(1)
            for fmt, converter_class in converters.items():
                if self._params["source"].has_include(name, fmt):
                    self._params["includedata"] = (
                        self._params["source"].read_include(name, fmt)
                    )
                    converter = converter_class(self._params, key="includedata")
                    return converter()
            raise Exception(
                "Failed to resolve include %s in page %s"
                % (name, self._params["page"])
            )

        return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text)

    def __call__(self):
        """Convert the source and return the resulting HTML.

        For page data a ``(head, body)`` tuple is returned, where *head* is
        the concatenated content of all ``<head>`` elements and *body* is the
        remaining HTML. For any other key the HTML string is returned as is.
        """
        result = self.get_html(self._params[self._key])
        result = self.resolve_includes(result)
        if self._key != "pagedata":
            return result

        head = []

        def add_to_head(match):
            head.append(match.group(1))
            return ""

        body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
        return "".join(head), body
| 123 | |
class RawConverter(Converter):
    """Converter that only inserts localized strings and processes links."""

    def get_html(self, source):
        """Return *source* with localized strings inserted and links resolved."""
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)
| 129 | |
class MarkdownConverter(Converter):
    """Converter that renders its source with Python-Markdown."""

    def get_html(self, source):
        """Return the HTML produced by running *source* through Markdown.

        Localized strings are inserted first, with both HTML special
        characters and Markdown's escapable characters replaced by entities.
        After rendering, numeric entities are turned back into plain
        characters unless the character is one of the HTML escapes, and links
        are processed.
        """
        try:
            to_char = unichr  # Python 2: chr() only covers code points 0-255
        except NameError:
            to_char = chr

        def remove_unnecessary_entities(match):
            try:
                char = to_char(int(match.group(1)))
            except (ValueError, OverflowError):
                # Not a representable code point, keep the entity untouched
                return match.group(0)
            if char in html_escapes:
                return match.group(0)
            return char

        # Markdown's special characters are escaped numerically; the HTML
        # escapes are applied last so they win for characters in both sets
        escapes = {}
        for char in markdown.Markdown.ESCAPED_CHARS:
            escapes[char] = "&#" + str(ord(char)) + ";"
        escapes.update(html_escapes)

        result = self.insert_localized_strings(source, escapes)
        # NOTE(review): Python-Markdown documents this keyword as
        # "output_format"; confirm that "output" is honoured by the installed
        # version before relying on HTML5 output
        result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert(result)
        result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result)
        result = self.process_links(result)
        return result
| 150 | |
class TemplateConverter(Converter):
    """Converter that renders its source as a Jinja2 template.

    The template environment is created with the ``translate``, ``linkify``
    and ``toclist`` filters defined on this class.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the base converter and set up the template environment."""
        Converter.__init__(self, *args, **kwargs)

        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }
        self._env = get_custom_template_environment(filters)

    def get_html(self, source):
        """Render *source* as a template, with the params as its variables."""
        template = self._env.from_string(source)
        return template.render(self._params)

    def translate(self, name, page=None):
        """Return the localized string *name*.

        The string is looked up in the current page's locale data, or in the
        locale data of *page* (read for the current locale) if given.

        Raises Exception if the string does not exist.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(
                self._params["locale"], page
            )

        try:
            return localedata[name]
        except KeyError:
            raise Exception(
                "Lookup failed for string %s used on page %s"
                % (name, self._params["page"])
            )

    def linkify(self, page, locale=None):
        """Return the opening ``<a>`` tag linking to *page* as markup.

        The link is resolved for *locale*, which defaults to the locale of
        the page being rendered.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        return jinja2.Markup('<a href="%s" hreflang="%s">' % (
            jinja2.Markup.escape(url),
            jinja2.Markup.escape(locale)
        ))

    def toclist(self, content):
        """Build a nested table of contents from the headings in *content*.

        Only headings carrying an ``id`` attribute are considered. Returns a
        list of dicts with the keys ``level``, ``anchor``, ``title`` and
        ``subitems``; ``subitems`` holds the following headings of a deeper
        level in the same format.
        """
        flat = []
        for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Never pop the root entry, otherwise a level 0 heading would
            # leave the stack empty
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured
| 205 | |
# Maps each source format name to the Converter subclass handling it;
# Converter.resolve_includes iterates over this mapping to find the format
# in which an include is available
converters = {
    "raw": RawConverter,
    "md": MarkdownConverter,
    "tmpl": TemplateConverter,
}
| OLD | NEW |