| LEFT | RIGHT | 
|---|---|
| 1 # coding: utf-8 | 1 # coding: utf-8 | 
| 2 | 2 | 
| 3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, | 
| 4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH | 
| 5 # | 5 # | 
| 6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify | 
| 7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as | 
| 8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. | 
| 9 # | 9 # | 
| 10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, | 
| 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. | 
| 14 # | 14 # | 
| 15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License | 
| 16 # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
| 17 | 17 | 
| 18 import re, jinja2, markdown | 18 import re, jinja2, markdown | 
| 19 from sitescripts.utils import cached, setupStderr, get_custom_template_environme
     nt | 19 from ..utils import get_custom_template_environment | 
| 20 | 20 | 
| 21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are | 21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are | 
| 22 # inserted into the <head> tag | 22 # inserted into the <head> tag | 
| 23 orig_isBlockLevel = markdown.util.isBlockLevel | 23 orig_isBlockLevel = markdown.util.isBlockLevel | 
| 24 def isBlockLevel(tag): | 24 def isBlockLevel(tag): | 
| 25   if tag == "head": | 25   if tag == "head": | 
| 26     return True | 26     return True | 
| 27   else: | 27   else: | 
| 28     return orig_isBlockLevel(tag) | 28     return orig_isBlockLevel(tag) | 
| 29 markdown.util.isBlockLevel = isBlockLevel | 29 markdown.util.isBlockLevel = isBlockLevel | 
| 30 | 30 | 
| 31 def split_head(text): | 31 html_escapes = { | 
| 32   head = [] | 32   "<": "&lt;", | 
| 33   def add_to_head(match): | 33   ">": "&gt;", | 
| 34     head.append(match.group(1)) | 34   "&": "&amp;", | 
| 35     return "" | 35   "\"": "&quot;", | 
| 36   body = re.sub(r"<head>(.*?)</head>", | 36   "'": "&#39;", | 
| 37       add_to_head, text, flags=re.S) | 37 } | 
| 38   return "".join(head), body | 38 | 
| 39 | 39 class Converter: | 
| 40 class MarkdownConverter: | 40   def __init__(self, params, key="pagedata"): | 
| 41   class Localizer(markdown.preprocessors.Preprocessor): | 41     self._params = params | 
| 42     def __init__(self, params): | 42     self._key = key | 
| 43       self._params = params | 43 | 
| 44 | 44     # Read in any parameters specified at the beginning of the file | 
| 45       self._escaped = set(markdown.Markdown.ESCAPED_CHARS) | 45     lines = params[key].splitlines(True) | 
| 46       self._escaped.add("<"); | 46     while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | 
| 47       self._escaped.add(">"); | 47       name, value = lines.pop(0).split("=", 1) | 
| 48       self._escaped.add("&"); | 48       params[name.strip()] = value.strip() | 
| 49       self._escaped.add("\""); | 49     params[key] = "".join(lines) | 
| 50       self._escaped.add("'"); | 50 | 
| 51 | 51   def localize_string(self, name, localedata, escapes, links=[]): | 
| 52     def run(self, lines): | 52     def escape(s): | 
| 53       new_lines = [] | 53       return re.sub(r".", | 
| 54       for line in lines: | 54         lambda match: escapes.get(match.group(0), match.group(0)), | 
| 55         # Replace localized strings | 55         s, flags=re.S) | 
| 56         new_lines.append(re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$", | 56     def re_escape(s): | 
| 57             lambda match: self.lookup_string(match.group(1), match.group(2)), | 57       return re.escape(escape(s)) | 
| 58             line)) | 58 | 
| 59       return new_lines | 59     try: | 
| 60 | 60       result = localedata[name].strip() | 
| 61     def lookup_string(self, name, links): | 61     except KeyError: | 
| 62       def escape(char): | 62       raise Exception("Lookup failed for string %s used on page %s" % (name, sel
     f._params["page"])) | 
| 63         if char in self._escaped: | 63 | 
| 64           return "&#" + str(ord(char)) + ";" | 64     # Insert links | 
| 65         else: | 65     result = escape(result) | 
| 66           return char | 66     while links: | 
| 67 | 67       result = re.sub( | 
| 68       try: | 68         r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")), | 
| 69         result = self._params["localedata"][name].strip() | 69         r'<a href="%s">\1</a>' % links.pop(0), | 
| 70       except KeyError: | 70         result, 1, flags=re.S | 
| 71         raise Exception("Lookup failed for string %s used on page %s" % (name, s
     elf._params["page"])) | 71       ) | 
| 72 | 72 | 
| 73       result = re.sub(r".", lambda match: escape(match.group(0)), result, flags=
     re.S) | 73     # <strong> and <em> tags are allowed | 
|  | 74     result = re.sub( | 
|  | 75       r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")), | 
|  | 76       r"<strong>\1</strong>", | 
|  | 77       result, flags=re.S | 
|  | 78     ) | 
|  | 79     result = re.sub( | 
|  | 80       r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")), | 
|  | 81       r"<em>\1</em>", | 
|  | 82       result, flags=re.S | 
|  | 83     ) | 
|  | 84     return result | 
|  | 85 | 
|  | 86   def insert_localized_strings(self, text, escapes): | 
|  | 87     def lookup_string(match): | 
|  | 88       name, links = match.groups() | 
| 74       if links: | 89       if links: | 
| 75         links = map(unicode.strip, links.strip("()").split(",")) | 90         links = map(unicode.strip, links.strip("()").split(",")) | 
| 76         while len(links): | 91       else: | 
| 77           result = re.sub(r"<a>(.*?)</a>", r'<a href="%s">\1</a>
     ' % links.pop(0), result, 1, flags=re.S) | 92         links = [] | 
| 78       return result | 93       return self.localize_string(name, self._params["localedata"], escapes, lin
     ks) | 
| 79 | 94 | 
| 80   class Linkifier(markdown.postprocessors.Postprocessor): | 95     return re.sub( | 
| 81     def __init__(self, params): | 96       r"\$([\w\-]+)(\([^()$]+\))?\$", | 
| 82       self._params = params | 97       lookup_string, | 
| 83 | 98       text | 
| 84     def process_link(self, match): | 99     ) | 
|  | 100 | 
|  | 101   def process_links(self, text): | 
|  | 102     def process_link(match): | 
| 85       pre, attr, url, post = match.groups() | 103       pre, attr, url, post = match.groups() | 
| 86       url = jinja2.Markup(url).unescape() | 104       url = jinja2.Markup(url).unescape() | 
| 87 | 105 | 
| 88       locale, new_url = self._params["source"].resolve_link(url, self._params["l
     ocale"]) | 106       locale, new_url = self._params["source"].resolve_link(url, self._params["l
     ocale"]) | 
| 89       if new_url != None: | 107       if new_url != None: | 
| 90         url = new_url | 108         url = new_url | 
| 91         if attr == "href": | 109         if attr == "href": | 
| 92           post += ' hreflang="%s"' % jinja2.Markup.escape(locale) | 110           post += ' hreflang="%s"' % jinja2.Markup.escape(locale) | 
| 93 | 111 | 
| 94       return "".join((pre, jinja2.Markup.escape(url), post)) | 112       return "".join((pre, jinja2.Markup.escape(url), post)) | 
| 95 | 113 | 
| 96     def run(self, text): | 114     text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text) | 
| 97       text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link, 
     text) | 115     text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text) | 
| 98       text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link,
      text) | 116     return text | 
| 99       return text | 117 | 
| 100 | 118   def resolve_includes(self, text): | 
| 101   def __init__(self, params, key="pagedata"): | 119     def resolve_include(match): | 
| 102     self._params = params | 120       global converters | 
| 103     self._splithead = key == "pagedata" | 121       name = match.group(1) | 
| 104 | 122       for format, converter_class in converters.iteritems(): | 
| 105     self._md = markdown.Markdown(output="html5", extensions=["attr_list"]) | 123         if self._params["source"].has_include(name, format): | 
| 106     self._md.preprocessors.add("localizer", self.Localizer(params), "_begin") | 124           self._params["includedata"] = self._params["source"].read_include(name
     , format) | 
| 107     self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end") | 125           converter = converter_class(self._params, key="includedata") | 
| 108 | 126           return converter() | 
| 109     params["pagedata"] = params["pagedata"].decode("utf-8") | 127       raise Exception("Failed to resolve include %s in page %s" % (name, self._p
     arams["page"])) | 
| 110 | 128 | 
| 111     # Read in any parameters specified at the beginning of the file | 129     return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text) | 
| 112     lines = params["pagedata"].splitlines(True) |  | 
| 113     while len(lines) and re.search(r"^\s*[\w\-]+\s*=", lines[0]): |  | 
| 114       key, value = lines.pop(0).split("=", 1) |  | 
| 115       params[key.strip()] = value.strip() |  | 
| 116     params["pagedata"] = "".join(lines) |  | 
| 117 | 130 | 
| 118   def __call__(self): | 131   def __call__(self): | 
| 119     def beautify_entities(match): | 132     result = self.get_html(self._params[self._key]) | 
| 120       escape = { | 133     result = self.resolve_includes(result) | 
| 121         "<": "&lt;", | 134     if self._key == "pagedata": | 
| 122         ">": "&gt;", | 135       head = [] | 
| 123         "&": "&amp;", | 136       def add_to_head(match): | 
| 124         "\"": "&quot;", | 137         head.append(match.group(1)) | 
| 125         "'": "&#39;", | 138         return "" | 
| 126       } | 139       body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S) | 
| 127       char = chr(int(match.group(1))) | 140       return "".join(head), body | 
| 128       return escape.get(char, char) |  | 
| 129 |  | 
| 130     result = self._md.convert(self._params["pagedata"]) |  | 
| 131     result = re.sub(r"&#(\d+);", beautify_entities, result).encode("utf-8") |  | 
| 132 |  | 
| 133     if self._splithead: |  | 
| 134       return split_head(result) |  | 
| 135     else: | 141     else: | 
| 136       return result | 142       return result | 
| 137 | 143 | 
| 138 class TemplateConverter: | 144 class RawConverter(Converter): | 
| 139   def __init__(self, params, key="pagedata"): | 145   def get_html(self, source): | 
| 140     self._params = params | 146     result = self.insert_localized_strings(source, html_escapes) | 
| 141     self._splithead = key == "pagedata" | 147     result = self.process_links(result) | 
|  | 148     return result | 
|  | 149 | 
|  | 150 class MarkdownConverter(Converter): | 
|  | 151   def get_html(self, source): | 
|  | 152     def remove_unnecessary_entities(match): | 
|  | 153       char = chr(int(match.group(1))) | 
|  | 154       if char in html_escapes: | 
|  | 155         return match.group(0) | 
|  | 156       else: | 
|  | 157         return char | 
|  | 158 | 
|  | 159     escapes = {} | 
|  | 160     for char in markdown.Markdown.ESCAPED_CHARS: | 
|  | 161       escapes[char] = "&#" + str(ord(char)) + ";" | 
|  | 162     for key, value in html_escapes.iteritems(): | 
|  | 163       escapes[key] = value | 
|  | 164 | 
|  | 165     result = self.insert_localized_strings(source, escapes) | 
|  | 166     result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert
     (result) | 
|  | 167     result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) | 
|  | 168     result = self.process_links(result) | 
|  | 169     return result | 
|  | 170 | 
|  | 171 class TemplateConverter(Converter): | 
|  | 172   def __init__(self, *args, **kwargs): | 
|  | 173     Converter.__init__(self, *args, **kwargs) | 
|  | 174 | 
| 142     filters = { | 175     filters = { | 
| 143       "translate": self.translate, | 176       "translate": self.translate, | 
| 144       "linkify": self.linkify, | 177       "linkify": self.linkify, | 
| 145       "toclist": self.toclist, | 178       "toclist": self.toclist, | 
| 146     } | 179     } | 
| 147     env = get_custom_template_environment(filters) | 180     self._env = get_custom_template_environment(filters) | 
| 148     self._template = env.from_string(params[key].decode("utf-8")) | 181 | 
| 149 | 182   def get_html(self, source): | 
| 150   def __call__(self): | 183     template = self._env.from_string(source) | 
| 151     result = self._template.render(self._params).encode("utf-8") | 184     return template.render(self._params) | 
| 152     if self._splithead: | 185 | 
| 153       return split_head(result) | 186   def translate(self, name, page=None, links=[]): | 
| 154     else: |  | 
| 155       return result |  | 
| 156 |  | 
| 157   def translate(self, name, page=None): |  | 
| 158     if page == None: | 187     if page == None: | 
| 159       localedata = self._params["localedata"] | 188       localedata = self._params["localedata"] | 
| 160     else: | 189     else: | 
| 161       localedata = self._params["source"].read_locale(self._params["locale"], pa
     ge) | 190       localedata = self._params["source"].read_locale(self._params["locale"], pa
     ge) | 
| 162 | 191     return jinja2.Markup(self.localize_string(name, localedata, html_escapes, li
     nks=links)) | 
| 163     try: |  | 
| 164       return localedata[name] |  | 
| 165     except KeyError: |  | 
| 166       raise Exception("Lookup failed for string %s used on page %s" % (name, sel
     f._params["page"])) |  | 
| 167 | 192 | 
| 168   def linkify(self, page, locale=None): | 193   def linkify(self, page, locale=None): | 
| 169     if locale == None: | 194     if locale == None: | 
| 170       locale = self._params["locale"] | 195       locale = self._params["locale"] | 
| 171 | 196 | 
| 172     locale, url = self._params["source"].resolve_link(page, locale) | 197     locale, url = self._params["source"].resolve_link(page, locale) | 
| 173     return jinja2.Markup('<a href="%s" hreflang="%s">' % ( | 198     return jinja2.Markup('<a href="%s" hreflang="%s">' % ( | 
| 174       jinja2.Markup.escape(url), | 199       jinja2.Markup.escape(url), | 
| 175       jinja2.Markup.escape(locale) | 200       jinja2.Markup.escape(locale) | 
| 176     )) | 201     )) | 
| 177 | 202 | 
| 178   def toclist(self, content): | 203   def toclist(self, content): | 
| 179     flat = [] | 204     flat = [] | 
| 180     for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>',
      content, re.S): | 205     for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'
     , content, re.S): | 
| 181       flat.append({ | 206       flat.append({ | 
| 182         "level": int(match.group(1)), | 207         "level": int(match.group(1)), | 
| 183         "anchor": jinja2.Markup(match.group(2)).unescape(), | 208         "anchor": jinja2.Markup(match.group(2)).unescape(), | 
| 184         "title": jinja2.Markup(match.group(3)).unescape(), | 209         "title": jinja2.Markup(match.group(3)).unescape(), | 
| 185         "subitems": [], | 210         "subitems": [], | 
| 186       }) | 211       }) | 
| 187 | 212 | 
| 188     structured = [] | 213     structured = [] | 
| 189     stack = [{"level": 0, "subitems": structured}] | 214     stack = [{"level": 0, "subitems": structured}] | 
| 190     for item in flat: | 215     for item in flat: | 
| 191       while stack[-1]["level"] >= item["level"]: | 216       while stack[-1]["level"] >= item["level"]: | 
| 192         stack.pop() | 217         stack.pop() | 
| 193       stack[-1]["subitems"].append(item) | 218       stack[-1]["subitems"].append(item) | 
| 194       stack.append(item) | 219       stack.append(item) | 
| 195     return structured | 220     return structured | 
| 196 | 221 | 
| 197 converters = { | 222 converters = { | 
|  | 223   "raw": RawConverter, | 
| 198   "md": MarkdownConverter, | 224   "md": MarkdownConverter, | 
| 199   "tmpl": TemplateConverter, | 225   "tmpl": TemplateConverter, | 
| 200 } | 226 } | 
| LEFT | RIGHT | 
|---|