| LEFT | RIGHT | 
|    1 # coding: utf-8 |    1 # coding: utf-8 | 
|    2  |    2  | 
|    3 # This file is part of the Adblock Plus web scripts, |    3 # This file is part of the Adblock Plus web scripts, | 
|    4 # Copyright (C) 2006-2013 Eyeo GmbH |    4 # Copyright (C) 2006-2013 Eyeo GmbH | 
|    5 # |    5 # | 
|    6 # Adblock Plus is free software: you can redistribute it and/or modify |    6 # Adblock Plus is free software: you can redistribute it and/or modify | 
|    7 # it under the terms of the GNU General Public License version 3 as |    7 # it under the terms of the GNU General Public License version 3 as | 
|    8 # published by the Free Software Foundation. |    8 # published by the Free Software Foundation. | 
|    9 # |    9 # | 
|   10 # Adblock Plus is distributed in the hope that it will be useful, |   10 # Adblock Plus is distributed in the hope that it will be useful, | 
|   11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |   11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|   12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the |   12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|   13 # GNU General Public License for more details. |   13 # GNU General Public License for more details. | 
|   14 # |   14 # | 
|   15 # You should have received a copy of the GNU General Public License |   15 # You should have received a copy of the GNU General Public License | 
|   16 # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. |   16 # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
|   17  |   17  | 
|   18 import re, jinja2, markdown |   18 import re, jinja2, markdown | 
|   19 from sitescripts.utils import cached, setupStderr, get_custom_template_environme
     nt |   19 from ..utils import get_custom_template_environment | 
|   20  |   20  | 
|   21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are |   21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are | 
|   22 # inserted into the <head> tag |   22 # inserted into the <head> tag | 
|   23 orig_isBlockLevel = markdown.util.isBlockLevel |   23 orig_isBlockLevel = markdown.util.isBlockLevel | 
|   24 def isBlockLevel(tag): |   24 def isBlockLevel(tag): | 
|   25   if tag == "head": |   25   if tag == "head": | 
|   26     return True |   26     return True | 
|   27   else: |   27   else: | 
|   28     return orig_isBlockLevel(tag) |   28     return orig_isBlockLevel(tag) | 
|   29 markdown.util.isBlockLevel = isBlockLevel |   29 markdown.util.isBlockLevel = isBlockLevel | 
|   30  |   30  | 
|   31 def split_head(text): |   31 html_escapes = { | 
|   32   head = [] |   32   "<": "&lt;", | 
|   33   def add_to_head(match): |   33   ">": "&gt;", | 
|   34     head.append(match.group(1)) |   34   "&": "&amp;", | 
|   35     return "" |   35   "\"": "&quot;", | 
|   36   body = re.sub(r"<head>(.*?)</head>", |   36   "'": "&#39;", | 
|   37       add_to_head, text, flags=re.S) |   37 } | 
|   38   return "".join(head), body |   38  | 
|   39  |   39 class Converter: | 
|   40 class MarkdownConverter: |   40   def __init__(self, params, key="pagedata"): | 
|   41   class Localizer(markdown.preprocessors.Preprocessor): |   41     self._params = params | 
|   42     def __init__(self, params): |   42     self._key = key | 
|   43       self._params = params |   43  | 
|   44  |   44     # Read in any parameters specified at the beginning of the file | 
|   45       self._escaped = set(markdown.Markdown.ESCAPED_CHARS) |   45     lines = params[key].splitlines(True) | 
|   46       self._escaped.add("<"); |   46     while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | 
|   47       self._escaped.add(">"); |   47       name, value = lines.pop(0).split("=", 1) | 
|   48       self._escaped.add("&"); |   48       params[name.strip()] = value.strip() | 
|   49       self._escaped.add("\""); |   49     params[key] = "".join(lines) | 
|   50       self._escaped.add("'"); |   50  | 
|   51  |   51   def localize_string(self, name, localedata, escapes, links=[]): | 
|   52     def run(self, lines): |   52     def escape(s): | 
|   53       new_lines = [] |   53       return re.sub(r".", | 
|   54       for line in lines: |   54         lambda match: escapes.get(match.group(0), match.group(0)), | 
|   55         # Replace localized strings |   55         s, flags=re.S) | 
|   56         new_lines.append(re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$", |   56     def re_escape(s): | 
|   57             lambda match: self.lookup_string(match.group(1), match.group(2)), |   57       return re.escape(escape(s)) | 
|   58             line)) |   58  | 
|   59       return new_lines |   59     try: | 
|   60  |   60       result = localedata[name].strip() | 
|   61     def lookup_string(self, name, links): |   61     except KeyError: | 
|   62       def escape(char): |   62       raise Exception("Lookup failed for string %s used on page %s" % (name, sel
     f._params["page"])) | 
|   63         if char in self._escaped: |   63  | 
|   64           return "&#" + str(ord(char)) + ";" |   64     # Insert links | 
|   65         else: |   65     result = escape(result) | 
|   66           return char |   66     while links: | 
|   67  |   67       result = re.sub( | 
|   68       try: |   68         r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")), | 
|   69         result = self._params["localedata"][name].strip() |   69         r'<a href="%s">\1</a>' % links.pop(0), | 
|   70       except KeyError: |   70         result, 1, flags=re.S | 
|   71         raise Exception("Lookup failed for string %s used on page %s" % (name, s
     elf._params["page"])) |   71       ) | 
|   72  |   72  | 
|   73       result = re.sub(r".", lambda match: escape(match.group(0)), result, flags=
     re.S) |   73     # <strong> and <em> tags are allowed | 
 |   74     result = re.sub( | 
 |   75       r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")), | 
 |   76       r"<strong>\1</strong>", | 
 |   77       result, flags=re.S | 
 |   78     ) | 
 |   79     result = re.sub( | 
 |   80       r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")), | 
 |   81       r"<em>\1</em>", | 
 |   82       result, flags=re.S | 
 |   83     ) | 
 |   84     return result | 
 |   85  | 
 |   86   def insert_localized_strings(self, text, escapes): | 
 |   87     def lookup_string(match): | 
 |   88       name, links = match.groups() | 
|   74       if links: |   89       if links: | 
|   75         links = map(unicode.strip, links.strip("()").split(",")) |   90         links = map(unicode.strip, links.strip("()").split(",")) | 
|   76         while len(links): |   91       else: | 
|   77           result = re.sub(r"<a>(.*?)</a>", r'<a href="%s">\1</a>
     ' % links.pop(0), result, 1, flags=re.S) |   92         links = [] | 
|   78       return result |   93       return self.localize_string(name, self._params["localedata"], escapes, lin
     ks) | 
|   79  |   94  | 
|   80   class Linkifier(markdown.postprocessors.Postprocessor): |   95     return re.sub( | 
|   81     def __init__(self, params): |   96       r"\$([\w\-]+)(\([^()$]+\))?\$", | 
|   82       self._params = params |   97       lookup_string, | 
|   83  |   98       text | 
|   84     def process_link(self, match): |   99     ) | 
 |  100  | 
 |  101   def process_links(self, text): | 
 |  102     def process_link(match): | 
|   85       pre, attr, url, post = match.groups() |  103       pre, attr, url, post = match.groups() | 
|   86       url = jinja2.Markup(url).unescape() |  104       url = jinja2.Markup(url).unescape() | 
|   87  |  105  | 
|   88       locale, new_url = self._params["source"].resolve_link(url, self._params["l
     ocale"]) |  106       locale, new_url = self._params["source"].resolve_link(url, self._params["l
     ocale"]) | 
|   89       if new_url != None: |  107       if new_url != None: | 
|   90         url = new_url |  108         url = new_url | 
|   91         if attr == "href": |  109         if attr == "href": | 
|   92           post += ' hreflang="%s"' % jinja2.Markup.escape(locale) |  110           post += ' hreflang="%s"' % jinja2.Markup.escape(locale) | 
|   93  |  111  | 
|   94       return "".join((pre, jinja2.Markup.escape(url), post)) |  112       return "".join((pre, jinja2.Markup.escape(url), post)) | 
|   95  |  113  | 
|   96     def run(self, text): |  114     text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text) | 
|   97       text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link, 
     text) |  115     text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text) | 
|   98       text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link,
      text) |  116     return text | 
|   99       return text |  117  | 
|  100  |  118   def resolve_includes(self, text): | 
|  101   def __init__(self, params, key="pagedata"): |  119     def resolve_include(match): | 
|  102     self._params = params |  120       global converters | 
|  103     self._splithead = key == "pagedata" |  121       name = match.group(1) | 
|  104  |  122       for format, converter_class in converters.iteritems(): | 
|  105     self._md = markdown.Markdown(output="html5", extensions=["attr_list"]) |  123         if self._params["source"].has_include(name, format): | 
|  106     self._md.preprocessors.add("localizer", self.Localizer(params), "_begin") |  124           self._params["includedata"] = self._params["source"].read_include(name
     , format) | 
|  107     self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end") |  125           converter = converter_class(self._params, key="includedata") | 
|  108  |  126           return converter() | 
|  109     params["pagedata"] = params["pagedata"].decode("utf-8") |  127       raise Exception("Failed to resolve include %s in page %s" % (name, self._p
     arams["page"])) | 
|  110  |  128  | 
|  111     # Read in any parameters specified at the beginning of the file |  129     return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text) | 
|  112     lines = params["pagedata"].splitlines(True) |  | 
|  113     while len(lines) and re.search(r"^\s*[\w\-]+\s*=", lines[0]): |  | 
|  114       key, value = lines.pop(0).split("=", 1) |  | 
|  115       params[key.strip()] = value.strip() |  | 
|  116     params["pagedata"] = "".join(lines) |  | 
|  117  |  130  | 
|  118   def __call__(self): |  131   def __call__(self): | 
|  119     def beautify_entities(match): |  132     result = self.get_html(self._params[self._key]) | 
|  120       escape = { |  133     result = self.resolve_includes(result) | 
|  121         "<": "&lt;", |  134     if self._key == "pagedata": | 
|  122         ">": "&gt;", |  135       head = [] | 
|  123         "&": "&amp;", |  136       def add_to_head(match): | 
|  124         "\"": "&quot;", |  137         head.append(match.group(1)) | 
|  125         "'": "&#39;", |  138         return "" | 
|  126       } |  139       body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S) | 
|  127       char = chr(int(match.group(1))) |  140       return "".join(head), body | 
|  128       return escape.get(char, char) |  | 
|  129  |  | 
|  130     result = self._md.convert(self._params["pagedata"]) |  | 
|  131     result = re.sub(r"&#(\d+);", beautify_entities, result).encode("utf-8") |  | 
|  132  |  | 
|  133     if self._splithead: |  | 
|  134       return split_head(result) |  | 
|  135     else: |  141     else: | 
|  136       return result |  142       return result | 
|  137  |  143  | 
|  138 class TemplateConverter: |  144 class RawConverter(Converter): | 
|  139   def __init__(self, params, key="pagedata"): |  145   def get_html(self, source): | 
|  140     self._params = params |  146     result = self.insert_localized_strings(source, html_escapes) | 
|  141     self._splithead = key == "pagedata" |  147     result = self.process_links(result) | 
 |  148     return result | 
 |  149  | 
 |  150 class MarkdownConverter(Converter): | 
 |  151   def get_html(self, source): | 
 |  152     def remove_unnecessary_entities(match): | 
 |  153       char = chr(int(match.group(1))) | 
 |  154       if char in html_escapes: | 
 |  155         return match.group(0) | 
 |  156       else: | 
 |  157         return char | 
 |  158  | 
 |  159     escapes = {} | 
 |  160     for char in markdown.Markdown.ESCAPED_CHARS: | 
 |  161       escapes[char] = "&#" + str(ord(char)) + ";" | 
 |  162     for key, value in html_escapes.iteritems(): | 
 |  163       escapes[key] = value | 
 |  164  | 
 |  165     result = self.insert_localized_strings(source, escapes) | 
 |  166     result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert
     (result) | 
 |  167     result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) | 
 |  168     result = self.process_links(result) | 
 |  169     return result | 
 |  170  | 
 |  171 class TemplateConverter(Converter): | 
 |  172   def __init__(self, *args, **kwargs): | 
 |  173     Converter.__init__(self, *args, **kwargs) | 
 |  174  | 
|  142     filters = { |  175     filters = { | 
|  143       "translate": self.translate, |  176       "translate": self.translate, | 
|  144       "linkify": self.linkify, |  177       "linkify": self.linkify, | 
|  145       "toclist": self.toclist, |  178       "toclist": self.toclist, | 
|  146     } |  179     } | 
|  147     env = get_custom_template_environment(filters) |  180     self._env = get_custom_template_environment(filters) | 
|  148     self._template = env.from_string(params[key].decode("utf-8")) |  181  | 
|  149  |  182   def get_html(self, source): | 
|  150   def __call__(self): |  183     template = self._env.from_string(source) | 
|  151     result = self._template.render(self._params).encode("utf-8") |  184     return template.render(self._params) | 
|  152     if self._splithead: |  185  | 
|  153       return split_head(result) |  186   def translate(self, name, page=None, links=[]): | 
|  154     else: |  | 
|  155       return result |  | 
|  156  |  | 
|  157   def translate(self, name, page=None): |  | 
|  158     if page == None: |  187     if page == None: | 
|  159       localedata = self._params["localedata"] |  188       localedata = self._params["localedata"] | 
|  160     else: |  189     else: | 
|  161       localedata = self._params["source"].read_locale(self._params["locale"], pa
     ge) |  190       localedata = self._params["source"].read_locale(self._params["locale"], pa
     ge) | 
|  162  |  191     return jinja2.Markup(self.localize_string(name, localedata, html_escapes, li
     nks=links)) | 
|  163     try: |  | 
|  164       return localedata[name] |  | 
|  165     except KeyError: |  | 
|  166       raise Exception("Lookup failed for string %s used on page %s" % (name, sel
     f._params["page"])) |  | 
|  167  |  192  | 
|  168   def linkify(self, page, locale=None): |  193   def linkify(self, page, locale=None): | 
|  169     if locale == None: |  194     if locale == None: | 
|  170       locale = self._params["locale"] |  195       locale = self._params["locale"] | 
|  171  |  196  | 
|  172     locale, url = self._params["source"].resolve_link(page, locale) |  197     locale, url = self._params["source"].resolve_link(page, locale) | 
|  173     return jinja2.Markup('<a href="%s" hreflang="%s">' % ( |  198     return jinja2.Markup('<a href="%s" hreflang="%s">' % ( | 
|  174       jinja2.Markup.escape(url), |  199       jinja2.Markup.escape(url), | 
|  175       jinja2.Markup.escape(locale) |  200       jinja2.Markup.escape(locale) | 
|  176     )) |  201     )) | 
|  177  |  202  | 
|  178   def toclist(self, content): |  203   def toclist(self, content): | 
|  179     flat = [] |  204     flat = [] | 
|  180     for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>',
      content, re.S): |  205     for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'
     , content, re.S): | 
|  181       flat.append({ |  206       flat.append({ | 
|  182         "level": int(match.group(1)), |  207         "level": int(match.group(1)), | 
|  183         "anchor": jinja2.Markup(match.group(2)).unescape(), |  208         "anchor": jinja2.Markup(match.group(2)).unescape(), | 
|  184         "title": jinja2.Markup(match.group(3)).unescape(), |  209         "title": jinja2.Markup(match.group(3)).unescape(), | 
|  185         "subitems": [], |  210         "subitems": [], | 
|  186       }) |  211       }) | 
|  187  |  212  | 
|  188     structured = [] |  213     structured = [] | 
|  189     stack = [{"level": 0, "subitems": structured}] |  214     stack = [{"level": 0, "subitems": structured}] | 
|  190     for item in flat: |  215     for item in flat: | 
|  191       while stack[-1]["level"] >= item["level"]: |  216       while stack[-1]["level"] >= item["level"]: | 
|  192         stack.pop() |  217         stack.pop() | 
|  193       stack[-1]["subitems"].append(item) |  218       stack[-1]["subitems"].append(item) | 
|  194       stack.append(item) |  219       stack.append(item) | 
|  195     return structured |  220     return structured | 
|  196  |  221  | 
|  197 converters = { |  222 converters = { | 
 |  223   "raw": RawConverter, | 
|  198   "md": MarkdownConverter, |  224   "md": MarkdownConverter, | 
|  199   "tmpl": TemplateConverter, |  225   "tmpl": TemplateConverter, | 
|  200 } |  226 } | 
| LEFT | RIGHT |