LEFT | RIGHT |
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import re, jinja2, markdown | 18 import re, jinja2, markdown |
19 from sitescripts.utils import cached, setupStderr, get_custom_template_environme
nt | 19 from ..utils import get_custom_template_environment |
20 | 20 |
21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are | 21 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are |
22 # inserted into the <head> tag | 22 # inserted into the <head> tag |
23 orig_isBlockLevel = markdown.util.isBlockLevel | 23 orig_isBlockLevel = markdown.util.isBlockLevel |
24 def isBlockLevel(tag): | 24 def isBlockLevel(tag): |
25 if tag == "head": | 25 if tag == "head": |
26 return True | 26 return True |
27 else: | 27 else: |
28 return orig_isBlockLevel(tag) | 28 return orig_isBlockLevel(tag) |
29 markdown.util.isBlockLevel = isBlockLevel | 29 markdown.util.isBlockLevel = isBlockLevel |
30 | 30 |
31 def split_head(text): | 31 html_escapes = { |
32 head = [] | 32 "<": "&lt;", |
33 def add_to_head(match): | 33 ">": "&gt;", |
34 head.append(match.group(1)) | 34 "&": "&amp;", |
35 return "" | 35 "\"": "&quot;", |
36 body = re.sub(r"<head>(.*?)</head>", | 36 "'": "&#39;", |
37 add_to_head, text, flags=re.S) | 37 } |
38 return "".join(head), body | 38 |
39 | 39 class Converter: |
40 class MarkdownConverter: | 40 def __init__(self, params, key="pagedata"): |
41 class Localizer(markdown.preprocessors.Preprocessor): | 41 self._params = params |
42 def __init__(self, params): | 42 self._key = key |
43 self._params = params | 43 |
44 | 44 # Read in any parameters specified at the beginning of the file |
45 self._escaped = set(markdown.Markdown.ESCAPED_CHARS) | 45 lines = params[key].splitlines(True) |
46 self._escaped.add("<"); | 46 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): |
47 self._escaped.add(">"); | 47 name, value = lines.pop(0).split("=", 1) |
48 self._escaped.add("&"); | 48 params[name.strip()] = value.strip() |
49 self._escaped.add("\""); | 49 params[key] = "".join(lines) |
50 self._escaped.add("'"); | 50 |
51 | 51 def localize_string(self, name, localedata, escapes, links=[]): |
52 def run(self, lines): | 52 def escape(s): |
53 new_lines = [] | 53 return re.sub(r".", |
54 for line in lines: | 54 lambda match: escapes.get(match.group(0), match.group(0)), |
55 # Replace localized strings | 55 s, flags=re.S) |
56 new_lines.append(re.sub(r"\$([\w\-]+)(\([^()$]+\))?\$", | 56 def re_escape(s): |
57 lambda match: self.lookup_string(match.group(1), match.group(2)), | 57 return re.escape(escape(s)) |
58 line)) | 58 |
59 return new_lines | 59 try: |
60 | 60 result = localedata[name].strip() |
61 def lookup_string(self, name, links): | 61 except KeyError: |
62 def escape(char): | 62 raise Exception("Lookup failed for string %s used on page %s" % (name, sel
f._params["page"])) |
63 if char in self._escaped: | 63 |
64 return "&#" + str(ord(char)) + ";" | 64 # Insert links |
65 else: | 65 result = escape(result) |
66 return char | 66 while links: |
67 | 67 result = re.sub( |
68 try: | 68 r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")), |
69 result = self._params["localedata"][name].strip() | 69 r'<a href="%s">\1</a>' % links.pop(0), |
70 except KeyError: | 70 result, 1, flags=re.S |
71 raise Exception("Lookup failed for string %s used on page %s" % (name, s
elf._params["page"])) | 71 ) |
72 | 72 |
73 result = re.sub(r".", lambda match: escape(match.group(0)), result, flags=
re.S) | 73 # <strong> and <em> tags are allowed |
| 74 result = re.sub( |
| 75 r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")), |
| 76 r"<strong>\1</strong>", |
| 77 result, flags=re.S |
| 78 ) |
| 79 result = re.sub( |
| 80 r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")), |
| 81 r"<em>\1</em>", |
| 82 result, flags=re.S |
| 83 ) |
| 84 return result |
| 85 |
| 86 def insert_localized_strings(self, text, escapes): |
| 87 def lookup_string(match): |
| 88 name, links = match.groups() |
74 if links: | 89 if links: |
75 links = map(unicode.strip, links.strip("()").split(",")) | 90 links = map(unicode.strip, links.strip("()").split(",")) |
76 while len(links): | 91 else: |
77 result = re.sub(r"<a>(.*?)</a>", r'<a href="%s">\1</a>
' % links.pop(0), result, 1, flags=re.S) | 92 links = [] |
78 return result | 93 return self.localize_string(name, self._params["localedata"], escapes, lin
ks) |
79 | 94 |
80 class Linkifier(markdown.postprocessors.Postprocessor): | 95 return re.sub( |
81 def __init__(self, params): | 96 r"\$([\w\-]+)(\([^()$]+\))?\$", |
82 self._params = params | 97 lookup_string, |
83 | 98 text |
84 def process_link(self, match): | 99 ) |
| 100 |
| 101 def process_links(self, text): |
| 102 def process_link(match): |
85 pre, attr, url, post = match.groups() | 103 pre, attr, url, post = match.groups() |
86 url = jinja2.Markup(url).unescape() | 104 url = jinja2.Markup(url).unescape() |
87 | 105 |
88 locale, new_url = self._params["source"].resolve_link(url, self._params["l
ocale"]) | 106 locale, new_url = self._params["source"].resolve_link(url, self._params["l
ocale"]) |
89 if new_url != None: | 107 if new_url != None: |
90 url = new_url | 108 url = new_url |
91 if attr == "href": | 109 if attr == "href": |
92 post += ' hreflang="%s"' % jinja2.Markup.escape(locale) | 110 post += ' hreflang="%s"' % jinja2.Markup.escape(locale) |
93 | 111 |
94 return "".join((pre, jinja2.Markup.escape(url), post)) | 112 return "".join((pre, jinja2.Markup.escape(url), post)) |
95 | 113 |
96 def run(self, text): | 114 text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text) |
97 text = re.sub(r"(<a [^<>]*\b(href)=\")([^<>\"]+)(\")", self.process_link,
text) | 115 text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text) |
98 text = re.sub(r"(<img [^<>]*\b(src)=\")([^<>\"]+)(\")", self.process_link,
text) | 116 return text |
99 return text | 117 |
100 | 118 def resolve_includes(self, text): |
101 def __init__(self, params, key="pagedata"): | 119 def resolve_include(match): |
102 self._params = params | 120 global converters |
103 self._splithead = key == "pagedata" | 121 name = match.group(1) |
104 | 122 for format, converter_class in converters.iteritems(): |
105 self._md = markdown.Markdown(output="html5", extensions=["attr_list"]) | 123 if self._params["source"].has_include(name, format): |
106 self._md.preprocessors.add("localizer", self.Localizer(params), "_begin") | 124 self._params["includedata"] = self._params["source"].read_include(name
, format) |
107 self._md.postprocessors.add("linkifier", self.Linkifier(params), "_end") | 125 converter = converter_class(self._params, key="includedata") |
108 | 126 return converter() |
109 params["pagedata"] = params["pagedata"].decode("utf-8") | 127 raise Exception("Failed to resolve include %s in page %s" % (name, self._p
arams["page"])) |
110 | 128 |
111 # Read in any parameters specified at the beginning of the file | 129 return re.sub(r'<\?\s*include\s+([^\s<>"]+)\s*\?>', resolve_include, text) |
112 lines = params["pagedata"].splitlines(True) | |
113 while len(lines) and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | |
114 key, value = lines.pop(0).split("=", 1) | |
115 params[key.strip()] = value.strip() | |
116 params["pagedata"] = "".join(lines) | |
117 | 130 |
118 def __call__(self): | 131 def __call__(self): |
119 def beautify_entities(match): | 132 result = self.get_html(self._params[self._key]) |
120 escape = { | 133 result = self.resolve_includes(result) |
121 "<": "&lt;", | 134 if self._key == "pagedata": |
122 ">": "&gt;", | 135 head = [] |
123 "&": "&amp;", | 136 def add_to_head(match): |
124 "\"": "&quot;", | 137 head.append(match.group(1)) |
125 "'": "&#39;", | 138 return "" |
126 } | 139 body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S) |
127 char = chr(int(match.group(1))) | 140 return "".join(head), body |
128 return escape.get(char, char) | |
129 | |
130 result = self._md.convert(self._params["pagedata"]) | |
131 result = re.sub(r"&#(\d+);", beautify_entities, result).encode("utf-8") | |
132 | |
133 if self._splithead: | |
134 return split_head(result) | |
135 else: | 141 else: |
136 return result | 142 return result |
137 | 143 |
138 class TemplateConverter: | 144 class RawConverter(Converter): |
139 def __init__(self, params, key="pagedata"): | 145 def get_html(self, source): |
140 self._params = params | 146 result = self.insert_localized_strings(source, html_escapes) |
141 self._splithead = key == "pagedata" | 147 result = self.process_links(result) |
| 148 return result |
| 149 |
| 150 class MarkdownConverter(Converter): |
| 151 def get_html(self, source): |
| 152 def remove_unnecessary_entities(match): |
| 153 char = chr(int(match.group(1))) |
| 154 if char in html_escapes: |
| 155 return match.group(0) |
| 156 else: |
| 157 return char |
| 158 |
| 159 escapes = {} |
| 160 for char in markdown.Markdown.ESCAPED_CHARS: |
| 161 escapes[char] = "&#" + str(ord(char)) + ";" |
| 162 for key, value in html_escapes.iteritems(): |
| 163 escapes[key] = value |
| 164 |
| 165 result = self.insert_localized_strings(source, escapes) |
| 166 result = markdown.Markdown(output="html5", extensions=["attr_list"]).convert
(result) |
| 167 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) |
| 168 result = self.process_links(result) |
| 169 return result |
| 170 |
| 171 class TemplateConverter(Converter): |
| 172 def __init__(self, *args, **kwargs): |
| 173 Converter.__init__(self, *args, **kwargs) |
| 174 |
142 filters = { | 175 filters = { |
143 "translate": self.translate, | 176 "translate": self.translate, |
144 "linkify": self.linkify, | 177 "linkify": self.linkify, |
145 "toclist": self.toclist, | 178 "toclist": self.toclist, |
146 } | 179 } |
147 env = get_custom_template_environment(filters) | 180 self._env = get_custom_template_environment(filters) |
148 self._template = env.from_string(params[key].decode("utf-8")) | 181 |
149 | 182 def get_html(self, source): |
150 def __call__(self): | 183 template = self._env.from_string(source) |
151 result = self._template.render(self._params).encode("utf-8") | 184 return template.render(self._params) |
152 if self._splithead: | 185 |
153 return split_head(result) | 186 def translate(self, name, page=None, links=[]): |
154 else: | |
155 return result | |
156 | |
157 def translate(self, name, page=None): | |
158 if page == None: | 187 if page == None: |
159 localedata = self._params["localedata"] | 188 localedata = self._params["localedata"] |
160 else: | 189 else: |
161 localedata = self._params["source"].read_locale(self._params["locale"], pa
ge) | 190 localedata = self._params["source"].read_locale(self._params["locale"], pa
ge) |
162 | 191 return jinja2.Markup(self.localize_string(name, localedata, html_escapes, li
nks=links)) |
163 try: | |
164 return localedata[name] | |
165 except KeyError: | |
166 raise Exception("Lookup failed for string %s used on page %s" % (name, sel
f._params["page"])) | |
167 | 192 |
168 def linkify(self, page, locale=None): | 193 def linkify(self, page, locale=None): |
169 if locale == None: | 194 if locale == None: |
170 locale = self._params["locale"] | 195 locale = self._params["locale"] |
171 | 196 |
172 locale, url = self._params["source"].resolve_link(page, locale) | 197 locale, url = self._params["source"].resolve_link(page, locale) |
173 return jinja2.Markup('<a href="%s" hreflang="%s">' % ( | 198 return jinja2.Markup('<a href="%s" hreflang="%s">' % ( |
174 jinja2.Markup.escape(url), | 199 jinja2.Markup.escape(url), |
175 jinja2.Markup.escape(locale) | 200 jinja2.Markup.escape(locale) |
176 )) | 201 )) |
177 | 202 |
178 def toclist(self, content): | 203 def toclist(self, content): |
179 flat = [] | 204 flat = [] |
180 for match in re.finditer(r'<h(\d) [^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>',
content, re.S): | 205 for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'
, content, re.S): |
181 flat.append({ | 206 flat.append({ |
182 "level": int(match.group(1)), | 207 "level": int(match.group(1)), |
183 "anchor": jinja2.Markup(match.group(2)).unescape(), | 208 "anchor": jinja2.Markup(match.group(2)).unescape(), |
184 "title": jinja2.Markup(match.group(3)).unescape(), | 209 "title": jinja2.Markup(match.group(3)).unescape(), |
185 "subitems": [], | 210 "subitems": [], |
186 }) | 211 }) |
187 | 212 |
188 structured = [] | 213 structured = [] |
189 stack = [{"level": 0, "subitems": structured}] | 214 stack = [{"level": 0, "subitems": structured}] |
190 for item in flat: | 215 for item in flat: |
191 while stack[-1]["level"] >= item["level"]: | 216 while stack[-1]["level"] >= item["level"]: |
192 stack.pop() | 217 stack.pop() |
193 stack[-1]["subitems"].append(item) | 218 stack[-1]["subitems"].append(item) |
194 stack.append(item) | 219 stack.append(item) |
195 return structured | 220 return structured |
196 | 221 |
197 converters = { | 222 converters = { |
| 223 "raw": RawConverter, |
198 "md": MarkdownConverter, | 224 "md": MarkdownConverter, |
199 "tmpl": TemplateConverter, | 225 "tmpl": TemplateConverter, |
200 } | 226 } |
LEFT | RIGHT |