OLD | NEW |
| (Empty) |
1 # coding: utf-8 | |
2 | |
3 # This file is part of the Adblock Plus web scripts, | |
4 # Copyright (C) 2006-2015 Eyeo GmbH | |
5 # | |
6 # Adblock Plus is free software: you can redistribute it and/or modify | |
7 # it under the terms of the GNU General Public License version 3 as | |
8 # published by the Free Software Foundation. | |
9 # | |
10 # Adblock Plus is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | |
17 | |
18 import os, imp, re, jinja2, markdown | |
19 from ..utils import get_custom_template_environment | |
20 | |
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
# are inserted into the <head> tag
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Report "head" as block-level; defer to Markdown for any other tag."""
    if tag == "head":
        return True
    return orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel
30 | |
# Characters that must be replaced by an entity when inserted into HTML,
# mapped to that entity. The apostrophe uses a numeric entity, which the
# Markdown converter's entity clean-up step recognises and leaves alone.
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}
38 | |
class Converter:
    """Base class for the page converters.

    Subclasses supply ``get_html(source)``. Calling an instance runs
    ``get_html`` on ``params[key]``, resolves include directives in the
    result and, when the key is "pagedata", separates the contents of any
    ``<head>`` elements from the rest of the markup.
    """

    # Regex fragments for the opening and closing delimiter of an include
    # directive (``<? include name ?>``); subclasses may override them.
    include_start_regex = '<'
    include_end_regex = '>'

    def __init__(self, params, key="pagedata"):
        """Store the parameters and consume leading ``name = value`` lines.

        ``params`` is modified in place: every leading line of
        ``params[key]`` that looks like ``name = value`` is removed from the
        text and stored as ``params[name] = value`` (both stripped).
        """
        self._params = params
        self._key = key

        # Read in any parameters specified at the beginning of the file
        lines = params[key].splitlines(True)
        while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]):
            name, value = lines.pop(0).split("=", 1)
            params[name.strip()] = value.strip()
        params[key] = "".join(lines)

    def localize_string(self, name, localedata, escapes, links=None):
        """Return the escaped translation of ``name`` with allowed markup.

        The string is looked up in ``localedata``, stripped, and every
        character found in ``escapes`` is replaced by its mapped value.
        Afterwards the escaped forms of ``<a>``, ``<strong>`` and ``<em>``
        pairs are turned back into real tags; each ``<a>`` pair consumes one
        entry of ``links`` (in order) as its ``href``.

        ``links`` is not modified. Raises ``Exception`` if ``name`` is not
        present in ``localedata``.
        """
        def escape(s):
            return re.sub(
                r".",
                lambda match: escapes.get(match.group(0), match.group(0)),
                s, flags=re.S
            )

        def tag_pattern(tag):
            # Matches the escaped form of <tag>...</tag> with no raw angle
            # brackets in between, so already-restored tags are not spanned.
            return r"%s([^<>]*?)%s" % (
                re.escape(escape("<%s>" % tag)),
                re.escape(escape("</%s>" % tag)),
            )

        try:
            result = localedata[name].strip()
        except KeyError:
            raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"]))

        result = escape(result)

        # Insert links. Iterate over a copy so the caller's list survives,
        # and use a replacement function so that backslashes in the URL are
        # inserted literally instead of being read as template escapes.
        for url in list(links or ()):
            result = re.sub(
                tag_pattern("a"),
                lambda match, url=url: '<a href="%s">%s</a>' % (url, match.group(1)),
                result, count=1, flags=re.S
            )

        # <strong> and <em> tags are allowed
        for tag in ("strong", "em"):
            result = re.sub(
                tag_pattern(tag),
                r"<%s>\1</%s>" % (tag, tag),
                result, flags=re.S
            )
        return result

    def insert_localized_strings(self, text, escapes):
        """Replace ``$name$`` / ``$name(link, ...)$`` markers in ``text``.

        Each marker is substituted by ``localize_string`` applied to the
        page's ``localedata``; the optional parenthesised, comma-separated
        list provides the link targets.
        """
        def lookup_string(match):
            name, links = match.groups()
            if links:
                links = [link.strip() for link in links.strip("()").split(",")]
            else:
                links = []
            return self.localize_string(name, self._params["localedata"], escapes, links)

        return re.sub(
            r"\$([\w\-]+)(\([^()$]+\))?\$",
            lookup_string,
            text
        )

    def process_links(self, text):
        """Rewrite ``<a href>`` and ``<img src>`` URLs via the page source.

        Every URL is passed to ``source.resolve_link``; when that yields a
        new URL it replaces the original, and anchors additionally receive
        an ``hreflang`` attribute with the returned locale.
        """
        def process_link(match):
            pre, attr, url, post = match.groups()
            url = jinja2.Markup(url).unescape()

            locale, new_url = self._params["source"].resolve_link(url, self._params["locale"])
            if new_url is not None:
                url = new_url
                if attr == "href":
                    post += ' hreflang="%s"' % jinja2.Markup.escape(locale)

            return "".join((pre, jinja2.Markup.escape(url), post))

        text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text)
        text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text)
        return text

    def resolve_includes(self, text):
        """Replace include directives in ``text`` by the converted include.

        The first format in the module-level ``converters`` mapping for
        which the source has the named include is used. Raises ``Exception``
        if no format provides it.
        """
        def resolve_include(match):
            name = match.group(1)
            for format, converter_class in converters.items():
                if self._params["source"].has_include(name, format):
                    self._params["includedata"] = self._params["source"].read_include(name, format)
                    converter = converter_class(self._params, key="includedata")
                    return converter()
            raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"]))

        return re.sub(
            r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % (
                self.include_start_regex,
                self.include_end_regex
            ),
            resolve_include,
            text
        )

    def __call__(self):
        """Convert ``params[key]`` and return the resulting markup.

        For the "pagedata" key the return value is a ``(head, body)`` tuple
        where ``head`` joins the contents of all ``<head>`` elements and
        ``body`` is the markup with those elements removed; for any other
        key the converted markup is returned as a single string.
        """
        result = self.get_html(self._params[self._key])
        result = self.resolve_includes(result)
        if self._key != "pagedata":
            return result

        head = []

        def add_to_head(match):
            head.append(match.group(1))
            return ""

        body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S)
        return "".join(head), body
153 | |
class RawConverter(Converter):
    """Converter that only localizes strings and processes links."""

    def get_html(self, source):
        """Return ``source`` with localized strings inserted and links
        processed; no other transformation is applied."""
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)
159 | |
class MarkdownConverter(Converter):
    """Converter that renders the page source with Markdown."""

    # Accept the include delimiters both raw and in their HTML-escaped form
    # (presumably because Markdown may escape them - confirm).
    include_start_regex = r'(?:%s|%s)' % (
        Converter.include_start_regex,
        re.escape(jinja2.escape(Converter.include_start_regex))
    )
    include_end_regex = r'(?:%s|%s)' % (
        Converter.include_end_regex,
        re.escape(jinja2.escape(Converter.include_end_regex))
    )

    def get_html(self, source):
        """Insert localized strings, run Markdown and process links.

        Localized strings are inserted with Markdown's special characters
        replaced by numeric entities; after conversion those entities are
        turned back into plain characters unless the character is one of
        the keys of ``html_escapes``.
        """
        def remove_unnecessary_entities(match):
            char = unichr(int(match.group(1)))
            # Keep the entity when the character itself needs HTML escaping
            return match.group(0) if char in html_escapes else char

        # Numeric entities for Markdown's special characters, overridden by
        # the regular HTML escapes where both apply.
        escapes = dict(
            (char, "&#" + str(ord(char)) + ";")
            for char in markdown.Markdown.ESCAPED_CHARS
        )
        escapes.update(html_escapes)

        # NOTE(review): Markdown documents this option as `output_format`;
        # confirm that the `output` keyword is actually honoured.
        md = markdown.Markdown(output="html5", extensions=["attr_list"])
        md.preprocessors["html_block"].markdown_in_raw = True

        localized = self.insert_localized_strings(source, escapes)
        converted = md.convert(localized)
        converted = re.sub(r"&#(\d+);", remove_unnecessary_entities, converted)
        return self.process_links(converted)
192 | |
class TemplateConverter(Converter):
    """Converter that renders the page source as a Jinja2 template."""

    class _SourceLoader(jinja2.BaseLoader):
        """Jinja2 loader reading ``<name>.tmpl`` files from the page source."""

        def __init__(self, source):
            self.source = source

        def get_source(self, environment, template):
            try:
                return self.source.read_file(template + ".tmpl"), None, None
            except Exception:
                raise jinja2.TemplateNotFound(template)

    def __init__(self, *args, **kwargs):
        """Set up the template environment with built-in and custom filters.

        Every ``.py`` file listed by the source under "filters" is executed
        as a module and must define a function named like the file; that
        function is registered as a filter. Raises ``Exception`` when the
        expected function is missing.
        """
        Converter.__init__(self, *args, **kwargs)

        filters = {
            "translate": self.translate,
            "linkify": self.linkify,
            "toclist": self.toclist,
        }

        source = self._params["source"]
        for filename in source.list_files("filters"):
            root, ext = os.path.splitext(filename)
            if ext.lower() != ".py":
                continue

            path = "%s/%s" % ("filters", filename)
            code = source.read_file(path)
            module = imp.new_module(root.replace("/", "."))
            # NOTE: this runs arbitrary code from the site source; it must
            # only ever be used with trusted content. The call form of exec
            # is accepted by both Python 2 and Python 3.
            exec(code, module.__dict__)

            func = os.path.basename(root)
            if not hasattr(module, func):
                raise Exception("Expected function %s not found in filter file %s" % (func, filename))
            filters[func] = getattr(module, func)
            filters[func].module_ref = module  # Prevent garbage collection

        self._env = get_custom_template_environment(filters, self._SourceLoader(source))

    def get_html(self, source):
        """Render ``source`` as a template with the page parameters."""
        template = self._env.from_string(source)
        return template.render(self._params)

    def translate(self, name, page=None, links=None):
        """Template filter: return the localized string ``name`` as markup.

        Strings are taken from the current page's locale data, or from the
        locale data of ``page`` when given. ``links`` supplies the targets
        for ``<a>`` pairs in the string and is never modified.
        """
        if page is None:
            localedata = self._params["localedata"]
        else:
            localedata = self._params["source"].read_locale(self._params["locale"], page)
        # Hand over a copy so that the caller's list is left untouched
        return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=list(links or ())))

    def linkify(self, page, locale=None, **attrs):
        """Template filter: return an opening ``<a>`` tag linking to ``page``.

        The link is resolved for ``locale`` (the current locale by default);
        additional keyword arguments become further attributes of the tag.
        """
        if locale is None:
            locale = self._params["locale"]

        locale, url = self._params["source"].resolve_link(page, locale)
        attributes = [("href", url), ("hreflang", locale)] + list(attrs.items())
        return jinja2.Markup('<a%s>' % ''.join(
            ' %s="%s"' % (name, jinja2.escape(value)) for name, value in attributes
        ))

    def toclist(self, content):
        """Template filter: build a nested table of contents from headings.

        Every ``<hN id="...">`` element in ``content`` becomes a dict with
        the keys "level", "anchor", "title" and "subitems"; headings are
        nested below the closest preceding heading of a lower level.
        Returns the list of top-level items.
        """
        flat = []
        for match in re.finditer(r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>', content, re.S):
            flat.append({
                "level": int(match.group(1)),
                "anchor": jinja2.Markup(match.group(2)).unescape(),
                "title": jinja2.Markup(match.group(3)).unescape(),
                "subitems": [],
            })

        structured = []
        stack = [{"level": 0, "subitems": structured}]
        for item in flat:
            # Never pop the root sentinel, even for a (bogus) level 0 heading
            while len(stack) > 1 and stack[-1]["level"] >= item["level"]:
                stack.pop()
            stack[-1]["subitems"].append(item)
            stack.append(item)
        return structured
272 | |
# Maps a source format name to the converter class handling it. Include
# resolution walks this mapping and uses the first format for which the
# source provides the requested include.
converters = {
    "raw": RawConverter,
    "md": MarkdownConverter,
    "tmpl": TemplateConverter,
}
OLD | NEW |