Left: | ||
Right: |
OLD | NEW |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2015 Eyeo GmbH | 4 # Copyright (C) 2006-2015 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 16 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 |
18 import os, imp, re, jinja2, markdown | 18 import os |
19 import HTMLParser | |
20 import imp | |
21 import re | |
22 | |
23 import jinja2 | |
24 import markdown | |
25 | |
19 | 26 |
20 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are | 27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are |
21 # inserted into the <head> tag | 28 # inserted into the <head> tag |
22 orig_isBlockLevel = markdown.util.isBlockLevel | 29 orig_isBlockLevel = markdown.util.isBlockLevel |
23 def isBlockLevel(tag): | 30 def isBlockLevel(tag): |
24 if tag == "head": | 31 if tag == "head": |
25 return True | 32 return True |
26 else: | 33 else: |
27 return orig_isBlockLevel(tag) | 34 return orig_isBlockLevel(tag) |
28 markdown.util.isBlockLevel = isBlockLevel | 35 markdown.util.isBlockLevel = isBlockLevel |
29 | 36 |
30 html_escapes = { | 37 html_escapes = { |
31 "<": "&lt;", | 38 "<": "&lt;", |
32 ">": "&gt;", | 39 ">": "&gt;", |
33 "&": "&amp;", | 40 "&": "&amp;", |
34 "\"": "&quot;", | 41 "\"": "&quot;", |
35 "'": "&#39;", | 42 "'": "&#39;", |
36 } | 43 } |
37 | 44 |
45 class AttributeParser(HTMLParser.HTMLParser): | |
Sebastian Noack
2015/03/12 20:33:46
That makes actually sense, using a proper parser i
Wladimir Palant
2015/03/12 20:57:02
I actually tested this - invalid HTML will be norm
| |
46 _string = None | |
47 _attrs = None | |
48 | |
49 def __init__(self, whitelist): | |
50 self._whitelist = whitelist | |
51 | |
52 def parse(self, text, pagename): | |
53 self.reset() | |
54 self._string = "" | |
Sebastian Noack
2015/03/12 20:33:46
I'd rather use a list here, joining it when done.
Wladimir Palant
2015/03/12 20:57:02
Done.
| |
55 self._attrs = {} | |
56 self._pagename = pagename | |
57 | |
58 try: | |
59 self.feed(text) | |
60 return self._string, self._attrs | |
61 finally: | |
62 self._string = None | |
63 self._attrs = None | |
64 self._pagename = None | |
65 | |
66 def handle_starttag(self, tag, attrs): | |
67 if tag not in self._whitelist: | |
68 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s " % (tag, self._pagename)) | |
69 self._attrs.setdefault(tag, []).append(attrs) | |
70 self._string += "<%s>" % tag | |
71 | |
72 def handle_endtag(self, tag): | |
73 self._string += "</%s>" % tag | |
74 | |
75 def handle_data(self, data): | |
76 # Note: lack of escaping here is intentional. The result is a locale string, | |
77 # HTML escaping is applied when this string is inserted into the document. | |
78 self._string += data | |
79 | |
80 def handle_entityref(self, name): | |
81 self._string += self.unescape("&%s;" % name) | |
82 | |
83 def handle_charref(self, name): | |
84 self._string += self.unescape("&#%s;" % name) | |
85 | |
38 class Converter: | 86 class Converter: |
87 whitelist = set(["a", "em", "strong"]) | |
88 | |
39 def __init__(self, params, key="pagedata"): | 89 def __init__(self, params, key="pagedata"): |
40 self._params = params | 90 self._params = params |
41 self._key = key | 91 self._key = key |
92 self._attribute_parser = AttributeParser(self.whitelist) | |
42 | 93 |
43 # Read in any parameters specified at the beginning of the file | 94 # Read in any parameters specified at the beginning of the file |
44 lines = params[key].splitlines(True) | 95 lines = params[key].splitlines(True) |
45 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | 96 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): |
46 name, value = lines.pop(0).split("=", 1) | 97 name, value = lines.pop(0).split("=", 1) |
47 params[name.strip()] = value.strip() | 98 params[name.strip()] = value.strip() |
48 params[key] = "".join(lines) | 99 params[key] = "".join(lines) |
49 | 100 |
50 def localize_string(self, name, localedata, escapes, links=[]): | 101 def localize_string(self, name, default, localedata, escapes): |
51 def escape(s): | 102 def escape(s): |
52 return re.sub(r".", | 103 return re.sub(r".", |
53 lambda match: escapes.get(match.group(0), match.group(0)), | 104 lambda match: escapes.get(match.group(0), match.group(0)), |
54 s, flags=re.S) | 105 s, flags=re.S) |
55 def re_escape(s): | 106 def re_escape(s): |
56 return re.escape(escape(s)) | 107 return re.escape(escape(s)) |
57 | 108 |
58 try: | 109 # Extract tag attributes from default string |
110 default, saved_attributes = self._attribute_parser.parse(default, self._params["page"]) | |
111 | |
112 # Get translation | |
113 if self._params["locale"] != self._params["defaultlocale"] and name in localedata: | |
59 result = localedata[name].strip() | 114 result = localedata[name].strip() |
60 except KeyError: | 115 else: |
61 raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"])) | 116 result = default |
62 | 117 |
63 # Insert links | 118 # Insert attributes |
64 result = escape(result) | 119 result = escape(result) |
65 while links: | 120 for tag in self.whitelist: |
121 saved = saved_attributes.get(tag, []) | |
122 for attrs in saved: | |
123 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs) | |
124 result = re.sub( | |
125 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)), | |
126 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag), | |
127 result, 1, flags=re.S | |
128 ) | |
66 result = re.sub( | 129 result = re.sub( |
67 r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")), | 130 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)), |
68 r'<a href="%s">\1</a>' % links.pop(0), | 131 r"<%s>\1</%s>" % (tag, tag), |
69 result, 1, flags=re.S | 132 result, flags=re.S |
70 ) | 133 ) |
71 | |
72 # <strong> and <em> tags are allowed | |
73 result = re.sub( | |
74 r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")), | |
75 r"<strong>\1</strong>", | |
76 result, flags=re.S | |
77 ) | |
78 result = re.sub( | |
79 r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")), | |
80 r"<em>\1</em>", | |
81 result, flags=re.S | |
82 ) | |
83 return result | 134 return result |
84 | 135 |
85 def insert_localized_strings(self, text, escapes): | 136 def insert_localized_strings(self, text, escapes, to_html=lambda s: s): |
86 def lookup_string(match): | 137 def lookup_string(match): |
87 name, links = match.groups() | 138 name, comment, default = match.groups() |
88 if links: | 139 default = to_html(default).strip() |
89 links = map(unicode.strip, links.strip("()").split(",")) | 140 |
90 else: | 141 # Note: We currently ignore the comment, it is only relevant when |
91 links = [] | 142 # generating the master translation. |
92 return self.localize_string(name, self._params["localedata"], escapes, links) | 143 return self.localize_string(name, default, self._params["localedata"], escapes) |
93 | 144 |
94 return re.sub( | 145 return re.sub( |
95 r"\$([\w\-]+)(\([^()$]+\))?\$", | 146 r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}", |
96 lookup_string, | 147 lookup_string, |
97 text | 148 text, |
149 flags=re.S | |
98 ) | 150 ) |
99 | 151 |
100 def process_links(self, text): | 152 def process_links(self, text): |
101 def process_link(match): | 153 def process_link(match): |
102 pre, attr, url, post = match.groups() | 154 pre, attr, url, post = match.groups() |
103 url = jinja2.Markup(url).unescape() | 155 url = jinja2.Markup(url).unescape() |
104 | 156 |
105 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"]) | 157 locale, new_url = self._params["source"].resolve_link(url, self._params["locale"]) |
106 if new_url != None: | 158 if new_url != None: |
107 url = new_url | 159 url = new_url |
(...skipping 11 matching lines...) Expand all Loading... | |
119 | 171 |
120 def resolve_includes(self, text): | 172 def resolve_includes(self, text): |
121 def resolve_include(match): | 173 def resolve_include(match): |
122 global converters | 174 global converters |
123 name = match.group(1) | 175 name = match.group(1) |
124 for format, converter_class in converters.iteritems(): | 176 for format, converter_class in converters.iteritems(): |
125 if self._params["source"].has_include(name, format): | 177 if self._params["source"].has_include(name, format): |
126 self._params["includedata"] = self._params["source"].read_include(name, format) | 178 self._params["includedata"] = self._params["source"].read_include(name, format) |
127 converter = converter_class(self._params, key="includedata") | 179 converter = converter_class(self._params, key="includedata") |
128 return converter() | 180 return converter() |
129 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"])) | 181 raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"])) |
130 | 182 |
131 return re.sub( | 183 return re.sub( |
132 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( | 184 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( |
133 self.include_start_regex, | 185 self.include_start_regex, |
134 self.include_end_regex | 186 self.include_end_regex |
135 ), | 187 ), |
136 resolve_include, | 188 resolve_include, |
137 text | 189 text |
138 ) | 190 ) |
139 | 191 |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
176 | 228 |
177 escapes = {} | 229 escapes = {} |
178 for char in markdown.Markdown.ESCAPED_CHARS: | 230 for char in markdown.Markdown.ESCAPED_CHARS: |
179 escapes[char] = "&#" + str(ord(char)) + ";" | 231 escapes[char] = "&#" + str(ord(char)) + ";" |
180 for key, value in html_escapes.iteritems(): | 232 for key, value in html_escapes.iteritems(): |
181 escapes[key] = value | 233 escapes[key] = value |
182 | 234 |
183 md = markdown.Markdown(output="html5", extensions=["attr_list"]) | 235 md = markdown.Markdown(output="html5", extensions=["attr_list"]) |
184 md.preprocessors["html_block"].markdown_in_raw = True | 236 md.preprocessors["html_block"].markdown_in_raw = True |
185 | 237 |
186 result = self.insert_localized_strings(source, escapes) | 238 def to_html(s): |
239 return re.sub(r'</?p>', '', md.convert(s)) | |
240 | |
241 result = self.insert_localized_strings(source, escapes, to_html) | |
187 result = md.convert(result) | 242 result = md.convert(result) |
188 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) | 243 result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) |
189 result = self.process_links(result) | 244 result = self.process_links(result) |
190 return result | 245 return result |
191 | 246 |
192 class TemplateConverter(Converter): | 247 class TemplateConverter(Converter): |
193 class _SourceLoader(jinja2.BaseLoader): | 248 class _SourceLoader(jinja2.BaseLoader): |
194 def __init__(self, source): | 249 def __init__(self, source): |
195 self.source = source | 250 self.source = source |
196 | 251 |
197 def get_source(self, environment, template): | 252 def get_source(self, environment, template): |
198 try: | 253 try: |
199 return self.source.read_file(template + ".tmpl"), None, None | 254 return self.source.read_file(template + ".tmpl"), None, None |
200 except Exception: | 255 except Exception: |
201 raise jinja2.TemplateNotFound(template) | 256 raise jinja2.TemplateNotFound(template) |
202 | 257 |
203 def __init__(self, *args, **kwargs): | 258 def __init__(self, *args, **kwargs): |
204 Converter.__init__(self, *args, **kwargs) | 259 Converter.__init__(self, *args, **kwargs) |
205 | 260 |
206 filters = { | 261 filters = { |
207 "translate": self.translate, | 262 "translate": self.translate, |
208 "linkify": self.linkify, | 263 "linkify": self.linkify, |
209 "toclist": self.toclist, | 264 "toclist": self.toclist, |
210 } | 265 } |
211 | 266 |
267 globals = { | |
268 "get_string": self.get_string, | |
269 } | |
270 | |
212 for filename in self._params["source"].list_files("filters"): | 271 for filename in self._params["source"].list_files("filters"): |
213 root, ext = os.path.splitext(filename) | 272 root, ext = os.path.splitext(filename) |
214 if ext.lower() != ".py": | 273 if ext.lower() != ".py": |
215 continue | 274 continue |
216 | 275 |
217 path = "%s/%s" % ("filters", filename) | 276 path = "%s/%s" % ("filters", filename) |
218 code = self._params["source"].read_file(path) | 277 code = self._params["source"].read_file(path) |
219 module = imp.new_module(root.replace("/", ".")) | 278 module = imp.new_module(root.replace("/", ".")) |
220 exec code in module.__dict__ | 279 exec code in module.__dict__ |
221 | 280 |
222 func = os.path.basename(root) | 281 func = os.path.basename(root) |
223 if not hasattr(module, func): | 282 if not hasattr(module, func): |
224 raise Exception("Expected function %s not found in filter file %s" % (func, filename)) | 283 raise Exception("Expected function %s not found in filter file %s" % (func, filename)) |
225 filters[func] = getattr(module, func) | 284 filters[func] = getattr(module, func) |
226 filters[func].module_ref = module # Prevent garbage collection | 285 filters[func].module_ref = module # Prevent garbage collection |
227 | 286 |
228 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True) | 287 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True) |
229 self._env.filters.update(filters) | 288 self._env.filters.update(filters) |
289 self._env.globals.update(globals) | |
230 | 290 |
231 def get_html(self, source): | 291 def get_html(self, source): |
232 template = self._env.from_string(source) | 292 template = self._env.from_string(source) |
233 return template.render(self._params) | 293 return template.render(self._params) |
234 | 294 |
235 def translate(self, name, page=None, links=[]): | 295 def translate(self, default, name, comment=None): |
236 if page == None: | 296 # Note: We currently ignore the comment, it is only relevant when |
237 localedata = self._params["localedata"] | 297 # generating the master translation. |
238 else: | 298 localedata = self._params["localedata"] |
239 localedata = self._params["source"].read_locale(self._params["locale"], page) | 299 return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes)) |
240 return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links)) | 300 |
301 def get_string(self, name, page): | |
302 localedata = self._params["source"].read_locale(self._params["locale"], page) | |
303 default = localedata[name] | |
304 return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes)) | |
241 | 305 |
242 def linkify(self, page, locale=None, **attrs): | 306 def linkify(self, page, locale=None, **attrs): |
243 if locale == None: | 307 if locale is None: |
244 locale = self._params["locale"] | 308 locale = self._params["locale"] |
245 | 309 |
246 locale, url = self._params["source"].resolve_link(page, locale) | 310 locale, url = self._params["source"].resolve_link(page, locale) |
247 return jinja2.Markup('<a%s>' % ''.join( | 311 return jinja2.Markup('<a%s>' % ''.join( |
248 ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [ | 312 ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [ |
249 ('href', url), | 313 ('href', url), |
250 ('hreflang', locale) | 314 ('hreflang', locale) |
251 ] + attrs.items() | 315 ] + attrs.items() |
252 )) | 316 )) |
253 | 317 |
(...skipping 14 matching lines...) Expand all Loading... | |
268 stack.pop() | 332 stack.pop() |
269 stack[-1]["subitems"].append(item) | 333 stack[-1]["subitems"].append(item) |
270 stack.append(item) | 334 stack.append(item) |
271 return structured | 335 return structured |
272 | 336 |
273 converters = { | 337 converters = { |
274 "raw": RawConverter, | 338 "raw": RawConverter, |
275 "md": MarkdownConverter, | 339 "md": MarkdownConverter, |
276 "tmpl": TemplateConverter, | 340 "tmpl": TemplateConverter, |
277 } | 341 } |
OLD | NEW |