LEFT | RIGHT |
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2015 Eyeo GmbH | 4 # Copyright (C) 2006-2015 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
52 self._whitelist = whitelist | 52 self._whitelist = whitelist |
53 | 53 |
54 def parse(self, text, pagename): | 54 def parse(self, text, pagename): |
55 self.reset() | 55 self.reset() |
56 self._string = [] | 56 self._string = [] |
57 self._fixed_strings = [] | 57 self._fixed_strings = [] |
58 self._inside_fixed = False | 58 self._inside_fixed = False |
59 self._attrs = {} | 59 self._attrs = {} |
60 self._pagename = pagename | 60 self._pagename = pagename |
61 | 61 |
| 62 # Force-escape ampersands, otherwise the parser will autocomplete bogus |
| 63 # entities. |
| 64 text = re.sub(r"&(?!\S+;)", "&amp;", text) |
| 65 |
62 try: | 66 try: |
63 self.feed(text) | 67 self.feed(text) |
64 return "".join(self._string), self._attrs, ["".join(s) for s in self._fixe
d_strings] | 68 return "".join(self._string), self._attrs, ["".join(s) for s in self._fixe
d_strings] |
65 finally: | 69 finally: |
66 self._string = None | 70 self._string = None |
67 self._attrs = None | 71 self._attrs = None |
68 self._pagename = None | 72 self._pagename = None |
69 self._inside_fixed = False | 73 self._inside_fixed = False |
70 self._fixed_strings = None | 74 self._fixed_strings = None |
71 | 75 |
(...skipping 27 matching lines...) Expand all Loading... |
99 # HTML escaping is applied when this string is inserted into the document. | 103 # HTML escaping is applied when this string is inserted into the document. |
100 self._append_text(data) | 104 self._append_text(data) |
101 | 105 |
102 def handle_entityref(self, name): | 106 def handle_entityref(self, name): |
103 self._append_text(self.unescape("&%s;" % name)) | 107 self._append_text(self.unescape("&%s;" % name)) |
104 | 108 |
105 def handle_charref(self, name): | 109 def handle_charref(self, name): |
106 self._append_text(self.unescape("&#%s;" % name)) | 110 self._append_text(self.unescape("&#%s;" % name)) |
107 | 111 |
108 class Converter: | 112 class Converter: |
109 whitelist = set(["a", "em", "strong"]) | 113 whitelist = {"a", "em", "strong", "code", "span"} |
110 missing_translations = 0 | 114 missing_translations = 0 |
111 total_translations = 0 | 115 total_translations = 0 |
112 | 116 |
113 def __init__(self, params, key="pagedata"): | 117 def __init__(self, params, key="pagedata"): |
114 self._params = params | 118 self._params = params |
115 self._key = key | 119 self._key = key |
116 self._attribute_parser = AttributeParser(self.whitelist) | 120 self._attribute_parser = AttributeParser(self.whitelist) |
117 | 121 |
118 # Read in any parameters specified at the beginning of the file | 122 # Read in any parameters specified at the beginning of the file |
119 lines = params[key].splitlines(True) | 123 lines = params[key].splitlines(True) |
(...skipping 18 matching lines...) Expand all Loading... |
138 if locale == self._params["defaultlocale"]: | 142 if locale == self._params["defaultlocale"]: |
139 result = default | 143 result = default |
140 elif name in localedata: | 144 elif name in localedata: |
141 result = localedata[name].strip() | 145 result = localedata[name].strip() |
142 else: | 146 else: |
143 result = default | 147 result = default |
144 self.missing_translations += 1 | 148 self.missing_translations += 1 |
145 self.total_translations += 1 | 149 self.total_translations += 1 |
146 | 150 |
147 # Insert fixed strings | 151 # Insert fixed strings |
148 for i in range(len(fixed_strings)): | 152 for i, fixed_string in enumerate(fixed_strings, 1): |
149 result = re.sub(r"\{%d\}" % (i + 1), fixed_strings[i], result, 1) | 153 result = result.replace("{%d}" % i, fixed_string) |
150 | 154 |
151 # Insert attributes | 155 # Insert attributes |
152 result = escape(result) | 156 result = escape(result) |
153 def stringify_attribute((name, value)): | 157 def stringify_attribute((name, value)): |
154 if name == "href": | 158 return '%s="%s"' % ( |
155 link_locale, link = self._params["source"].resolve_link(value, locale) | 159 escape(name), |
156 if link: | 160 escape(self.insert_localized_strings(value, {})) |
157 return 'href="%s" hreflang="%s"' % (escape(link), escape(link_locale)) | 161 ) |
158 return '%s="%s"' % (escape(name), escape(value)) | |
159 | 162 |
160 for tag in self.whitelist: | 163 for tag in self.whitelist: |
161 saved = saved_attributes.get(tag, []) | 164 saved = saved_attributes.get(tag, []) |
162 for attrs in saved: | 165 for attrs in saved: |
163 attrs = map(stringify_attribute, attrs) | 166 attrs = map(stringify_attribute, attrs) |
164 result = re.sub( | 167 result = re.sub( |
165 r"%s([^<>]*?)%s" % (re_escape("&lt;%s&gt;" % tag), re_escape("&lt;/%s&gt;" % tag))
, | 168 r"%s([^<>]*?)%s" % (re_escape("&lt;%s&gt;" % tag), re_escape("&lt;/%s&gt;" % tag))
, |
166 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag), | 169 r'<%s%s>\1</%s>' % (tag, " " + " ".join(attrs) if attrs else "", tag), |
167 result, 1, flags=re.S | 170 result, 1, flags=re.S |
168 ) | 171 ) |
169 result = re.sub( | 172 result = re.sub( |
170 r"%s([^<>]*?)%s" % (re_escape("&lt;%s&gt;" % tag), re_escape("&lt;/%s&gt;" % tag)), | 173 r"%s([^<>]*?)%s" % (re_escape("&lt;%s&gt;" % tag), re_escape("&lt;/%s&gt;" % tag)), |
171 r"<%s>\1</%s>" % (tag, tag), | 174 r"<%s>\1</%s>" % (tag, tag), |
172 result, flags=re.S | 175 result, flags=re.S |
173 ) | 176 ) |
174 return result | 177 return result |
175 | 178 |
176 def insert_localized_strings(self, text, escapes, to_html=lambda s: s): | 179 def insert_localized_strings(self, text, escapes, to_html=lambda s: s): |
177 def lookup_string(match): | 180 def lookup_string(match): |
178 name, comment, default = match.groups() | 181 name, comment, default = match.groups() |
179 default = to_html(default).strip() | 182 default = to_html(default).strip() |
180 | 183 |
181 # Note: We currently ignore the comment, it is only relevant when | 184 # Note: We currently ignore the comment, it is only relevant when |
182 # generating the master translation. | 185 # generating the master translation. |
183 return self.localize_string(name, default, self._params["localedata"], esc
apes) | 186 return self.localize_string(name, default, self._params["localedata"], esc
apes) |
184 | 187 |
185 return re.sub( | 188 return re.sub( |
186 r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}", | 189 r"{{\s*" |
| 190 r"([\w\-]+)" # String ID |
| 191 r"(?:\[(.*?)\])?" # Optional comment |
| 192 r"\s+" |
| 193 r"((?:(?!{{).|" # Translatable text |
| 194 r"{{(?:(?!}}).)*}}" # Nested translation |
| 195 r")*?)" |
| 196 r"}}", |
187 lookup_string, | 197 lookup_string, |
188 text, | 198 text, |
189 flags=re.S | 199 flags=re.S |
190 ) | 200 ) |
191 | 201 |
192 def process_links(self, text): | 202 def process_links(self, text): |
193 def process_link(match): | 203 def process_link(match): |
194 pre, attr, url, post = match.groups() | 204 pre, attr, url, post = match.groups() |
195 url = jinja2.Markup(url).unescape() | 205 url = jinja2.Markup(url).unescape() |
196 | 206 |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
305 "translate": self.translate, | 315 "translate": self.translate, |
306 "linkify": self.linkify, | 316 "linkify": self.linkify, |
307 "toclist": self.toclist, | 317 "toclist": self.toclist, |
308 } | 318 } |
309 | 319 |
310 globals = { | 320 globals = { |
311 "get_string": self.get_string, | 321 "get_string": self.get_string, |
312 "get_page_content": self.get_page_content, | 322 "get_page_content": self.get_page_content, |
313 } | 323 } |
314 | 324 |
| 325 self._module_refs = [] |
315 for dirname, dictionary in [("filters", filters), ("globals", globals)]: | 326 for dirname, dictionary in [("filters", filters), ("globals", globals)]: |
316 for filename in self._params["source"].list_files(dirname): | 327 for filename in self._params["source"].list_files(dirname): |
317 root, ext = os.path.splitext(filename) | 328 root, ext = os.path.splitext(filename) |
318 if ext.lower() != ".py": | 329 if ext.lower() != ".py": |
319 continue | 330 continue |
320 | 331 |
321 path = "%s/%s" % (dirname, filename) | 332 path = "%s/%s" % (dirname, filename) |
322 code = self._params["source"].read_file(path) | 333 code = self._params["source"].read_file(path) |
323 module = imp.new_module(root.replace("/", ".")) | 334 module = imp.new_module(root.replace("/", ".")) |
324 exec code in module.__dict__ | 335 exec code in module.__dict__ |
325 | 336 |
326 name = os.path.basename(root) | 337 name = os.path.basename(root) |
327 if not hasattr(module, name): | 338 if not hasattr(module, name): |
328 raise Exception("Expected symbol %s not found in %s file %s" % (name,
dirname, filename)) | 339 raise Exception("Expected symbol %s not found in %s file %s" % (name,
dirname, filename)) |
329 dictionary[name] = getattr(module, name) | 340 dictionary[name] = getattr(module, name) |
330 | 341 |
331 # HACK: The module we created here can be garbage collected because it | 342 # HACK: The module we created here can be garbage collected because it |
332 # isn't added to sys.modules. If a function is called and its module is | 343 # isn't added to sys.modules. If a function is called and its module is |
333 # gone it might cause weird errors (imports and module variables | 344 # gone it might cause weird errors (imports and module variables |
334 # unavailable). We avoid this situation by explicitly referencing the | 345 # unavailable). We avoid this situation by keeping a reference. |
335 # module from the function so they can only be garbage collected | 346 self._module_refs.append(module) |
336 # together. | |
337 if callable(dictionary[name]): | |
338 dictionary[name].module_ref = module | |
339 | 347 |
340 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["sourc
e"]), autoescape=True) | 348 self._env = jinja2.Environment(loader=self._SourceLoader(self._params["sourc
e"]), autoescape=True) |
341 self._env.filters.update(filters) | 349 self._env.filters.update(filters) |
342 self._env.globals.update(globals) | 350 self._env.globals.update(globals) |
343 | 351 |
344 def get_html(self, source): | 352 def get_html(self, source): |
345 template = self._env.from_string(source) | 353 template = self._env.from_string(source) |
346 module = template.make_module(self._params) | 354 module = template.make_module(self._params) |
347 for key, value in module.__dict__.iteritems(): | 355 for key, value in module.__dict__.iteritems(): |
348 if not key.startswith("_"): | 356 if not key.startswith("_"): |
349 self._params[key] = value | 357 self._params[key] = value |
350 return unicode(module) | 358 |
| 359 result = unicode(module) |
| 360 result = self.process_links(result) |
| 361 return result |
351 | 362 |
352 def translate(self, default, name, comment=None): | 363 def translate(self, default, name, comment=None): |
353 # Note: We currently ignore the comment, it is only relevant when | 364 # Note: We currently ignore the comment, it is only relevant when |
354 # generating the master translation. | 365 # generating the master translation. |
355 localedata = self._params["localedata"] | 366 localedata = self._params["localedata"] |
356 return jinja2.Markup(self.localize_string(name, default, localedata, html_es
capes)) | 367 return jinja2.Markup(self.localize_string(name, default, localedata, html_es
capes)) |
357 | 368 |
358 def get_string(self, name, page): | 369 def get_string(self, name, page): |
359 localedata = self._params["source"].read_locale(self._params["locale"], page
) | 370 localedata = self._params["source"].read_locale(self._params["locale"], page
) |
360 default = localedata[name] | 371 default = localedata[name] |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
396 stack.pop() | 407 stack.pop() |
397 stack[-1]["subitems"].append(item) | 408 stack[-1]["subitems"].append(item) |
398 stack.append(item) | 409 stack.append(item) |
399 return structured | 410 return structured |
400 | 411 |
401 converters = { | 412 converters = { |
402 "html": RawConverter, | 413 "html": RawConverter, |
403 "md": MarkdownConverter, | 414 "md": MarkdownConverter, |
404 "tmpl": TemplateConverter, | 415 "tmpl": TemplateConverter, |
405 } | 416 } |
LEFT | RIGHT |