| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 # coding: utf-8 | 1 # coding: utf-8 |
| 2 | 2 |
| 3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
| 4 # Copyright (C) 2006-2015 Eyeo GmbH | 4 # Copyright (C) 2006-2015 Eyeo GmbH |
| 5 # | 5 # |
| 6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
| 7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
| 8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
| 9 # | 9 # |
| 10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 37 html_escapes = { | 37 html_escapes = { |
| 38 "<": "&lt;", | 38 "<": "&lt;", |
| 39 ">": "&gt;", | 39 ">": "&gt;", |
| 40 "&": "&amp;", | 40 "&": "&amp;", |
| 41 "\"": "&quot;", | 41 "\"": "&quot;", |
| 42 "'": "&#39;", | 42 "'": "&#39;", |
| 43 } | 43 } |
| 44 | 44 |
| 45 class AttributeParser(HTMLParser.HTMLParser): | 45 class AttributeParser(HTMLParser.HTMLParser): |
| 46 _string = None | 46 _string = None |
| 47 _inside_fixed = False | |
| 48 _fixed_strings = None | |
| 47 _attrs = None | 49 _attrs = None |
| 48 | 50 |
| 49 def __init__(self, whitelist): | 51 def __init__(self, whitelist): |
| 50 self._whitelist = whitelist | 52 self._whitelist = whitelist |
| 51 | 53 |
| 52 def parse(self, text, pagename): | 54 def parse(self, text, pagename): |
| 53 self.reset() | 55 self.reset() |
| 54 self._string = [] | 56 self._string = [] |
| 57 self._fixed_strings = [] | |
| 58 self._inside_fixed = False | |
| 55 self._attrs = {} | 59 self._attrs = {} |
| 56 self._pagename = pagename | 60 self._pagename = pagename |
| 57 | 61 |
| 58 try: | 62 try: |
| 59 self.feed(text) | 63 self.feed(text) |
| 60 return "".join(self._string), self._attrs | 64 return "".join(self._string), self._attrs, map(lambda s: "".join(s), self._fixed_strings) |
|
Wladimir Palant
2015/03/26 20:48:56
Nit: ["".join(s) for s in self._fixed_strings] is
kzar
2015/03/26 22:11:32
Done.
| |
| 61 finally: | 65 finally: |
| 62 self._string = None | 66 self._string = None |
| 63 self._attrs = None | 67 self._attrs = None |
| 64 self._pagename = None | 68 self._pagename = None |
| 69 self._inside_fixed = False | |
| 70 self._fixed_strings = None | |
| 65 | 71 |
| 66 def handle_starttag(self, tag, attrs): | 72 def handle_starttag(self, tag, attrs): |
| 67 if tag not in self._whitelist: | 73 if tag not in self._whitelist: |
|
Wladimir Palant
2015/03/26 20:48:56
No tags should be allowed inside <fix> - meaning |
kzar
2015/03/26 22:11:32
Done.
| |
| 68 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename)) | 74 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename)) |
| 69 self._attrs.setdefault(tag, []).append(attrs) | 75 if tag == "fix": |
| 70 self._string.append("<%s>" % tag) | 76 self._inside_fixed = True |
| 77 self._fixed_strings.append([]) | |
| 78 else: | |
| 79 self._attrs.setdefault(tag, []).append(attrs) | |
| 80 self._string.append("<%s>" % tag) | |
| 71 | 81 |
| 72 def handle_endtag(self, tag): | 82 def handle_endtag(self, tag): |
| 73 self._string.append("</%s>" % tag) | 83 if tag == "fix": |
| 84 self._string.append("{%d}" % len(self._fixed_strings)) | |
| 85 self._inside_fixed = False | |
| 86 else: | |
| 87 self._string.append("</%s>" % tag) | |
| 88 | |
| 89 def _append_string(self, s): | |
|
Wladimir Palant
2015/03/26 20:48:56
Nit: we are adding to self._string a lot, this hel
kzar
2015/03/26 22:11:32
Done.
| |
| 90 if self._inside_fixed: | |
| 91 self._fixed_strings[-1].append(s) | |
| 92 else: | |
| 93 self._string.append(s) | |
| 74 | 94 |
| 75 def handle_data(self, data): | 95 def handle_data(self, data): |
| 76 # Note: lack of escaping here is intentional. The result is a locale string, | 96 # Note: lack of escaping here is intentional. The result is a locale string, |
| 77 # HTML escaping is applied when this string is inserted into the document. | 97 # HTML escaping is applied when this string is inserted into the document. |
| 78 self._string.append(data) | 98 self._append_string(data) |
| 79 | 99 |
| 80 def handle_entityref(self, name): | 100 def handle_entityref(self, name): |
| 81 self._string.append(self.unescape("&%s;" % name)) | 101 self._append_string(self.unescape("&%s;" % name)) |
| 82 | 102 |
| 83 def handle_charref(self, name): | 103 def handle_charref(self, name): |
| 84 self._string.append(self.unescape("&#%s;" % name)) | 104 self._append_string(self.unescape("&#%s;" % name)) |
| 85 | 105 |
| 86 class Converter: | 106 class Converter: |
| 87 whitelist = set(["a", "em", "strong"]) | 107 whitelist = set(["a", "em", "fix", "strong"]) |
|
Wladimir Palant
2015/03/26 20:48:56
It makes no sense to change this set - <fix> isn't
kzar
2015/03/26 22:11:32
Done.
| |
| 88 | 108 |
| 89 def __init__(self, params, key="pagedata"): | 109 def __init__(self, params, key="pagedata"): |
| 90 self._params = params | 110 self._params = params |
| 91 self._key = key | 111 self._key = key |
| 92 self._attribute_parser = AttributeParser(self.whitelist) | 112 self._attribute_parser = AttributeParser(self.whitelist) |
| 93 | 113 |
| 94 # Read in any parameters specified at the beginning of the file | 114 # Read in any parameters specified at the beginning of the file |
| 95 lines = params[key].splitlines(True) | 115 lines = params[key].splitlines(True) |
| 96 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | 116 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): |
| 97 name, value = lines.pop(0).split("=", 1) | 117 name, value = lines.pop(0).split("=", 1) |
| 98 params[name.strip()] = value.strip() | 118 params[name.strip()] = value.strip() |
| 99 params[key] = "".join(lines) | 119 params[key] = "".join(lines) |
| 100 | 120 |
| 101 def localize_string(self, name, default, localedata, escapes): | 121 def localize_string(self, name, default, localedata, escapes): |
| 102 def escape(s): | 122 def escape(s): |
| 103 return re.sub(r".", | 123 return re.sub(r".", |
| 104 lambda match: escapes.get(match.group(0), match.group(0)), | 124 lambda match: escapes.get(match.group(0), match.group(0)), |
| 105 s, flags=re.S) | 125 s, flags=re.S) |
| 106 def re_escape(s): | 126 def re_escape(s): |
| 107 return re.escape(escape(s)) | 127 return re.escape(escape(s)) |
| 108 | 128 |
| 109 # Extract tag attributes from default string | 129 # Extract tag attributes from default string |
| 110 default, saved_attributes = self._attribute_parser.parse(default, self._params["page"]) | 130 default, saved_attributes, fixed_strings = self._attribute_parser.parse(default, self._params["page"]) |
| 111 | 131 |
| 112 # Get translation | 132 # Get translation |
| 113 if self._params["locale"] != self._params["defaultlocale"] and name in localedata: | 133 if self._params["locale"] != self._params["defaultlocale"] and name in localedata: |
| 114 result = localedata[name].strip() | 134 result = localedata[name].strip() |
| 115 else: | 135 else: |
| 116 result = default | 136 result = default |
| 117 | 137 |
| 138 # Insert fixed strings | |
| 139 for i in range(len(fixed_strings)): | |
| 140 result = re.sub(re_escape("{%d}" % (i + 1)), fixed_strings[i], result, 1) | |
|
Wladimir Palant
2015/03/26 20:48:56
Escape the string being inserted? It looks like cu
kzar
2015/03/26 22:11:32
I originally did escape fixed_strings[i] here but
Wladimir Palant
2015/03/27 07:29:48
I see, you are inserting them before the escaping
| |
| 141 | |
| 118 # Insert attributes | 142 # Insert attributes |
| 119 result = escape(result) | 143 result = escape(result) |
| 120 for tag in self.whitelist: | 144 for tag in self.whitelist: |
| 121 saved = saved_attributes.get(tag, []) | 145 saved = saved_attributes.get(tag, []) |
| 122 for attrs in saved: | 146 for attrs in saved: |
| 123 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs) | 147 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(value)), attrs) |
| 124 result = re.sub( | 148 result = re.sub( |
| 125 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)), | 149 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)), |
| 126 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag), | 150 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag), |
| 127 result, 1, flags=re.S | 151 result, 1, flags=re.S |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 345 stack.pop() | 369 stack.pop() |
| 346 stack[-1]["subitems"].append(item) | 370 stack[-1]["subitems"].append(item) |
| 347 stack.append(item) | 371 stack.append(item) |
| 348 return structured | 372 return structured |
| 349 | 373 |
| 350 converters = { | 374 converters = { |
| 351 "html": RawConverter, | 375 "html": RawConverter, |
| 352 "md": MarkdownConverter, | 376 "md": MarkdownConverter, |
| 353 "tmpl": TemplateConverter, | 377 "tmpl": TemplateConverter, |
| 354 } | 378 } |
| OLD | NEW |