OLD | NEW |
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2015 Eyeo GmbH | 4 # Copyright (C) 2006-2015 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
(...skipping 26 matching lines...) Expand all Loading... |
# Characters with special meaning in HTML, mapped to the entity that
# replaces each of them.
html_escapes = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "\"": "&quot;",
    "'": "&#39;",
}
44 | 44 |
class AttributeParser(HTMLParser.HTMLParser):
    """Split a default string into localizable text, attributes and fixed parts.

    Whitelisted tags are kept in the text without their attributes (the
    attributes are collected separately, per tag, in order of appearance).
    The content of each ``<fix>...</fix>`` element is removed from the text
    and replaced by a 1-based placeholder ``{N}``; the removed content is
    returned as a separate list.
    """

    # Per-parse state; only populated while parse() is running.
    _string = None          # fragments of the localizable text
    _inside_fixed = False   # True between <fix> and </fix>
    _fixed_strings = None   # one list of fragments per <fix> element
    _attrs = None           # tag name -> list of attribute lists
    _pagename = None        # page name, only used in error messages

    def __init__(self, whitelist):
        """Create a parser accepting only the tags contained in *whitelist*."""
        # Explicit base call (not super()): the Python 2 HTMLParser is an
        # old-style class. Without it the base parser state is never set up.
        HTMLParser.HTMLParser.__init__(self)
        self._whitelist = whitelist

    def parse(self, text, pagename):
        """Parse *text* and return ``(string, attrs, fixed_strings)``.

        *string* is the text with attributes stripped from whitelisted tags
        and ``<fix>`` elements replaced by ``{N}`` placeholders, *attrs* maps
        each tag name to the list of attribute lists seen for it, and
        *fixed_strings* lists the content of the ``<fix>`` elements in order.

        Raises Exception on a tag that is not whitelisted, on any tag inside
        a ``<fix>`` element, and on an unbalanced ``<fix>`` element.
        """
        self.reset()
        self._string = []
        self._fixed_strings = []
        self._inside_fixed = False
        self._attrs = {}
        self._pagename = pagename

        try:
            self.feed(text)
            if self._inside_fixed:
                # Otherwise the text after <fix> would vanish without a
                # placeholder pointing at it.
                raise Exception("Unterminated fixed string on page %s" %
                                self._pagename)
            fixed_strings = ["".join(parts) for parts in self._fixed_strings]
            return "".join(self._string), self._attrs, fixed_strings
        finally:
            # Drop per-parse state even when parsing failed.
            self._string = None
            self._attrs = None
            self._pagename = None
            self._inside_fixed = False
            self._fixed_strings = None

    def handle_starttag(self, tag, attrs):
        """Record a whitelisted tag, open a fixed string, or reject the tag."""
        if self._inside_fixed:
            raise Exception("Unexpected HTML tag '%s' inside a fixed string on page %s" % (tag, self._pagename))
        elif tag == "fix":
            self._inside_fixed = True
            self._fixed_strings.append([])
        elif tag in self._whitelist:
            # Attributes are stored separately; only the bare tag stays in
            # the localizable text.
            self._attrs.setdefault(tag, []).append(attrs)
            self._string.append("<%s>" % tag)
        else:
            raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename))

    def handle_endtag(self, tag):
        """Close a fixed string (emitting its placeholder) or echo the tag."""
        if tag == "fix":
            if not self._inside_fixed:
                # A stray </fix> would emit a placeholder that refers to no
                # fixed string, or repeat the previous one.
                raise Exception("Unexpected closing tag 'fix' without a matching opening tag on page %s" % self._pagename)
            # Placeholders are 1-based: the element just closed is the last
            # entry of the list.
            self._string.append("{%d}" % len(self._fixed_strings))
            self._inside_fixed = False
        elif self._inside_fixed:
            # Start tags are rejected inside a fixed string, so an end tag
            # here can never be matched.
            raise Exception("Unexpected HTML end tag '%s' inside a fixed string on page %s" % (tag, self._pagename))
        else:
            self._string.append("</%s>" % tag)

    def _append_text(self, s):
        """Append *s* to the current fixed string or to the localizable text."""
        if self._inside_fixed:
            self._fixed_strings[-1].append(s)
        else:
            self._string.append(s)

    def handle_data(self, data):
        # Note: lack of escaping here is intentional. The result is a locale
        # string, HTML escaping is applied when this string is inserted into
        # the document.
        self._append_text(data)

    def handle_entityref(self, name):
        """Store the character a named entity reference stands for."""
        self._append_text(self.unescape("&%s;" % name))

    def handle_charref(self, name):
        """Store the character a numeric character reference stands for."""
        self._append_text(self.unescape("&#%s;" % name))
86 class Converter: | 108 class Converter: |
87 whitelist = set(["a", "em", "strong"]) | 109 whitelist = set(["a", "em", "strong"]) |
88 | 110 |
89 def __init__(self, params, key="pagedata"): | 111 def __init__(self, params, key="pagedata"): |
90 self._params = params | 112 self._params = params |
91 self._key = key | 113 self._key = key |
92 self._attribute_parser = AttributeParser(self.whitelist) | 114 self._attribute_parser = AttributeParser(self.whitelist) |
93 | 115 |
94 # Read in any parameters specified at the beginning of the file | 116 # Read in any parameters specified at the beginning of the file |
95 lines = params[key].splitlines(True) | 117 lines = params[key].splitlines(True) |
96 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | 118 while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): |
97 name, value = lines.pop(0).split("=", 1) | 119 name, value = lines.pop(0).split("=", 1) |
98 params[name.strip()] = value.strip() | 120 params[name.strip()] = value.strip() |
99 params[key] = "".join(lines) | 121 params[key] = "".join(lines) |
100 | 122 |
101 def localize_string(self, name, default, localedata, escapes): | 123 def localize_string(self, name, default, localedata, escapes): |
102 def escape(s): | 124 def escape(s): |
103 return re.sub(r".", | 125 return re.sub(r".", |
104 lambda match: escapes.get(match.group(0), match.group(0)), | 126 lambda match: escapes.get(match.group(0), match.group(0)), |
105 s, flags=re.S) | 127 s, flags=re.S) |
106 def re_escape(s): | 128 def re_escape(s): |
107 return re.escape(escape(s)) | 129 return re.escape(escape(s)) |
108 | 130 |
109 # Extract tag attributes from default string | 131 # Extract tag attributes from default string |
110 default, saved_attributes = self._attribute_parser.parse(default, self._para
ms["page"]) | 132 default, saved_attributes, fixed_strings = self._attribute_parser.parse(defa
ult, self._params["page"]) |
111 | 133 |
112 # Get translation | 134 # Get translation |
113 if self._params["locale"] != self._params["defaultlocale"] and name in local
edata: | 135 if self._params["locale"] != self._params["defaultlocale"] and name in local
edata: |
114 result = localedata[name].strip() | 136 result = localedata[name].strip() |
115 else: | 137 else: |
116 result = default | 138 result = default |
117 | 139 |
| 140 # Insert fixed strings |
| 141 for i in range(len(fixed_strings)): |
| 142 result = re.sub(r"\{%d\}" % (i + 1), fixed_strings[i], result, 1) |
| 143 |
118 # Insert attributes | 144 # Insert attributes |
119 result = escape(result) | 145 result = escape(result) |
120 for tag in self.whitelist: | 146 for tag in self.whitelist: |
121 saved = saved_attributes.get(tag, []) | 147 saved = saved_attributes.get(tag, []) |
122 for attrs in saved: | 148 for attrs in saved: |
123 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(valu
e)), attrs) | 149 attrs = map(lambda (name, value): '%s="%s"' % (escape(name), escape(valu
e)), attrs) |
124 result = re.sub( | 150 result = re.sub( |
125 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag))
, | 151 r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag))
, |
126 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag), | 152 r'<%s %s>\1</%s>' % (tag, " ".join(attrs), tag), |
127 result, 1, flags=re.S | 153 result, 1, flags=re.S |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
345 stack.pop() | 371 stack.pop() |
346 stack[-1]["subitems"].append(item) | 372 stack[-1]["subitems"].append(item) |
347 stack.append(item) | 373 stack.append(item) |
348 return structured | 374 return structured |
349 | 375 |
# Lookup table from a format key to the converter class registered for it
# (the keys look like page file extensions -- TODO confirm against callers).
converters = dict(
    html=RawConverter,
    md=MarkdownConverter,
    tmpl=TemplateConverter,
)
OLD | NEW |