| Left: | ||
| Right: |
| LEFT | RIGHT |
|---|---|
| 1 # coding: utf-8 | 1 # coding: utf-8 |
| 2 | 2 |
| 3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
| 4 # Copyright (C) 2006-2015 Eyeo GmbH | 4 # Copyright (C) 2006-2015 Eyeo GmbH |
| 5 # | 5 # |
| 6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
| 7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
| 8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
| 9 # | 9 # |
| 10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
| (...skipping 24 matching lines...) | |
| 35 markdown.util.isBlockLevel = isBlockLevel | 35 markdown.util.isBlockLevel = isBlockLevel |
| 36 | 36 |
| 37 html_escapes = { | 37 html_escapes = { |
| 38 "<": "&lt;", | 38 "<": "&lt;", |
| 39 ">": "&gt;", | 39 ">": "&gt;", |
| 40 "&": "&amp;", | 40 "&": "&amp;", |
| 41 "\"": "&quot;", | 41 "\"": "&quot;", |
| 42 "'": "&#39;", | 42 "'": "&#39;", |
| 43 } | 43 } |
| 44 | 44 |
| 45 class AttributeParser(HTMLParser.HTMLParser): | 45 class AttributeParser(HTMLParser.HTMLParser): |
|
Sebastian Noack
2015/03/12 20:33:46
That makes actually sense, using a proper parser i
Wladimir Palant
2015/03/12 20:57:02
I actually tested this - invalid HTML will be norm
| |
| 46 _string = None | 46 _string = None |
| 47 _attrs = None | 47 _attrs = None |
| 48 | 48 |
| 49 def __init__(self, whitelist): | 49 def __init__(self, whitelist): |
| 50 self._whitelist = whitelist | 50 self._whitelist = whitelist |
| 51 | 51 |
| 52 def parse(self, text, pagename): | 52 def parse(self, text, pagename): |
| 53 self.reset() | 53 self.reset() |
| 54 self._string = "" | 54 self._string = [] |
|
Sebastian Noack
2015/03/12 20:33:46
I'd rather use a list here, joining it when done.
Wladimir Palant
2015/03/12 20:57:02
Done.
| |
| 55 self._attrs = {} | 55 self._attrs = {} |
| 56 self._pagename = pagename | 56 self._pagename = pagename |
| 57 | 57 |
| 58 try: | 58 try: |
| 59 self.feed(text) | 59 self.feed(text) |
| 60 return self._string, self._attrs | 60 return "".join(self._string), self._attrs |
| 61 finally: | 61 finally: |
| 62 self._string = None | 62 self._string = None |
| 63 self._attrs = None | 63 self._attrs = None |
| 64 self._pagename = None | 64 self._pagename = None |
| 65 | 65 |
| 66 def handle_starttag(self, tag, attrs): | 66 def handle_starttag(self, tag, attrs): |
| 67 if tag not in self._whitelist: | 67 if tag not in self._whitelist: |
| 68 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s " % (tag, self._pagename)) | 68 raise Exception("Unexpected HTML tag '%s' in localizable string on page %s " % (tag, self._pagename)) |
| 69 self._attrs.setdefault(tag, []).append(attrs) | 69 self._attrs.setdefault(tag, []).append(attrs) |
| 70 self._string += "<%s>" % tag | 70 self._string.append("<%s>" % tag) |
| 71 | 71 |
| 72 def handle_endtag(self, tag): | 72 def handle_endtag(self, tag): |
| 73 self._string += "</%s>" % tag | 73 self._string.append("</%s>" % tag) |
| 74 | 74 |
| 75 def handle_data(self, data): | 75 def handle_data(self, data): |
| 76 # Note: lack of escaping here is intentional. The result is a locale string, | 76 # Note: lack of escaping here is intentional. The result is a locale string, |
| 77 # HTML escaping is applied when this string is inserted into the document. | 77 # HTML escaping is applied when this string is inserted into the document. |
| 78 self._string += data | 78 self._string.append(data) |
| 79 | 79 |
| 80 def handle_entityref(self, name): | 80 def handle_entityref(self, name): |
| 81 self._string += self.unescape("&%s;" % name) | 81 self._string.append(self.unescape("&%s;" % name)) |
| 82 | 82 |
| 83 def handle_charref(self, name): | 83 def handle_charref(self, name): |
| 84 self._string += self.unescape("&#%s;" % name) | 84 self._string.append(self.unescape("&#%s;" % name)) |
| 85 | 85 |
| 86 class Converter: | 86 class Converter: |
| 87 whitelist = set(["a", "em", "strong"]) | 87 whitelist = set(["a", "em", "strong"]) |
| 88 | 88 |
| 89 def __init__(self, params, key="pagedata"): | 89 def __init__(self, params, key="pagedata"): |
| 90 self._params = params | 90 self._params = params |
| 91 self._key = key | 91 self._key = key |
| 92 self._attribute_parser = AttributeParser(self.whitelist) | 92 self._attribute_parser = AttributeParser(self.whitelist) |
| 93 | 93 |
| 94 # Read in any parameters specified at the beginning of the file | 94 # Read in any parameters specified at the beginning of the file |
| (...skipping 237 matching lines...) | |
| 332 stack.pop() | 332 stack.pop() |
| 333 stack[-1]["subitems"].append(item) | 333 stack[-1]["subitems"].append(item) |
| 334 stack.append(item) | 334 stack.append(item) |
| 335 return structured | 335 return structured |
| 336 | 336 |
| 337 converters = { | 337 converters = { |
| 338 "raw": RawConverter, | 338 "raw": RawConverter, |
| 339 "md": MarkdownConverter, | 339 "md": MarkdownConverter, |
| 340 "tmpl": TemplateConverter, | 340 "tmpl": TemplateConverter, |
| 341 } | 341 } |
| LEFT | RIGHT |