Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 # coding: utf-8 | 1 # coding: utf-8 |
2 | 2 |
3 # This file is part of the Adblock Plus web scripts, | 3 # This file is part of the Adblock Plus web scripts, |
4 # Copyright (C) 2006-2013 Eyeo GmbH | 4 # Copyright (C) 2006-2013 Eyeo GmbH |
5 # | 5 # |
6 # Adblock Plus is free software: you can redistribute it and/or modify | 6 # Adblock Plus is free software: you can redistribute it and/or modify |
7 # it under the terms of the GNU General Public License version 3 as | 7 # it under the terms of the GNU General Public License version 3 as |
8 # published by the Free Software Foundation. | 8 # published by the Free Software Foundation. |
9 # | 9 # |
10 # Adblock Plus is distributed in the hope that it will be useful, | 10 # Adblock Plus is distributed in the hope that it will be useful, |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
108 url = new_url | 108 url = new_url |
109 if attr == "href": | 109 if attr == "href": |
110 post += ' hreflang="%s"' % jinja2.Markup.escape(locale) | 110 post += ' hreflang="%s"' % jinja2.Markup.escape(locale) |
111 | 111 |
112 return "".join((pre, jinja2.Markup.escape(url), post)) | 112 return "".join((pre, jinja2.Markup.escape(url), post)) |
113 | 113 |
114 text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text) | 114 text = re.sub(r"(<a\s[^<>]*\b(href)=\")([^<>\"]+)(\")", process_link, text) |
115 text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text) | 115 text = re.sub(r"(<img\s[^<>]*\b(src)=\")([^<>\"]+)(\")", process_link, text) |
116 return text | 116 return text |
117 | 117 |
118 include_start_regex = '<' | |
119 include_end_regex = '>' | |
120 | |
118 def resolve_includes(self, text): | 121 def resolve_includes(self, text): |
119 def resolve_include(match): | 122 def resolve_include(match): |
120 global converters | 123 global converters |
121 name = match.group(1) | 124 name = match.group(1) |
122 for format, converter_class in converters.iteritems(): | 125 for format, converter_class in converters.iteritems(): |
123 if self._params["source"].has_include(name, format): | 126 if self._params["source"].has_include(name, format): |
124 self._params["includedata"] = self._params["source"].read_include(name, format) | 127 self._params["includedata"] = self._params["source"].read_include(name, format) |
125 converter = converter_class(self._params, key="includedata") | 128 converter = converter_class(self._params, key="includedata") |
126 return converter() | 129 return converter() |
127 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"])) | 130 raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"])) |
128 | 131 |
129 return re.sub(r'(?:<|\&lt\;)\?\s*include\s+([^\s<>"]+)\s*\?(?:>|\&gt\;)', resolve_include, text) | 132 return re.sub( |
Wladimir Palant
2013/12/12 12:51:06
Nope, we really don't want to do this in raw HTML
Sebastian Noack
2013/12/12 13:54:39
I agree that we should do that only for markdown.
Wladimir Palant
2013/12/13 14:01:45
The other regexp applied to processing instruction
| |
133 r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( | |
134 self.include_start_regex, | |
135 self.include_end_regex | |
136 ), | |
137 resolve_include, | |
138 text | |
139 ) | |
130 | 140 |
131 def __call__(self): | 141 def __call__(self): |
132 result = self.get_html(self._params[self._key]) | 142 result = self.get_html(self._params[self._key]) |
133 result = self.resolve_includes(result) | 143 result = self.resolve_includes(result) |
134 if self._key == "pagedata": | 144 if self._key == "pagedata": |
135 head = [] | 145 head = [] |
136 def add_to_head(match): | 146 def add_to_head(match): |
137 head.append(match.group(1)) | 147 head.append(match.group(1)) |
138 return "" | 148 return "" |
139 body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S) | 149 body = re.sub(r"<head>(.*?)</head>", add_to_head, result, flags=re.S) |
140 return "".join(head), body | 150 return "".join(head), body |
141 else: | 151 else: |
142 return result | 152 return result |
143 | 153 |
144 class RawConverter(Converter): | 154 class RawConverter(Converter): |
145 def get_html(self, source): | 155 def get_html(self, source): |
146 result = self.insert_localized_strings(source, html_escapes) | 156 result = self.insert_localized_strings(source, html_escapes) |
147 result = self.process_links(result) | 157 result = self.process_links(result) |
148 return result | 158 return result |
149 | 159 |
150 class MarkdownConverter(Converter): | 160 class MarkdownConverter(Converter): |
161 include_start_regex = r'(?:%s|%s)' % ( | |
162 Converter.include_start_regex, | |
163 re.escape(jinja2.escape(Converter.include_start_regex)) | |
164 ) | |
165 include_end_regex = r'(?:%s|%s)' % ( | |
166 Converter.include_end_regex, | |
167 re.escape(jinja2.escape(Converter.include_end_regex)) | |
168 ) | |
169 | |
151 def get_html(self, source): | 170 def get_html(self, source): |
152 def remove_unnecessary_entities(match): | 171 def remove_unnecessary_entities(match): |
153 char = unichr(int(match.group(1))) | 172 char = unichr(int(match.group(1))) |
154 if char in html_escapes: | 173 if char in html_escapes: |
155 return match.group(0) | 174 return match.group(0) |
156 else: | 175 else: |
157 return char | 176 return char |
158 | 177 |
159 escapes = {} | 178 escapes = {} |
160 for char in markdown.Markdown.ESCAPED_CHARS: | 179 for char in markdown.Markdown.ESCAPED_CHARS: |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
247 stack.pop() | 266 stack.pop() |
248 stack[-1]["subitems"].append(item) | 267 stack[-1]["subitems"].append(item) |
249 stack.append(item) | 268 stack.append(item) |
250 return structured | 269 return structured |
251 | 270 |
252 converters = { | 271 converters = { |
253 "raw": RawConverter, | 272 "raw": RawConverter, |
254 "md": MarkdownConverter, | 273 "md": MarkdownConverter, |
255 "tmpl": TemplateConverter, | 274 "tmpl": TemplateConverter, |
256 } | 275 } |
LEFT | RIGHT |