OLD | NEW |
1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 # Insert fixed strings | 177 # Insert fixed strings |
178 for i, fixed_string in enumerate(fixed_strings, 1): | 178 for i, fixed_string in enumerate(fixed_strings, 1): |
179 result = result.replace('{{{}}}'.format(i), fixed_string) | 179 result = result.replace('{{{}}}'.format(i), fixed_string) |
180 | 180 |
181 # Insert attributes | 181 # Insert attributes |
182 result = escape(result) | 182 result = escape(result) |
183 | 183 |
184 def stringify_attribute(name, value): | 184 def stringify_attribute(name, value): |
185 return '{}="{}"'.format( | 185 return '{}="{}"'.format( |
186 escape(name), | 186 escape(name), |
187 escape(self.insert_localized_strings(value, {})) | 187 escape(self.insert_localized_strings(value, {})), |
188 ) | 188 ) |
189 | 189 |
190 for tag in self.whitelist: | 190 for tag in self.whitelist: |
191 allowed_contents = '(?:[^<>]|{})'.format('|'.join( | 191 allowed_contents = '(?:[^<>]|{})'.format('|'.join( |
192 '<(?:{}[^<>]*?|/{})>'.format(t, t) | 192 '<(?:{}[^<>]*?|/{})>'.format(t, t) |
193 for t in map(re.escape, self.whitelist - {tag}) | 193 for t in map(re.escape, self.whitelist - {tag}) |
194 )) | 194 )) |
195 saved = saved_attributes.get(tag, []) | 195 saved = saved_attributes.get(tag, []) |
196 for attrs in saved: | 196 for attrs in saved: |
197 attrs = [stringify_attribute(*attr) for attr in attrs] | 197 attrs = [stringify_attribute(*attr) for attr in attrs] |
198 result = re.sub( | 198 result = re.sub( |
199 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), | 199 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), |
200 allowed_contents, | 200 allowed_contents, |
201 re_escape('</{}>'.format(tag))), | 201 re_escape('</{}>'.format(tag))), |
202 lambda match: r'<{}{}>{}</{}>'.format( | 202 lambda match: r'<{}{}>{}</{}>'.format( |
203 tag, | 203 tag, |
204 ' ' + ' '.join(attrs) if attrs else '', | 204 ' ' + ' '.join(attrs) if attrs else '', |
205 match.group(1), | 205 match.group(1), |
206 tag | 206 tag, |
207 ), | 207 ), |
208 result, 1, flags=re.S | 208 result, 1, flags=re.S, |
209 ) | 209 ) |
210 result = re.sub( | 210 result = re.sub( |
211 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), | 211 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), |
212 allowed_contents, | 212 allowed_contents, |
213 re_escape('</{}>'.format(tag))), | 213 re_escape('</{}>'.format(tag))), |
214 r'<{}>\1</{}>'.format(tag, tag), | 214 r'<{}>\1</{}>'.format(tag, tag), |
215 result, flags=re.S | 215 result, flags=re.S, |
216 ) | 216 ) |
217 return result | 217 return result |
218 | 218 |
def insert_localized_strings(self, text, escapes, to_html=lambda s: s):
    """Replace every ``{{id[comment] default}}`` marker found in `text`.

    A marker consists of a string ID (word characters and dashes),
    optionally followed by a bracketed comment, whitespace and the
    translatable default text.  The default text may itself contain
    nested ``{{...}}`` markers.

    Each marker is substituted by the return value of
    ``self.localize_string``, which receives the current page and locale
    data from ``self._params`` along with `escapes`.  A non-empty default
    text is first passed through `to_html` and stripped of surrounding
    whitespace.
    """
    marker_regex = (
        r'{{\s*'
        r'([\w\-]+)'  # String ID
        r'(?:(?:\[(.*?)\])?'  # Optional comment
        r'\s+'
        r'((?:(?!{{).|'  # Translatable text
        r'{{(?:(?!}}).)*}}'  # Nested translation
        r')*?)'
        r')?'
        r'}}'
    )

    def lookup_string(match):
        string_id, comment, default_text = match.groups()
        if default_text:
            default_text = to_html(default_text).strip()
        return self.localize_string(
            self._params['page'], string_id, default_text, comment,
            self._params['localedata'], escapes,
        )

    # re.S lets the default text span several lines.
    return re.sub(marker_regex, lookup_string, text, flags=re.S)
242 | 242 |
243 def process_links(self, text): | 243 def process_links(self, text): |
244 def process_link(match): | 244 def process_link(match): |
245 pre, attr, url, post = match.groups() | 245 pre, attr, url, post = match.groups() |
246 url = jinja2.Markup(url).unescape() | 246 url = jinja2.Markup(url).unescape() |
247 | 247 |
248 locale, new_url = ( | 248 locale, new_url = ( |
249 self._params['source'] | 249 self._params['source'] |
250 .resolve_link(url, self._params['locale'])) | 250 .resolve_link(url, self._params['locale'])) |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
284 result = converter() | 284 result = converter() |
285 self.missing_translations += converter.missing_translations | 285 self.missing_translations += converter.missing_translations |
286 self.total_translations += converter.total_translations | 286 self.total_translations += converter.total_translations |
287 return result | 287 return result |
288 raise Exception('Failed to resolve include {}' | 288 raise Exception('Failed to resolve include {}' |
289 ' on page {}'.format(name, self._params['page'])) | 289 ' on page {}'.format(name, self._params['page'])) |
290 | 290 |
291 return re.sub( | 291 return re.sub( |
292 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( | 292 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( |
293 self.include_start_regex, | 293 self.include_start_regex, |
294 self.include_end_regex | 294 self.include_end_regex, |
295 ), | 295 ), |
296 resolve_include, | 296 resolve_include, |
297 text | 297 text, |
298 ) | 298 ) |
299 | 299 |
def __call__(self):
    """Convert the stored page data to HTML and resolve its includes.

    The data and filename are taken from ``self._data`` and
    ``self._filename``; the HTML produced by ``get_html`` is then passed
    through ``resolve_includes`` and returned.
    """
    html = self.get_html(self._data, self._filename)
    return self.resolve_includes(html)
303 | 303 |
304 | 304 |
class RawConverter(Converter):
    """Converter that treats the page source as ready-made HTML."""

    def get_html(self, source, filename):
        """Return `source` with localized strings inserted and links processed.

        `filename` is accepted for interface compatibility but not used.
        """
        localized = self.insert_localized_strings(source, html_escapes)
        return self.process_links(localized)
310 | 310 |
311 | 311 |
312 class MarkdownConverter(Converter): | 312 class MarkdownConverter(Converter): |
313 include_start_regex = r'(?:{}|{})'.format( | 313 include_start_regex = r'(?:{}|{})'.format( |
314 Converter.include_start_regex, | 314 Converter.include_start_regex, |
315 re.escape(jinja2.escape(Converter.include_start_regex)) | 315 re.escape(jinja2.escape(Converter.include_start_regex)), |
316 ) | 316 ) |
317 include_end_regex = r'(?:{}|{})'.format( | 317 include_end_regex = r'(?:{}|{})'.format( |
318 Converter.include_end_regex, | 318 Converter.include_end_regex, |
319 re.escape(jinja2.escape(Converter.include_end_regex)) | 319 re.escape(jinja2.escape(Converter.include_end_regex)), |
320 ) | 320 ) |
321 | 321 |
322 def get_html(self, source, filename): | 322 def get_html(self, source, filename): |
323 def remove_unnecessary_entities(match): | 323 def remove_unnecessary_entities(match): |
324 char = unichr(int(match.group(1))) | 324 char = unichr(int(match.group(1))) |
325 if char in html_escapes: | 325 if char in html_escapes: |
326 return match.group(0) | 326 return match.group(0) |
327 return char | 327 return char |
328 | 328 |
329 escapes = {} | 329 escapes = {} |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
415 if not key.startswith('_'): | 415 if not key.startswith('_'): |
416 self._params[key] = value | 416 self._params[key] = value |
417 | 417 |
418 result = unicode(module) | 418 result = unicode(module) |
419 result = self.process_links(result) | 419 result = self.process_links(result) |
420 return result | 420 return result |
421 | 421 |
def translate(self, default, name, comment=None):
    """Return the localized version of string `name` as markup.

    The string is looked up for the current page
    (``self._params['page']``) in ``self._params['localedata']`` via
    ``self.localize_string``, with `default` and `comment` passed along
    and `html_escapes` applied.  The result is wrapped in
    ``jinja2.Markup``.
    """
    localized = self.localize_string(
        self._params['page'],
        name,
        default,
        comment,
        self._params['localedata'],
        html_escapes,
    )
    return jinja2.Markup(localized)
427 | 427 |
def get_string(self, name, page=None):
    """Return string `name` of `page`, localized, as ``jinja2.Markup``.

    `page` defaults to the current page (``self._params['page']``).  The
    entry stored under `name` in that page's locale data is used as the
    default text; a missing entry raises ``KeyError``.
    """
    if page is None:
        page = self._params['page']

    strings = self._get_locale_data(page)
    default = strings[name]
    localized = self.localize_string(
        page, name, default, '', strings, html_escapes,
    )
    return jinja2.Markup(localized)
437 | 437 |
def has_string(self, name, page=None):
    """Tell whether the locale data of `page` contains string `name`.

    `page` defaults to the current page (``self._params['page']``).
    """
    target_page = self._params['page'] if page is None else page
    return name in self._get_locale_data(target_page)
444 | 444 |
def get_page_content(self, page, locale=None):
    """Return ``utils.get_page_params`` for `page` in the given locale.

    `locale` defaults to the current locale (``self._params['locale']``);
    the source is always ``self._params['source']``.
    """
    effective_locale = self._params['locale'] if locale is None else locale
    return utils.get_page_params(
        self._params['source'], effective_locale, page,
    )
449 | 449 |
def linkify(self, page, locale=None, **attrs):
    """Return the opening ``<a ...>`` tag linking to `page`, as markup.

    The link is resolved through ``self._params['source'].resolve_link``
    using `locale`, which defaults to the current locale
    (``self._params['locale']``).  The resolved URL and locale become the
    ``href`` and ``hreflang`` attributes; any extra keyword arguments are
    appended as further attributes.  Attribute values are escaped with
    ``jinja2.escape``; attribute names are inserted as given.  Only the
    opening tag is produced, no closing ``</a>``.
    """
    if locale is None:
        locale = self._params['locale']

    locale, url = self._params['source'].resolve_link(page, locale)

    # Build the attribute list without relying on `dict.items()` returning
    # a list: that only holds on Python 2, while on Python 3 it is a view
    # that cannot be concatenated to a list with `+`.
    attributes = [
        ('href', url),
        ('hreflang', locale),
    ]
    attributes.extend(attrs.items())

    return jinja2.Markup('<a{}>'.format(''.join(
        ' {}="{}"'.format(attr_name, jinja2.escape(value))
        for attr_name, value in attributes
    )))
461 | 461 |
462 def get_pages_metadata(self, filters=None): | 462 def get_pages_metadata(self, filters=None): |
463 if filters is not None and not isinstance(filters, dict): | 463 if filters is not None and not isinstance(filters, dict): |
464 raise TypeError('Filters are not a dictionary') | 464 raise TypeError('Filters are not a dictionary') |
465 | 465 |
466 return_data = [] | 466 return_data = [] |
467 for page_name, _format in self._params['source'].list_pages(): | 467 for page_name, _format in self._params['source'].list_pages(): |
468 data, filename = self._params['source'].read_page(page_name, | 468 data, filename = self._params['source'].read_page(page_name, |
(...skipping 26 matching lines...) Expand all Loading... |
495 | 495 |
def get_canonical_url(self, page):
    """Return canonical URL for the page (without locale code).

    The URL is built by joining ``self._params['site_url']`` with the
    link that ``self._params['source'].resolve_link`` returns for `page`
    in the current locale, after the leading locale component has been
    removed from that link.

    Raises ``Exception`` when ``site_url`` is not present in the
    parameters.
    """
    try:
        base_url = self._params['site_url']
    except KeyError:
        # Note the trailing space: the two literals are concatenated.
        raise Exception('You must configure `siteurl` to use '
                        '`get_canonical_url()`')

    locale, page_url = self._params['source'].resolve_link(
        page, self._params['locale'],
    )
    # Remove the locale component that `resolve_link` adds at the
    # beginning (one separator character plus the locale code).
    page_url = page_url[len(locale) + 1:]
    return urlparse.urljoin(base_url, page_url)
511 | 511 |
512 def toclist(self, content): | 512 def toclist(self, content): |
513 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' | 513 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
514 flat = [] | 514 flat = [] |
515 for match in re.finditer(toc_re, content, re.S): | 515 for match in re.finditer(toc_re, content, re.S): |
(...skipping 12 matching lines...) Expand all Loading... |
528 stack[-1]['subitems'].append(item) | 528 stack[-1]['subitems'].append(item) |
529 stack.append(item) | 529 stack.append(item) |
530 return structured | 530 return structured |
531 | 531 |
532 | 532 |
# Converter class to use for each page format key
# (presumably the source file extension -- confirm against callers).
converters = {
    'html': RawConverter,
    'md': MarkdownConverter,
    'tmpl': TemplateConverter,
}
OLD | NEW |