| OLD | NEW |
| 1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
| 3 # | 3 # |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
| 7 # | 7 # |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| (...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 177 # Insert fixed strings | 177 # Insert fixed strings |
| 178 for i, fixed_string in enumerate(fixed_strings, 1): | 178 for i, fixed_string in enumerate(fixed_strings, 1): |
| 179 result = result.replace('{{{}}}'.format(i), fixed_string) | 179 result = result.replace('{{{}}}'.format(i), fixed_string) |
| 180 | 180 |
| 181 # Insert attributes | 181 # Insert attributes |
| 182 result = escape(result) | 182 result = escape(result) |
| 183 | 183 |
| 184 def stringify_attribute(name, value): | 184 def stringify_attribute(name, value): |
| 185 return '{}="{}"'.format( | 185 return '{}="{}"'.format( |
| 186 escape(name), | 186 escape(name), |
| 187 escape(self.insert_localized_strings(value, {})) | 187 escape(self.insert_localized_strings(value, {})), |
| 188 ) | 188 ) |
| 189 | 189 |
| 190 for tag in self.whitelist: | 190 for tag in self.whitelist: |
| 191 allowed_contents = '(?:[^<>]|{})'.format('|'.join( | 191 allowed_contents = '(?:[^<>]|{})'.format('|'.join( |
| 192 '<(?:{}[^<>]*?|/{})>'.format(t, t) | 192 '<(?:{}[^<>]*?|/{})>'.format(t, t) |
| 193 for t in map(re.escape, self.whitelist - {tag}) | 193 for t in map(re.escape, self.whitelist - {tag}) |
| 194 )) | 194 )) |
| 195 saved = saved_attributes.get(tag, []) | 195 saved = saved_attributes.get(tag, []) |
| 196 for attrs in saved: | 196 for attrs in saved: |
| 197 attrs = [stringify_attribute(*attr) for attr in attrs] | 197 attrs = [stringify_attribute(*attr) for attr in attrs] |
| 198 result = re.sub( | 198 result = re.sub( |
| 199 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), | 199 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), |
| 200 allowed_contents, | 200 allowed_contents, |
| 201 re_escape('</{}>'.format(tag))), | 201 re_escape('</{}>'.format(tag))), |
| 202 lambda match: r'<{}{}>{}</{}>'.format( | 202 lambda match: r'<{}{}>{}</{}>'.format( |
| 203 tag, | 203 tag, |
| 204 ' ' + ' '.join(attrs) if attrs else '', | 204 ' ' + ' '.join(attrs) if attrs else '', |
| 205 match.group(1), | 205 match.group(1), |
| 206 tag | 206 tag, |
| 207 ), | 207 ), |
| 208 result, 1, flags=re.S | 208 result, 1, flags=re.S, |
| 209 ) | 209 ) |
| 210 result = re.sub( | 210 result = re.sub( |
| 211 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), | 211 r'{}({}*?){}'.format(re_escape('<{}>'.format(tag)), |
| 212 allowed_contents, | 212 allowed_contents, |
| 213 re_escape('</{}>'.format(tag))), | 213 re_escape('</{}>'.format(tag))), |
| 214 r'<{}>\1</{}>'.format(tag, tag), | 214 r'<{}>\1</{}>'.format(tag, tag), |
| 215 result, flags=re.S | 215 result, flags=re.S, |
| 216 ) | 216 ) |
| 217 return result | 217 return result |
| 218 | 218 |
def insert_localized_strings(self, text, escapes, to_html=lambda s: s):
    """Replace every ``{{ id[comment] default text }}`` tag found in *text*.

    Each tag is handed to ``self.localize_string`` together with the
    current page and locale data taken from ``self._params``; the tag is
    replaced by whatever that call returns.

    :param text: source text that may contain translation tags.
    :param escapes: passed through unchanged to ``self.localize_string``.
    :param to_html: callable applied to the default text of a tag (when
        the tag has one) before surrounding whitespace is stripped.
        Defaults to the identity function.
    :returns: *text* with all matched tags substituted.
    """
    def lookup_string(match):
        name, comment, default = match.groups()
        # Tags without default text (``{{ id }}``) leave ``default`` as
        # None; only real default text is converted and stripped.
        if default:
            default = to_html(default).strip()
        return self.localize_string(self._params['page'], name, default,
                                    comment, self._params['localedata'],
                                    escapes)

    return re.sub(
        r'{{\s*'
        r'([\w\-]+)'  # String ID
        r'(?:(?:\[(.*?)\])?'  # Optional comment
        r'\s+'
        r'((?:(?!{{).|'  # Translatable text
        r'{{(?:(?!}}).)*}}'  # Nested translation
        r')*?)'
        r')?'
        r'}}',
        lookup_string,
        text,
        # re.S lets default text span several lines.
        flags=re.S,
    )
| 242 | 242 |
| 243 def process_links(self, text): | 243 def process_links(self, text): |
| 244 def process_link(match): | 244 def process_link(match): |
| 245 pre, attr, url, post = match.groups() | 245 pre, attr, url, post = match.groups() |
| 246 url = jinja2.Markup(url).unescape() | 246 url = jinja2.Markup(url).unescape() |
| 247 | 247 |
| 248 locale, new_url = ( | 248 locale, new_url = ( |
| 249 self._params['source'] | 249 self._params['source'] |
| 250 .resolve_link(url, self._params['locale'])) | 250 .resolve_link(url, self._params['locale'])) |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 284 result = converter() | 284 result = converter() |
| 285 self.missing_translations += converter.missing_translations | 285 self.missing_translations += converter.missing_translations |
| 286 self.total_translations += converter.total_translations | 286 self.total_translations += converter.total_translations |
| 287 return result | 287 return result |
| 288 raise Exception('Failed to resolve include {}' | 288 raise Exception('Failed to resolve include {}' |
| 289 ' on page {}'.format(name, self._params['page'])) | 289 ' on page {}'.format(name, self._params['page'])) |
| 290 | 290 |
| 291 return re.sub( | 291 return re.sub( |
| 292 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( | 292 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( |
| 293 self.include_start_regex, | 293 self.include_start_regex, |
| 294 self.include_end_regex | 294 self.include_end_regex, |
| 295 ), | 295 ), |
| 296 resolve_include, | 296 resolve_include, |
| 297 text | 297 text, |
| 298 ) | 298 ) |
| 299 | 299 |
def __call__(self):
    """Convert the stored page data to HTML and resolve its includes.

    Runs ``self.get_html`` on ``self._data`` / ``self._filename`` and
    passes the outcome through ``self.resolve_includes``.

    :returns: the value returned by ``self.resolve_includes``.
    """
    result = self.get_html(self._data, self._filename)
    return self.resolve_includes(result)
| 303 | 303 |
| 304 | 304 |
class RawConverter(Converter):
    """Converter that only localizes strings and processes links.

    Registered for the ``'html'`` format in the module-level
    ``converters`` mapping.
    """

    def get_html(self, source, filename):
        """Return *source* with translation tags and links processed.

        :param source: page source text.
        :param filename: accepted for interface compatibility; not used
            by this implementation.
        :returns: the text after ``insert_localized_strings`` (using the
            module-level ``html_escapes``) and ``process_links``.
        """
        result = self.insert_localized_strings(source, html_escapes)
        result = self.process_links(result)
        return result
| 310 | 310 |
| 311 | 311 |
| 312 class MarkdownConverter(Converter): | 312 class MarkdownConverter(Converter): |
| 313 include_start_regex = r'(?:{}|{})'.format( | 313 include_start_regex = r'(?:{}|{})'.format( |
| 314 Converter.include_start_regex, | 314 Converter.include_start_regex, |
| 315 re.escape(jinja2.escape(Converter.include_start_regex)) | 315 re.escape(jinja2.escape(Converter.include_start_regex)), |
| 316 ) | 316 ) |
| 317 include_end_regex = r'(?:{}|{})'.format( | 317 include_end_regex = r'(?:{}|{})'.format( |
| 318 Converter.include_end_regex, | 318 Converter.include_end_regex, |
| 319 re.escape(jinja2.escape(Converter.include_end_regex)) | 319 re.escape(jinja2.escape(Converter.include_end_regex)), |
| 320 ) | 320 ) |
| 321 | 321 |
| 322 def get_html(self, source, filename): | 322 def get_html(self, source, filename): |
| 323 def remove_unnecessary_entities(match): | 323 def remove_unnecessary_entities(match): |
| 324 char = unichr(int(match.group(1))) | 324 char = unichr(int(match.group(1))) |
| 325 if char in html_escapes: | 325 if char in html_escapes: |
| 326 return match.group(0) | 326 return match.group(0) |
| 327 return char | 327 return char |
| 328 | 328 |
| 329 escapes = {} | 329 escapes = {} |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 415 if not key.startswith('_'): | 415 if not key.startswith('_'): |
| 416 self._params[key] = value | 416 self._params[key] = value |
| 417 | 417 |
| 418 result = unicode(module) | 418 result = unicode(module) |
| 419 result = self.process_links(result) | 419 result = self.process_links(result) |
| 420 return result | 420 return result |
| 421 | 421 |
def translate(self, default, name, comment=None):
    """Localize the string *name* for the current page.

    Note the argument order: the default text comes first.

    :param default: default text passed to ``self.localize_string``.
    :param name: identifier of the string.
    :param comment: optional comment passed along with the string.
    :returns: the localized string wrapped in ``jinja2.Markup``.
    """
    return jinja2.Markup(self.localize_string(
        self._params['page'], name, default, comment,
        self._params['localedata'], html_escapes,
    ))
| 427 | 427 |
def get_string(self, name, page=None):
    """Return the localized string *name* defined for *page*.

    The default text is read from the locale data returned by
    ``self._get_locale_data(page)``; a missing *name* propagates
    whatever ``localedata[name]`` raises (``KeyError`` for a dict).

    :param name: identifier of the string.
    :param page: page to look the string up on; defaults to the current
        page (``self._params['page']``).
    :returns: the localized string wrapped in ``jinja2.Markup``.
    """
    if page is None:
        page = self._params['page']

    localedata = self._get_locale_data(page)
    default = localedata[name]
    return jinja2.Markup(self.localize_string(
        page, name, default, '', localedata, html_escapes,
    ))
| 437 | 437 |
def has_string(self, name, page=None):
    """Tell whether the string *name* exists in the locale data of *page*.

    :param name: identifier of the string.
    :param page: page to check; defaults to the current page
        (``self._params['page']``).
    :returns: result of ``name in self._get_locale_data(page)``.
    """
    if page is None:
        page = self._params['page']

    localedata = self._get_locale_data(page)
    return name in localedata
| 444 | 444 |
def get_page_content(self, page, locale=None):
    """Return the page parameters of *page* for *locale*.

    :param page: name of the page.
    :param locale: locale to use; defaults to the current locale
        (``self._params['locale']``).
    :returns: the result of ``utils.get_page_params`` for the current
        source, the chosen locale and *page*.
    """
    if locale is None:
        locale = self._params['locale']
    return utils.get_page_params(self._params['source'], locale, page)
| 449 | 449 |
def linkify(self, page, locale=None, **attrs):
    """Build the opening ``<a ...>`` tag for a link to *page*.

    The link target and its locale come from
    ``self._params['source'].resolve_link``. Attribute values are
    escaped with ``jinja2.escape``; attribute names are inserted as
    given.

    :param page: page to link to.
    :param locale: preferred locale; defaults to the current locale
        (``self._params['locale']``).
    :param attrs: additional attributes appended after ``href`` and
        ``hreflang``.
    :returns: ``jinja2.Markup`` holding the opening tag only (no
        closing ``</a>``).
    """
    if locale is None:
        locale = self._params['locale']

    locale, url = self._params['source'].resolve_link(page, locale)
    # list() keeps the concatenation valid when dict.items() returns a
    # view rather than a list; on Python 2 the result is unchanged.
    attributes = [
        ('href', url),
        ('hreflang', locale),
    ] + list(attrs.items())
    return jinja2.Markup('<a{}>'.format(''.join(
        ' {}="{}"'.format(name, jinja2.escape(value))
        for name, value in attributes
    )))
| 461 | 461 |
| 462 def get_pages_metadata(self, filters=None): | 462 def get_pages_metadata(self, filters=None): |
| 463 if filters is not None and not isinstance(filters, dict): | 463 if filters is not None and not isinstance(filters, dict): |
| 464 raise TypeError('Filters are not a dictionary') | 464 raise TypeError('Filters are not a dictionary') |
| 465 | 465 |
| 466 return_data = [] | 466 return_data = [] |
| 467 for page_name, _format in self._params['source'].list_pages(): | 467 for page_name, _format in self._params['source'].list_pages(): |
| 468 data, filename = self._params['source'].read_page(page_name, | 468 data, filename = self._params['source'].read_page(page_name, |
| (...skipping 26 matching lines...) Expand all Loading... |
| 495 | 495 |
def get_canonical_url(self, page):
    """Return canonical URL for the page (without locale code).

    :param page: page whose canonical URL is wanted.
    :returns: ``self._params['site_url']`` joined with the page URL
        after its leading locale component has been removed.
    :raises Exception: if ``site_url`` is missing from ``self._params``.
    """
    try:
        base_url = self._params['site_url']
    except KeyError:
        # The two literals are concatenated, so the separating space
        # has to be written explicitly.
        raise Exception('You must configure `siteurl` to use '
                        '`get_canonical_url()`')

    locale, page_url = self._params['source'].resolve_link(
        page, self._params['locale'],
    )
    # Remove the locale component that `resolve_link` adds at the
    # beginning (assumes the URL starts with one separator character
    # followed by the locale code).
    page_url = page_url[len(locale) + 1:]
    return urlparse.urljoin(base_url, page_url)
| 511 | 511 |
| 512 def toclist(self, content): | 512 def toclist(self, content): |
| 513 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' | 513 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
| 514 flat = [] | 514 flat = [] |
| 515 for match in re.finditer(toc_re, content, re.S): | 515 for match in re.finditer(toc_re, content, re.S): |
| (...skipping 12 matching lines...) Expand all Loading... |
| 528 stack[-1]['subitems'].append(item) | 528 stack[-1]['subitems'].append(item) |
| 529 stack.append(item) | 529 stack.append(item) |
| 530 return structured | 530 return structured |
| 531 | 531 |
| 532 | 532 |
# Maps a page format name to the converter class that handles it.
converters = {
    'html': RawConverter,
    'md': MarkdownConverter,
    'tmpl': TemplateConverter,
}
| OLD | NEW |