Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/converters.py

Issue 29472555: Issue 4867 - Add global get_pages_metadata to template converters (Closed)
Patch Set: fix interdependency, fix poor filter type checking Created July 3, 2017, 9:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests/expected_output/global » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # This file is part of the Adblock Plus web scripts, 1 # This file is part of the Adblock Plus web scripts,
2 # Copyright (C) 2006-2017 eyeo GmbH 2 # Copyright (C) 2006-2017 eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 # the document. 109 # the document.
110 self._append_text(data) 110 self._append_text(data)
111 111
112 def handle_entityref(self, name): 112 def handle_entityref(self, name):
113 self._append_text(self.unescape('&{};'.format(name))) 113 self._append_text(self.unescape('&{};'.format(name)))
114 114
115 def handle_charref(self, name): 115 def handle_charref(self, name):
116 self._append_text(self.unescape('&#{};'.format(name))) 116 self._append_text(self.unescape('&#{};'.format(name)))
117 117
118 118
119 def get_page_metadata(page, data):
Vasily Kuznetsov 2017/07/03 17:42:44 Perhaps this function should be renamed now since…
juliandoucette 2017/07/03 21:55:54 [`get`, `query`, `pages`, `get_pages`, ...] I lik…
Vasily Kuznetsov 2017/07/04 07:43:48 Note that this is not the function that gets expos…
juliandoucette 2017/07/04 09:57:25 Oh, sorry. I meant the get_pages_metadata function…
Vasily Kuznetsov 2017/07/04 10:23:34 Currently the content is not included in metadata…
juliandoucette 2017/07/04 10:42:56 Definitely more confusing. I don't really care if…
120 """Generator which gets per page metadata and cleaned page content"""
Vasily Kuznetsov 2017/07/03 17:42:44 Whenever possible, it's best to write docstrings i
Jon Sonesen 2017/07/04 15:02:38 Acknowledged.
121 page_data = {'page': page}
122 lines = data.splitlines(True)
123 for i, line in enumerate(lines):
124 if not re.search(r'^\s*[\w\-]+\s*=', line):
125 break
126 name, value = line.split('=', 1)
127 value = value.strip()
128 if value.startswith('[') and value.endswith(']'):
129 value = [element.strip() for element in value[1:-1].split(',')]
130 lines[i] = '\n'
131 page_data[name.strip()] = value
132 return page_data, ''.join(lines)
133
134
119 class Converter: 135 class Converter:
120 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} 136 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}
121 missing_translations = 0 137 missing_translations = 0
122 total_translations = 0 138 total_translations = 0
123 139
124 def __init__(self, params, key='pagedata'): 140 def __init__(self, params, key='pagedata'):
125 self._params = params 141 self._params = params
126 self._key = key 142 self._key = key
127 self._attribute_parser = AttributeParser(self.whitelist) 143 self._attribute_parser = AttributeParser(self.whitelist)
128 self._seen_defaults = {} 144 self._seen_defaults = {}
129 145
130 # Read in any parameters specified at the beginning of the file 146 # Read in any parameters specified at the beginning of the file
147 # and override converter defaults with page specific params
131 data, filename = params[key] 148 data, filename = params[key]
132 lines = data.splitlines(True) 149 page_data, cleaned_page = get_page_metadata(params['page'], data)
Vasily Kuznetsov 2017/07/03 17:42:44 I think the variable naming is somewhat confusing
Jon Sonesen 2017/07/04 14:58:06 Agree here, ack
133 for i, line in enumerate(lines): 150 params.update(page_data)
134 if not re.search(r'^\s*[\w\-]+\s*=', line): 151 params[key] = (cleaned_page, filename)
135 break
136 name, value = line.split('=', 1)
137 params[name.strip()] = value.strip()
138 lines[i] = '\n'
139 params[key] = (''.join(lines), filename)
140 152
141 def localize_string( 153 def localize_string(
142 self, page, name, default, comment, localedata, escapes): 154 self, page, name, default, comment, localedata, escapes):
143 155
144 def escape(s): 156 def escape(s):
145 return re.sub(r'.', 157 return re.sub(r'.',
146 lambda match: escapes.get(match.group(0), 158 lambda match: escapes.get(match.group(0),
147 match.group(0)), 159 match.group(0)),
148 s, flags=re.S) 160 s, flags=re.S)
149 161
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
376 filters = { 388 filters = {
377 'translate': self.translate, 389 'translate': self.translate,
378 'linkify': self.linkify, 390 'linkify': self.linkify,
379 'toclist': self.toclist, 391 'toclist': self.toclist,
380 } 392 }
381 393
382 globals = { 394 globals = {
383 'get_string': self.get_string, 395 'get_string': self.get_string,
384 'has_string': self.has_string, 396 'has_string': self.has_string,
385 'get_page_content': self.get_page_content, 397 'get_page_content': self.get_page_content,
398 'get_pages_metadata': self.get_pages_metadata,
386 } 399 }
387 400
388 for dirname, dictionary in [('filters', filters), 401 for dirname, dictionary in [('filters', filters),
389 ('globals', globals)]: 402 ('globals', globals)]:
390 for filename in self._params['source'].list_files(dirname): 403 for filename in self._params['source'].list_files(dirname):
391 root, ext = os.path.splitext(filename) 404 root, ext = os.path.splitext(filename)
392 if ext.lower() != '.py': 405 if ext.lower() != '.py':
393 continue 406 continue
394 407
395 path = os.path.join(dirname, filename) 408 path = os.path.join(dirname, filename)
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
464 locale = self._params['locale'] 477 locale = self._params['locale']
465 478
466 locale, url = self._params['source'].resolve_link(page, locale) 479 locale, url = self._params['source'].resolve_link(page, locale)
467 return jinja2.Markup('<a{}>'.format(''.join( 480 return jinja2.Markup('<a{}>'.format(''.join(
468 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ 481 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [
469 ('href', url), 482 ('href', url),
470 ('hreflang', locale) 483 ('hreflang', locale)
471 ] + attrs.items() 484 ] + attrs.items()
472 ))) 485 )))
473 486
487 def get_pages_metadata(self, filters=None):
488 if filters is not None and not isinstance(filters, dict):
489 raise TypeError('Filters are not a dictionary')
490
491 return_data = []
492 for page_name, _format in self._params['source'].list_pages():
493 data, filename = self._params['source'].read_page(page_name,
494 _format)
495 page_data, cleaned_page = get_page_metadata(page_name, data)
Vasily Kuznetsov 2017/07/03 17:42:44 We can just take the first part of the tuple that
Jon Sonesen 2017/07/04 14:58:06 Acknowledged.
496 if self.filter_metadata(filters, page_data) is True:
497 return_data.append(page_data)
498 return return_data
499
500 def filter_metadata(self, filters, metadata):
501 # if only the page key is in the metadata then there
502 # was no user defined metadata
503 if metadata.keys() == ['page']:
504 return False
505 if filters is None:
506 return True
507 for filter_name, filter_value in filters.items():
508 if filter_name not in metadata:
509 return False
510 if isinstance(metadata[filter_name], list):
511 if isinstance(filter_value, basestring):
512 filter_value = [filter_value]
513 for option in filter_value:
514 if str(option) not in metadata[filter_name]:
515 return False
516 elif filter_value != metadata[filter_name]:
517 return False
518 return True
519
474 def toclist(self, content): 520 def toclist(self, content):
475 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' 521 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'
476 flat = [] 522 flat = []
477 for match in re.finditer(toc_re, content, re.S): 523 for match in re.finditer(toc_re, content, re.S):
478 flat.append({ 524 flat.append({
479 'level': int(match.group(1)), 525 'level': int(match.group(1)),
480 'anchor': jinja2.Markup(match.group(2)).unescape(), 526 'anchor': jinja2.Markup(match.group(2)).unescape(),
481 'title': jinja2.Markup(match.group(3)).unescape(), 527 'title': jinja2.Markup(match.group(3)).unescape(),
482 'subitems': [], 528 'subitems': [],
483 }) 529 })
484 530
485 structured = [] 531 structured = []
486 stack = [{'level': 0, 'subitems': structured}] 532 stack = [{'level': 0, 'subitems': structured}]
487 for item in flat: 533 for item in flat:
488 while stack[-1]['level'] >= item['level']: 534 while stack[-1]['level'] >= item['level']:
489 stack.pop() 535 stack.pop()
490 stack[-1]['subitems'].append(item) 536 stack[-1]['subitems'].append(item)
491 stack.append(item) 537 stack.append(item)
492 return structured 538 return structured
493 539
494 converters = { 540 converters = {
495 'html': RawConverter, 541 'html': RawConverter,
496 'md': MarkdownConverter, 542 'md': MarkdownConverter,
497 'tmpl': TemplateConverter, 543 'tmpl': TemplateConverter,
498 } 544 }
OLD | NEW
« no previous file with comments | « no previous file | tests/expected_output/global » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld