Index: cms/converters.py |
=================================================================== |
--- a/cms/converters.py |
+++ b/cms/converters.py |
@@ -111,37 +111,49 @@ |
def handle_entityref(self, name): |
self._append_text(self.unescape('&{};'.format(name))) |
def handle_charref(self, name): |
self._append_text(self.unescape('&#{};'.format(name))) |
+def get_page_metadata(page, data): |
Vasily Kuznetsov
2017/07/03 17:42:44
Perhaps this function should be renamed now since
juliandoucette
2017/07/03 21:55:54
[`get`, `query`, `pages`, `get_pages`, ...] I lik
Vasily Kuznetsov
2017/07/04 07:43:48
Note that this is not the function that gets expos
juliandoucette
2017/07/04 09:57:25
Oh, sorry. I meant the get_pages_metadata function
Vasily Kuznetsov
2017/07/04 10:23:34
Currently the content is not included in metadata
juliandoucette
2017/07/04 10:42:56
Definitely more confusing.
I don't really care if
|
+ """Parse the leading "name = value" lines of a page into a metadata dict. |
+ |
+ Return a tuple of the metadata (always including the "page" key) and the |
+ page source with each consumed metadata line replaced by a blank line. |
+ Values wrapped in square brackets are split on commas into lists. |
+ """ |
Vasily Kuznetsov
2017/07/03 17:42:44
Whenever possible, it's best to write docstrings i
Jon Sonesen
2017/07/04 15:02:38
Acknowledged.
|
+ page_data = {'page': page} |
+ lines = data.splitlines(True) |
+ for i, line in enumerate(lines): |
+ if not re.search(r'^\s*[\w\-]+\s*=', line): |
+ break |
+ name, value = line.split('=', 1) |
+ value = value.strip() |
+ if value.startswith('[') and value.endswith(']'): |
+ value = [element.strip() for element in value[1:-1].split(',')] |
+ lines[i] = '\n' |
+ page_data[name.strip()] = value |
+ return page_data, ''.join(lines) |
+ |
+ |
class Converter: |
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
missing_translations = 0 |
total_translations = 0 |
def __init__(self, params, key='pagedata'): |
self._params = params |
self._key = key |
self._attribute_parser = AttributeParser(self.whitelist) |
self._seen_defaults = {} |
# Read in any parameters specified at the beginning of the file |
+ # and override converter defaults with page specific params |
data, filename = params[key] |
- lines = data.splitlines(True) |
- for i, line in enumerate(lines): |
- if not re.search(r'^\s*[\w\-]+\s*=', line): |
- break |
- name, value = line.split('=', 1) |
- params[name.strip()] = value.strip() |
- lines[i] = '\n' |
- params[key] = (''.join(lines), filename) |
+ page_data, cleaned_page = get_page_metadata(params['page'], data) |
Vasily Kuznetsov
2017/07/03 17:42:44
I think the variable naming is somewhat confusing
Jon Sonesen
2017/07/04 14:58:06
Agree here, ack
|
+ params.update(page_data) |
+ params[key] = (cleaned_page, filename) |
def localize_string( |
self, page, name, default, comment, localedata, escapes): |
def escape(s): |
return re.sub(r'.', |
lambda match: escapes.get(match.group(0), |
match.group(0)), |
@@ -378,16 +390,17 @@ |
'linkify': self.linkify, |
'toclist': self.toclist, |
} |
globals = { |
'get_string': self.get_string, |
'has_string': self.has_string, |
'get_page_content': self.get_page_content, |
+ 'get_pages_metadata': self.get_pages_metadata, |
} |
for dirname, dictionary in [('filters', filters), |
('globals', globals)]: |
for filename in self._params['source'].list_files(dirname): |
root, ext = os.path.splitext(filename) |
if ext.lower() != '.py': |
continue |
@@ -466,16 +479,49 @@ |
locale, url = self._params['source'].resolve_link(page, locale) |
return jinja2.Markup('<a{}>'.format(''.join( |
' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
('href', url), |
('hreflang', locale) |
] + attrs.items() |
))) |
+ def get_pages_metadata(self, filters=None): |
+ """Return the metadata of every page that matches the given filters. |
+ |
+ `filters` is an optional dict mapping metadata names to required values; |
+ a TypeError is raised for any other non-None value. |
+ """ |
+ if filters is not None and not isinstance(filters, dict): |
+ raise TypeError('Filters are not a dictionary') |
+ |
+ return_data = [] |
+ for page_name, _format in self._params['source'].list_pages(): |
+ data, filename = self._params['source'].read_page(page_name, |
+ _format) |
+ page_data, cleaned_page = get_page_metadata(page_name, data) |
Vasily Kuznetsov
2017/07/03 17:42:44
We can just take the first part of the tuple that
Jon Sonesen
2017/07/04 14:58:06
Acknowledged.
|
+ if self.filter_metadata(filters, page_data) is True: |
+ return_data.append(page_data) |
+ return return_data |
+ |
def filter_metadata(self, filters, metadata):
    """Tell whether a page's metadata satisfies the given filters.

    `metadata` is a dict of page metadata whose values are strings or
    lists of strings. `filters` is either None (accept any page that has
    user defined metadata) or a dict mapping metadata names to required
    values.

    A filter matches a list-valued entry when every requested option
    (a single string or an iterable of options) is present in the list;
    it matches any other entry only when the values are equal.

    Return False when the page has no user defined metadata, when a
    filtered name is missing, or when any filter does not match.
    """
    # If only the "page" key is present there was no user defined
    # metadata. Comparing list(metadata) instead of metadata.keys() keeps
    # this check working on Python 3, where keys() is a view that never
    # equals a list.
    if list(metadata) == ['page']:
        return False
    if filters is None:
        return True

    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    for filter_name, filter_value in filters.items():
        if filter_name not in metadata:
            return False
        page_value = metadata[filter_name]
        if isinstance(page_value, list):
            # A lone string is one option, not an iterable of characters.
            if isinstance(filter_value, string_types):
                filter_value = [filter_value]
            if any(str(option) not in page_value
                   for option in filter_value):
                return False
        elif filter_value != page_value:
            return False
    return True
+ |
def toclist(self, content): |
toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
flat = [] |
for match in re.finditer(toc_re, content, re.S): |
flat.append({ |
'level': int(match.group(1)), |
'anchor': jinja2.Markup(match.group(2)).unescape(), |
'title': jinja2.Markup(match.group(3)).unescape(), |