| Index: cms/converters.py |
| =================================================================== |
| --- a/cms/converters.py |
| +++ b/cms/converters.py |
| @@ -111,37 +111,49 @@ |
| def handle_entityref(self, name): |
| self._append_text(self.unescape('&{};'.format(name))) |
| def handle_charref(self, name): |
| self._append_text(self.unescape('&#{};'.format(name))) |
| +def parse_page_content(page, data): |
| +    """Separate page content into metadata (dict) and body text (str). |
| + |
| +    Leading lines of the form ``name = value`` are read as metadata, |
| +    stopping at the first line that does not have that form. A value |
| +    wrapped in square brackets is split on commas into a list of |
| +    stripped strings; ``[]`` yields an empty list. |
| + |
| +    Returns a ``(page_data, body_text)`` tuple. ``page_data`` starts |
| +    out as ``{'page': page}``; a metadata line named ``page`` replaces |
| +    that entry. In ``body_text`` every consumed metadata line is |
| +    replaced by a bare newline, so the number of lines is unchanged. |
| +    """ |
| +    page_data = {'page': page} |
| +    lines = data.splitlines(True) |
| +    for i, line in enumerate(lines): |
| +        if not re.search(r'^\s*[\w\-]+\s*=', line): |
| +            break |
| +        name, value = line.split('=', 1) |
| +        value = value.strip() |
| +        if value.startswith('[') and value.endswith(']'): |
| +            elements = value[1:-1].strip() |
| +            # ''.split(',') is [''], so map "[]" to an empty list. |
| +            value = ([element.strip() for element in elements.split(',')] |
| +                     if elements else []) |
| +        lines[i] = '\n' |
| +        page_data[name.strip()] = value |
| +    return page_data, ''.join(lines) |
| + |
| + |
| class Converter: |
| whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
| missing_translations = 0 |
| total_translations = 0 |
| def __init__(self, params, key='pagedata'): |
| self._params = params |
| self._key = key |
| self._attribute_parser = AttributeParser(self.whitelist) |
| self._seen_defaults = {} |
| # Read in any parameters specified at the beginning of the file |
| + # and override converter defaults with page specific params |
| data, filename = params[key] |
| - lines = data.splitlines(True) |
| - for i, line in enumerate(lines): |
| - if not re.search(r'^\s*[\w\-]+\s*=', line): |
| - break |
| - name, value = line.split('=', 1) |
| - params[name.strip()] = value.strip() |
| - lines[i] = '\n' |
| - params[key] = (''.join(lines), filename) |
| + page_data, body_text = parse_page_content(params['page'], data) |
| + params.update(page_data) |
| + params[key] = (body_text, filename) |
| def localize_string( |
| self, page, name, default, comment, localedata, escapes): |
| def escape(s): |
| return re.sub(r'.', |
| lambda match: escapes.get(match.group(0), |
| match.group(0)), |
| @@ -378,16 +390,17 @@ |
| 'linkify': self.linkify, |
| 'toclist': self.toclist, |
| } |
| globals = { |
| 'get_string': self.get_string, |
| 'has_string': self.has_string, |
| 'get_page_content': self.get_page_content, |
| + 'get_pages_metadata': self.get_pages_metadata, |
| } |
| for dirname, dictionary in [('filters', filters), |
| ('globals', globals)]: |
| for filename in self._params['source'].list_files(dirname): |
| root, ext = os.path.splitext(filename) |
| if ext.lower() != '.py': |
| continue |
| @@ -466,16 +479,49 @@ |
| locale, url = self._params['source'].resolve_link(page, locale) |
| return jinja2.Markup('<a{}>'.format(''.join( |
| ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ |
| ('href', url), |
| ('hreflang', locale) |
| ] + attrs.items() |
| ))) |
| +    def get_pages_metadata(self, filters=None): |
| +        """Return the metadata of every page accepted by filter_metadata. |
| + |
| +        filters is either None or a dict; it is handed to |
| +        filter_metadata together with each page's metadata. Every page |
| +        reported by the source's list_pages() is read and parsed with |
| +        parse_page_content, and only the metadata dict is kept. |
| + |
| +        Raises TypeError if filters is neither None nor a dict. |
| +        """ |
| +        if filters is not None and not isinstance(filters, dict): |
| +            raise TypeError('Filters are not a dictionary') |
| + |
| +        source = self._params['source'] |
| +        return_data = [] |
| +        for page_name, _format in source.list_pages(): |
| +            # read_page returns (data, filename); only the data is used. |
| +            data, _filename = source.read_page(page_name, _format) |
| +            page_data = parse_page_content(page_name, data)[0] |
| +            if self.filter_metadata(filters, page_data): |
| +                return_data.append(page_data) |
| +        return return_data |
| + |
| +    def filter_metadata(self, filters, metadata): |
| +        """Tell whether a page's metadata satisfies filters. |
| + |
| +        Returns False when metadata holds nothing but the 'page' key |
| +        and True when filters is None. Otherwise every filter has to |
| +        match: its name must be present in metadata, and |
| + |
| +        - for a list-valued entry, every requested option must be in |
| +          the list (a single string counts as one option; options are |
| +          compared via str()); |
| +        - for any other entry, the two values must be equal. |
| +        """ |
| +        # If only the page key is in the metadata then there was no |
| +        # user defined metadata. Comparing key sets instead of |
| +        # keys() == ['page'] also works where keys() returns a view. |
| +        if set(metadata) == {'page'}: |
| +            return False |
| +        if filters is None: |
| +            return True |
| +        # Same types as basestring on Python 2, plain str on Python 3. |
| +        string_types = (str, type(u'')) |
| +        for filter_name, filter_value in filters.items(): |
| +            if filter_name not in metadata: |
| +                return False |
| +            if isinstance(metadata[filter_name], list): |
| +                if isinstance(filter_value, string_types): |
| +                    filter_value = [filter_value] |
| +                for option in filter_value: |
| +                    if str(option) not in metadata[filter_name]: |
| +                        return False |
| +            elif filter_value != metadata[filter_name]: |
| +                return False |
| +        return True |
| + |
| def toclist(self, content): |
| toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>' |
| flat = [] |
| for match in re.finditer(toc_re, content, re.S): |
| flat.append({ |
| 'level': int(match.group(1)), |
| 'anchor': jinja2.Markup(match.group(2)).unescape(), |
| 'title': jinja2.Markup(match.group(3)).unescape(), |