Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: cms/converters.py

Issue 29516687: Issue 4488 - Add support for JSON page front matter (Closed) Base URL: https://hg.adblockplus.org/cms
Patch Set: Removed JSON postprocessing and integrated the cms tests Created Aug. 19, 2017, 1:55 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | tests/expected_output/en/metadata_json » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: cms/converters.py
===================================================================
--- a/cms/converters.py
+++ b/cms/converters.py
@@ -14,31 +14,33 @@
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import os
import HTMLParser
import re
import urlparse
+import json
import jinja2
import markdown
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
# are inserted into the <head> tag
orig_isBlockLevel = markdown.util.isBlockLevel


def isBlockLevel(tag):
    """Report 'head' as block-level; defer to Markdown's original check
    for every other tag."""
    return True if tag == 'head' else orig_isBlockLevel(tag)


markdown.util.isBlockLevel = isBlockLevel
html_escapes = {
'<': '&lt;',
'>': '&gt;',
'&': '&amp;',
'"': '&quot;',
"'": '&#39;',
@@ -112,33 +114,53 @@
def handle_entityref(self, name):
    # Rebuild the named entity reference (e.g. "&amp;"), decode it and
    # record the result as ordinary text.
    entity = '&{};'.format(name)
    decoded = self.unescape(entity)
    self._append_text(decoded)
def handle_charref(self, name):
    # Rebuild the numeric character reference (e.g. "&#39;"), decode it
    # and record the result as ordinary text.
    reference = '&#{};'.format(name)
    decoded = self.unescape(reference)
    self._append_text(decoded)
def parse_metadata(page, data):
    """Split `data` into a metadata dict and the remaining text.

    Two front-matter formats are recognised at the very start of `data`:

    * A JSON object. Its members become the metadata and everything after
      the object is returned with surrounding whitespace stripped.
    * Legacy `name = value` lines. Each leading line of that form adds one
      entry; a value wrapped in `[...]` is split on commas into a list of
      stripped strings. Parsing stops at the first line that is not of this
      form, and that line plus everything after it is returned unchanged.

    In both cases the 'page' key of the returned metadata is set to `page`.

    Returns a `(metadata, remaining_text)` tuple.
    """
    try:
        json_data, index = json.JSONDecoder().raw_decode(data)
    except ValueError:
        json_data = None

    # raw_decode() accepts any JSON value, so text that merely starts with
    # a number, `true`, `null`, a quoted string or a list decodes fine but
    # is not front matter. Only a JSON object is treated as metadata;
    # anything else goes through the legacy parser instead of failing on
    # the item assignment below.
    if isinstance(json_data, dict):
        json_data['page'] = page
        return json_data, data[index:].strip()

    metadata = {'page': page}
    lines = data.splitlines(True)
    consumed = 0
    for line in lines:
        if not re.search(r'^\s*[\w\-]+\s*=', line):
            break
        name, value = line.split('=', 1)
        value = value.strip()
        if value.startswith('[') and value.endswith(']'):
            value = [element.strip() for element in value[1:-1].split(',')]
        metadata[name.strip()] = value
        consumed += 1
    # Metadata lines are dropped; the rest keeps its original line endings.
    return metadata, ''.join(lines[consumed:])
+
+
def parse_page_content(page, data):
    """Separate page content into metadata (dict) and body text (str).

    If `data` (ignoring leading whitespace) starts with an HTML comment,
    the metadata is parsed from the comment's contents. Whatever
    parse_metadata() leaves over is put back into a comment, followed by a
    blank line and the stripped remainder of the page. Otherwise the
    metadata is parsed from the start of the stripped page text.

    A comment that is never closed is not treated as a metadata block; the
    page is then parsed as if it had no comment.

    Returns a `(metadata, body_text)` tuple.
    """
    opener, closer = '<!--', '-->'
    if data.lstrip().startswith(opener):
        start_index = data.find(opener) + len(opener)
        end_index = data.find(closer)
        # Without this check an unterminated comment would crash here
        # rather than being parsed as ordinary page text.
        if end_index != -1:
            comment = data[start_index:end_index]
            remainder = data[end_index + len(closer):]
            metadata, comment_data = parse_metadata(page, comment.strip())
            body = '{}\n{}\n{}\n\n{}'.format(opener, comment_data, closer,
                                             remainder.strip())
            return metadata, body
    return parse_metadata(page, data.strip())
class Converter:
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}
missing_translations = 0
total_translations = 0
def __init__(self, params, key='pagedata'):
@@ -553,13 +575,14 @@
stack = [{'level': 0, 'subitems': structured}]
for item in flat:
while stack[-1]['level'] >= item['level']:
stack.pop()
stack[-1]['subitems'].append(item)
stack.append(item)
return structured
+
# Lookup table of the available converter classes, keyed by a short format
# name. NOTE(review): the keys look like page source file extensions --
# confirm against the code that reads this table.
converters = {
'html': RawConverter,
'md': MarkdownConverter,
'tmpl': TemplateConverter,
}
« no previous file with comments | « no previous file | tests/expected_output/en/metadata_json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld