Rietveld Code Review Tool

Unified Diff: cms/converters.py

Issue 29516687: Issue 4488 - Add support for JSON page front matter (Closed) Base URL: https://hg.adblockplus.org/cms
Patch Set: Cleaned up duplication, removed unnecessary regex Created Aug. 23, 2017, 6:12 p.m.
Index: cms/converters.py
===================================================================
--- a/cms/converters.py
+++ b/cms/converters.py
@@ -14,31 +14,33 @@
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
from __future__ import unicode_literals
import os
import HTMLParser
import re
import urlparse
+import json
import jinja2
import markdown
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
# are inserted into the <head> tag
orig_isBlockLevel = markdown.util.isBlockLevel
def isBlockLevel(tag):
if tag == 'head':
return True
return orig_isBlockLevel(tag)
+
Sebastian Noack 2017/08/29 22:49:39 Adding this blank line is unrelated.
rosie 2018/03/26 02:32:21 True. My linter was showing a warning because ther…
Sebastian Noack 2018/03/26 02:57:47 Strictly, there should be two blank lines surround…
markdown.util.isBlockLevel = isBlockLevel
html_escapes = {
'<': '&lt;',
'>': '&gt;',
'&': '&amp;',
'"': '&quot;',
"'": '&#39;',
@@ -112,33 +114,54 @@
def handle_entityref(self, name):
self._append_text(self.unescape('&{};'.format(name)))
def handle_charref(self, name):
self._append_text(self.unescape('&#{};'.format(name)))
+def parse_metadata(page, data):
+ metadata = {'page': page}
+ try:
+ decoder = json.JSONDecoder()
+ json_data, index = decoder.raw_decode(data)
+ metadata.update(json_data)
+ page_data = data[index:].strip()
Sebastian Noack 2017/08/29 22:49:39 Note that when parsing the legacy non-JSON format, …
Vasily Kuznetsov 2017/08/30 10:50:00 This consideration is rather non-obvious, for exam…
rosie 2018/03/26 02:32:21 (This function was moved to utils.py.) Now, the li…
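For context, a minimal sketch (not part of this patch) of how json.JSONDecoder.raw_decode splits JSON front matter from the trailing page body; the sample data is invented:

    import json

    data = '{"title": "Home", "template": "default"}\nFirst paragraph of the page.'
    decoder = json.JSONDecoder()
    # raw_decode returns the decoded object plus the index where the JSON
    # value ended, so everything after that index is the page body.
    metadata, end = decoder.raw_decode(data)
    page_body = data[end:].strip()
    # metadata == {'title': 'Home', 'template': 'default'}
    # page_body == 'First paragraph of the page.'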
+ except ValueError:
Sebastian Noack 2017/08/29 22:49:39 In case the data can be interpreted as JSON, but r…
rosie 2018/03/26 02:32:21 The check to make sure a dict is returned now happ…
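The final check is not visible in this patch set (per the comments it lives in utils.py); a hypothetical sketch of such a guard, with the function name invented here:

    import json

    def decode_json_front_matter(data):
        # Hypothetical guard: valid JSON that is not an object (e.g. a bare
        # list or string) should fall back to the legacy key = value format.
        json_data, index = json.JSONDecoder().raw_decode(data)
        if not isinstance(json_data, dict):
            raise ValueError('page front matter must be a JSON object')
        return json_data, data[index:]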
+ lines = data.splitlines(True)
+ for i, line in enumerate(lines):
+ if not re.search(r'^\s*[\w\-]+\s*=', line):
Sebastian Noack 2017/08/29 22:49:39 There is some redundancy in the regular expression
rosie 2018/03/26 02:32:20 Done.
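The simplified pattern itself is not shown in this patch set; one possible redundancy (an assumption) is the escaped hyphen, which needs no backslash when it sits at the end of a character class:

    import re

    # Both patterns accept the same 'name = value' metadata lines;
    # the unescaped hyphen is the more conventional spelling.
    assert re.search(r'^\s*[\w\-]+\s*=', 'page-title = Home')
    assert re.search(r'^\s*[\w-]+\s*=', 'page-title = Home')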
+ break
+ name, value = line.split('=', 1)
+ value = value.strip()
+ if value.startswith('[') and value.endswith(']'):
+ value = [element.strip() for element in value[1:-1].split(',')]
+ lines[i] = ''
+ metadata[name.strip()] = value
+ page_data = ''.join(lines)
+ return metadata, page_data
+
+
def parse_page_content(page, data):
"""Separate page content into metadata (dict) and body text (str)"""
- page_data = {'page': page}
- lines = data.splitlines(True)
- for i, line in enumerate(lines):
- if line.strip() in {'<!--', '-->'}:
- lines[i] = ''
- continue
- if not re.search(r'^\s*[\w\-]+\s*=', line):
- break
- name, value = line.split('=', 1)
- value = value.strip()
- if value.startswith('[') and value.endswith(']'):
- value = [element.strip() for element in value[1:-1].split(',')]
- lines[i] = '\n'
- page_data[name.strip()] = value
- return page_data, ''.join(lines)
+ # If metadata is in a comment block, extract it
+ comment_start = '<!--'
Sebastian Noack 2017/08/29 22:49:39 It seems much simpler to use a regular expression
Vasily Kuznetsov 2017/08/30 10:50:00 I think it was my advice to not use regexps here b…
rosie 2018/03/26 02:32:20 Done.
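The regex-based version landed in a later patch set and is not shown here; a sketch of what the suggested approach might look like (the exact pattern is an assumption):

    import re

    data = '<!--\n{"title": "Home"}\n-->\nPage body.'
    # One regular expression captures both the comment contents and the
    # rest of the page; re.S lets '.' span newlines.
    match = re.search(r'^\s*<!--(.*?)-->(.*)$', data, re.S)
    if match:
        comment, page_tail = match.groups()
        # comment == '\n{"title": "Home"}\n', page_tail == '\nPage body.'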
+ comment_end = '-->'
+ if data.lstrip().startswith(comment_start):
+ start_index = data.index(comment_start) + len(comment_start)
+ end_index = data.index(comment_end)
+ comment = data[start_index:end_index]
+ page_tail = data[end_index + len(comment_end):]
+ metadata, comment_data = parse_metadata(page, comment.strip())
Sebastian Noack 2017/08/29 22:49:39 Does stripping the comment have any effect here? I…
Vasily Kuznetsov 2017/08/30 10:50:01 JSON parsing fails on leading space, but if we wan…
rosie 2018/03/26 02:32:21 Done.
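A small illustration of the point above: raw_decode, unlike json.loads, does not skip leading whitespace, which is why the comment contents are stripped before decoding:

    import json

    decoder = json.JSONDecoder()
    decoder.raw_decode('{"title": "Home"}')          # fine
    try:
        decoder.raw_decode(' \n{"title": "Home"}')   # leading whitespace
    except ValueError:
        pass  # raw_decode starts scanning at index 0 and rejects the space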
+ page_data = '{}\n{}\n{}\n\n{}'.format(comment_start, comment_data,
+ comment_end, page_tail.strip())
+ else:
+ metadata, page_data = parse_metadata(page, data.strip())
+ return metadata, page_data
class Converter:
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}
missing_translations = 0
total_translations = 0
def __init__(self, params, key='pagedata'):
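
To summarize the behaviour under review, a small usage sketch; the sample pages are invented and the import assumes the cms package is on the path (later patch sets reportedly move parse_metadata into utils.py):

    from cms.converters import parse_page_content

    json_page = '<!--\n{"title": "Home", "noheading": true}\n-->\nBody text.'
    legacy_page = 'title = Home\nnoheading = True\n\nBody text.'

    # Both styles yield a metadata dict (always containing the page name
    # under 'page') plus the remaining page source.
    metadata, body = parse_page_content('index', json_page)
    metadata, body = parse_page_content('index', legacy_page)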