Index: cms/converters.py |
=================================================================== |
--- a/cms/converters.py |
+++ b/cms/converters.py |
@@ -14,16 +14,18 @@ |
# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
from __future__ import unicode_literals |
import os |
import HTMLParser |
import re |
import urlparse |
+import json |
+import collections |
import jinja2 |
import markdown |
# Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs |
# are inserted into the <head> tag |
orig_isBlockLevel = markdown.util.isBlockLevel |
@@ -112,33 +114,49 @@ |
def handle_entityref(self, name):
    # Decode a named character reference (e.g. 'amp' from '&amp;') and
    # append the resulting character to the current text buffer.
    # NOTE(review): `unescape` is presumably inherited from HTMLParser
    # and `_append_text` defined elsewhere in this class — confirm.
    self._append_text(self.unescape('&{};'.format(name)))
def handle_charref(self, name):
    # Decode a numeric character reference (e.g. '38' from '&#38;') and
    # append the resulting character to the current text buffer.
    # NOTE(review): `unescape` is presumably inherited from HTMLParser
    # and `_append_text` defined elsewhere in this class — confirm.
    self._append_text(self.unescape('&#{};'.format(name)))
def parse_json(json_data, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict.

    Nested keys are joined with *sep*, e.g. ``{'a': {'b': 1}}`` becomes
    ``{'a_b': 1}``; non-mapping values are kept unchanged. An empty
    nested mapping contributes no keys at all.

    :param json_data: the mapping to flatten (e.g. from `json.loads`).
    :param parent_key: prefix joined onto every key (used by recursion).
    :param sep: separator inserted between joined key parts.
    :return: a new, flat dict.
    """
    # `collections.MutableMapping` is a deprecated alias that was removed
    # in Python 3.10; prefer `collections.abc`, falling back for Python 2.
    try:
        from collections.abc import MutableMapping
    except ImportError:  # Python 2
        from collections import MutableMapping
    result = {}
    for key, value in json_data.items():
        new_key = parent_key + sep + key if parent_key else key
        if isinstance(value, MutableMapping):
            # Recurse into nested mappings, prefixing with the joined key.
            result.update(parse_json(value, new_key, sep=sep))
        else:
            result[new_key] = value
    return result
def parse_page_content(page, data):
    """Separate page content into metadata (dict) and body text (str).

    Metadata is either a JSON object at the top of the page, optionally
    wrapped in an HTML comment, or a legacy block of ``name = value``
    lines. JSON metadata is flattened with ``parse_json()`` so nested
    keys become ``outer_sep_inner`` entries.

    :param page: the page name; stored under the 'page' key.
    :param data: the raw source text of the page.
    :return: a ``(metadata_dict, body_text)`` tuple.
    """
    page_data = {'page': page}
    # Only look inside a comment wrapper at the very start of the page:
    # a global replace of '<!--'/'-->' would also corrupt HTML comments
    # appearing later in the body.
    comment = re.search(r'\A\s*<!--(.*?)-->', data, re.S)
    source = comment.group(1) if comment else data
    # raw_decode() does not tolerate leading whitespace, so strip it and
    # remember the offset to locate where the body starts.
    stripped = source.lstrip()
    offset = len(source) - len(stripped)
    try:
        json_data, index = json.JSONDecoder().raw_decode(stripped)
        if not isinstance(json_data, dict):
            # A page starting with e.g. a bare number or list is not
            # metadata; fall through to the legacy format below.
            raise ValueError('metadata must be a JSON object')
        json_data['page'] = page
        body_start = comment.end() if comment else offset + index
        return parse_json(json_data), data[body_start:]
    except ValueError:
        # Legacy format: `name = value` lines, optionally wrapped in an
        # HTML comment. Metadata lines are blanked rather than removed
        # so line numbers in the body stay stable.
        lines = data.splitlines(True)
        for i, line in enumerate(lines):
            if line.strip() in {'<!--', '-->'}:
                lines[i] = ''
                continue
            if not re.search(r'^\s*[\w\-]+\s*=', line):
                break
            name, value = line.split('=', 1)
            value = value.strip()
            if value.startswith('[') and value.endswith(']'):
                value = [element.strip()
                         for element in value[1:-1].split(',')]
            lines[i] = '\n'
            page_data[name.strip()] = value
        return page_data, ''.join(lines)
class Converter: |
whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
missing_translations = 0 |
total_translations = 0 |
def __init__(self, params, key='pagedata'): |