 Issue 29516687:
  Issue 4488 - Add support for JSON page front matter  (Closed) 
  Base URL: https://hg.adblockplus.org/cms
    
  
    Issue 29516687:
  Issue 4488 - Add support for JSON page front matter  (Closed) 
  Base URL: https://hg.adblockplus.org/cms
| Index: cms/converters.py | 
| =================================================================== | 
| --- a/cms/converters.py | 
| +++ b/cms/converters.py | 
| @@ -14,31 +14,33 @@ | 
| # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| from __future__ import unicode_literals | 
| import os | 
| import HTMLParser | 
| import re | 
| import urlparse | 
| +import json | 
| import jinja2 | 
| import markdown | 
| # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs | 
| # are inserted into the <head> tag | 
| orig_isBlockLevel = markdown.util.isBlockLevel | 
| def isBlockLevel(tag): | 
| if tag == 'head': | 
| return True | 
| return orig_isBlockLevel(tag) | 
| + | 
| markdown.util.isBlockLevel = isBlockLevel | 
| html_escapes = { | 
| '<': '&lt;', | 
| '>': '&gt;', | 
| '&': '&amp;', | 
| '"': '&quot;', | 
| "'": '&#39;', | 
| @@ -112,33 +114,53 @@ | 
| def handle_entityref(self, name): | 
| self._append_text(self.unescape('&{};'.format(name))) | 
| def handle_charref(self, name): | 
| self._append_text(self.unescape('&#{};'.format(name))) | 
| +def parse_metadata(page, data): | 
| 
Vasily Kuznetsov
2017/08/21 18:19:50
There's an implicit duplication here, where we man
 
rosie
2017/08/23 18:13:04
Done.
 
rosie
2017/08/23 18:13:04
Looks good. :)
 | 
| + try: | 
| + decoder = json.JSONDecoder() | 
| + json_data, index = decoder.raw_decode(data) | 
| + json_data['page'] = page | 
| + return json_data, data[index:].strip() | 
| + except ValueError: | 
| + metadata = {'page': page} | 
| + lines = data.splitlines(True) | 
| + for i, line in enumerate(lines): | 
| + if not re.search(r'^\s*[\w\-]+\s*=', line): | 
| + break | 
| + name, value = line.split('=', 1) | 
| + value = value.strip() | 
| + if value.startswith('[') and value.endswith(']'): | 
| + value = [element.strip() for element in value[1:-1].split(',')] | 
| + lines[i] = '' | 
| + metadata[name.strip()] = value | 
| + page_data = ''.join(lines) | 
| + return metadata, page_data | 
| + | 
| + | 
| def parse_page_content(page, data): | 
| """Separate page content into metadata (dict) and body text (str)""" | 
| - page_data = {'page': page} | 
| - lines = data.splitlines(True) | 
| - for i, line in enumerate(lines): | 
| - if line.strip() in {'<!--', '-->'}: | 
| - lines[i] = '' | 
| - continue | 
| - if not re.search(r'^\s*[\w\-]+\s*=', line): | 
| - break | 
| - name, value = line.split('=', 1) | 
| - value = value.strip() | 
| - if value.startswith('[') and value.endswith(']'): | 
| - value = [element.strip() for element in value[1:-1].split(',')] | 
| - lines[i] = '\n' | 
| - page_data[name.strip()] = value | 
| - return page_data, ''.join(lines) | 
| + # If metadata is in a comment block, extract it | 
| + if data.lstrip().startswith('<!--'): | 
| + start_index = re.search('<!--', data).end() | 
| 
Vasily Kuznetsov
2017/08/21 18:19:50
I think regular expressions are a bit of an overki
 
rosie
2017/08/23 18:13:05
Yeah, that looks cleaner and avoids regular expres
 
Vasily Kuznetsov
2017/08/25 10:12:28
There's not much added complexity to the constants
 | 
| + end_match = re.search('-->', data) | 
| + end_index = end_match.start() | 
| + comment = data[start_index:end_index] | 
| + page_data = data[end_match.end():] | 
| + metadata, comment_data = parse_metadata(page, comment.strip()) | 
| + page_data_result = '{}{}{}{}'.format('<!--\n', comment_data, | 
| + '\n-->\n\n', page_data.strip()) | 
| + return metadata, page_data_result | 
| + metadata, page_data = parse_metadata(page, data.strip()) | 
| + return metadata, page_data | 
| class Converter: | 
| whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} | 
| missing_translations = 0 | 
| total_translations = 0 | 
| def __init__(self, params, key='pagedata'): | 
| @@ -553,13 +575,14 @@ | 
| stack = [{'level': 0, 'subitems': structured}] | 
| for item in flat: | 
| while stack[-1]['level'] >= item['level']: | 
| stack.pop() | 
| stack[-1]['subitems'].append(item) | 
| stack.append(item) | 
| return structured | 
| + | 
| converters = { | 
| 'html': RawConverter, | 
| 'md': MarkdownConverter, | 
| 'tmpl': TemplateConverter, | 
| } |