Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/converters.py

Issue 29516687: Issue 4488 - Add support for JSON page front matter (Closed) Base URL: https://hg.adblockplus.org/cms
Patch Set: Removed JSON postprocessing and integrated the cms tests Created Aug. 19, 2017, 1:55 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests/expected_output/en/metadata_json » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 # This file is part of the Adblock Plus web scripts, 1 # This file is part of the Adblock Plus web scripts,
2 # Copyright (C) 2006-2017 eyeo GmbH 2 # Copyright (C) 2006-2017 eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 from __future__ import unicode_literals 16 from __future__ import unicode_literals
17 17
18 import os 18 import os
19 import HTMLParser 19 import HTMLParser
20 import re 20 import re
21 import urlparse 21 import urlparse
22 import json
22 23
23 import jinja2 24 import jinja2
24 import markdown 25 import markdown
25 26
26 27
27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs 28 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
28 # are inserted into the <head> tag 29 # are inserted into the <head> tag
29 orig_isBlockLevel = markdown.util.isBlockLevel 30 orig_isBlockLevel = markdown.util.isBlockLevel
30 31
31 32
32 def isBlockLevel(tag): 33 def isBlockLevel(tag):
33 if tag == 'head': 34 if tag == 'head':
34 return True 35 return True
35 return orig_isBlockLevel(tag) 36 return orig_isBlockLevel(tag)
36 37
38
37 markdown.util.isBlockLevel = isBlockLevel 39 markdown.util.isBlockLevel = isBlockLevel
38 40
39 html_escapes = { 41 html_escapes = {
40 '<': '&lt;', 42 '<': '&lt;',
41 '>': '&gt;', 43 '>': '&gt;',
42 '&': '&amp;', 44 '&': '&amp;',
43 '"': '&quot;', 45 '"': '&quot;',
44 "'": '&#39;', 46 "'": '&#39;',
45 } 47 }
46 48
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
110 # the document. 112 # the document.
111 self._append_text(data) 113 self._append_text(data)
112 114
113 def handle_entityref(self, name): 115 def handle_entityref(self, name):
114 self._append_text(self.unescape('&{};'.format(name))) 116 self._append_text(self.unescape('&{};'.format(name)))
115 117
116 def handle_charref(self, name): 118 def handle_charref(self, name):
117 self._append_text(self.unescape('&#{};'.format(name))) 119 self._append_text(self.unescape('&#{};'.format(name)))
118 120
119 121
122 def parse_metadata(page, data):
Vasily Kuznetsov 2017/08/21 18:19:50 There's an implicit duplication here, where we man
rosie 2017/08/23 18:13:04 Done.
rosie 2017/08/23 18:13:04 Looks good. :)
123 try:
124 decoder = json.JSONDecoder()
125 json_data, index = decoder.raw_decode(data)
126 json_data['page'] = page
127 return json_data, data[index:].strip()
128 except ValueError:
129 metadata = {'page': page}
130 lines = data.splitlines(True)
131 for i, line in enumerate(lines):
132 if not re.search(r'^\s*[\w\-]+\s*=', line):
133 break
134 name, value = line.split('=', 1)
135 value = value.strip()
136 if value.startswith('[') and value.endswith(']'):
137 value = [element.strip() for element in value[1:-1].split(',')]
138 lines[i] = ''
139 metadata[name.strip()] = value
140 page_data = ''.join(lines)
141 return metadata, page_data
142
143
120 def parse_page_content(page, data): 144 def parse_page_content(page, data):
121 """Separate page content into metadata (dict) and body text (str)""" 145 """Separate page content into metadata (dict) and body text (str)"""
122 page_data = {'page': page} 146 # If metadata is in a comment block, extract it
123 lines = data.splitlines(True) 147 if data.lstrip().startswith('<!--'):
124 for i, line in enumerate(lines): 148 start_index = re.search('<!--', data).end()
Vasily Kuznetsov 2017/08/21 18:19:50 I think regular expressions are a bit of an overki
rosie 2017/08/23 18:13:05 Yeah, that looks cleaner and avoids regular expres
Vasily Kuznetsov 2017/08/25 10:12:28 There's not much added complexity to the constants
125 if line.strip() in {'<!--', '-->'}: 149 end_match = re.search('-->', data)
126 lines[i] = '' 150 end_index = end_match.start()
127 continue 151 comment = data[start_index:end_index]
128 if not re.search(r'^\s*[\w\-]+\s*=', line): 152 page_data = data[end_match.end():]
129 break 153 metadata, comment_data = parse_metadata(page, comment.strip())
130 name, value = line.split('=', 1) 154 page_data_result = '{}{}{}{}'.format('<!--\n', comment_data,
131 value = value.strip() 155 '\n-->\n\n', page_data.strip())
132 if value.startswith('[') and value.endswith(']'): 156 return metadata, page_data_result
133 value = [element.strip() for element in value[1:-1].split(',')] 157 metadata, page_data = parse_metadata(page, data.strip())
134 lines[i] = '\n' 158 return metadata, page_data
135 page_data[name.strip()] = value
136 return page_data, ''.join(lines)
137 159
138 160
139 class Converter: 161 class Converter:
140 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} 162 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}
141 missing_translations = 0 163 missing_translations = 0
142 total_translations = 0 164 total_translations = 0
143 165
144 def __init__(self, params, key='pagedata'): 166 def __init__(self, params, key='pagedata'):
145 self._params = params 167 self._params = params
146 self._key = key 168 self._key = key
(...skipping 404 matching lines...) Expand 10 before | Expand all | Expand 10 after
551 573
552 structured = [] 574 structured = []
553 stack = [{'level': 0, 'subitems': structured}] 575 stack = [{'level': 0, 'subitems': structured}]
554 for item in flat: 576 for item in flat:
555 while stack[-1]['level'] >= item['level']: 577 while stack[-1]['level'] >= item['level']:
556 stack.pop() 578 stack.pop()
557 stack[-1]['subitems'].append(item) 579 stack[-1]['subitems'].append(item)
558 stack.append(item) 580 stack.append(item)
559 return structured 581 return structured
560 582
583
561 converters = { 584 converters = {
562 'html': RawConverter, 585 'html': RawConverter,
563 'md': MarkdownConverter, 586 'md': MarkdownConverter,
564 'tmpl': TemplateConverter, 587 'tmpl': TemplateConverter,
565 } 588 }
OLD | NEW
« no previous file with comments | « no previous file | tests/expected_output/en/metadata_json » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld