cms/converters.py - Issue 29472555: Issue 4867 - Add global get_pages_metadata to template converters

Delta Between Two Patch Sets: cms/converters.py

Issue 29472555: Issue 4867 - Add global get_pages_metadata to template converters (Closed)

Left Patch Set: Created June 23, 2017, 9:54 a.m.

Right Patch Set: address naming, temp var assignment Created July 4, 2017, 3:19 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 # This file is part of the Adblock Plus web scripts,	1 # This file is part of the Adblock Plus web scripts,

2 # Copyright (C) 2006-2017 eyeo GmbH	2 # Copyright (C) 2006-2017 eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

(...skipping 98 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
109 # the document.	109 # the document.

110 self._append_text(data)	110 self._append_text(data)

111	111

112 def handle_entityref(self, name):	112 def handle_entityref(self, name):

113 self._append_text(self.unescape('&{};'.format(name)))	113 self._append_text(self.unescape('&{};'.format(name)))

114	114

115 def handle_charref(self, name):	115 def handle_charref(self, name):

116 self._append_text(self.unescape('&#{};'.format(name)))	116 self._append_text(self.unescape('&#{};'.format(name)))

117	117

118	118

	119 def parse_page_content(page, data):

	120 """Separate page content into metadata (dict) and body text (str)"""

	121 page_data = {'page': page}

	122 lines = data.splitlines(True)

	123 for i, line in enumerate(lines):

	124 if not re.search(r'^\s[\w\-]+\s=', line):

	125 break

	126 name, value = line.split('=', 1)

	127 value = value.strip()

	128 if value.startswith('[') and value.endswith(']'):

	129 value = [element.strip() for element in value[1:-1].split(',')]

	130 lines[i] = '\n'

	131 page_data[name.strip()] = value

	132 return page_data, ''.join(lines)

	133

	134

119 class Converter:	135 class Converter:

120 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}	136 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}

121 missing_translations = 0	137 missing_translations = 0

122 total_translations = 0	138 total_translations = 0

123	139

124 def __init__(self, params, key='pagedata'):	140 def __init__(self, params, key='pagedata'):

125 self._params = params	141 self._params = params

126 self._key = key	142 self._key = key

127 self._attribute_parser = AttributeParser(self.whitelist)	143 self._attribute_parser = AttributeParser(self.whitelist)

128 self._seen_defaults = {}	144 self._seen_defaults = {}

129	145

130 # Read in any parameters specified at the beginning of the file	146 # Read in any parameters specified at the beginning of the file

	147 # and override converter defaults with page specific params

131 data, filename = params[key]	148 data, filename = params[key]

132 lines = data.splitlines(True)	149 page_data, body_text = parse_page_content(params['page'], data)

133 for i, line in enumerate(lines):	150 params.update(page_data)

134 if not re.search(r'^\s[\w\-]+\s=', line):	151 params[key] = (body_text, filename)

135 break

136 name, value = line.split('=', 1)

137 params[name.strip()] = value.strip()

138 lines[i] = '\n'

139 params[key] = (''.join(lines), filename)

140	152

141 def localize_string(	153 def localize_string(

142 self, page, name, default, comment, localedata, escapes):	154 self, page, name, default, comment, localedata, escapes):

143	155

144 def escape(s):	156 def escape(s):

145 return re.sub(r'.',	157 return re.sub(r'.',

146 lambda match: escapes.get(match.group(0),	158 lambda match: escapes.get(match.group(0),

147 match.group(0)),	159 match.group(0)),

148 s, flags=re.S)	160 s, flags=re.S)

149	161

(...skipping 316 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
466	478

467 locale, url = self._params['source'].resolve_link(page, locale)	479 locale, url = self._params['source'].resolve_link(page, locale)

468 return jinja2.Markup('<a{}>'.format(''.join(	480 return jinja2.Markup('<a{}>'.format(''.join(

469 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [	481 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [

470 ('href', url),	482 ('href', url),

471 ('hreflang', locale)	483 ('hreflang', locale)

472 ] + attrs.items()	484 ] + attrs.items()

473 )))	485 )))

474	486

475 def get_pages_metadata(self, filters=None):	487 def get_pages_metadata(self, filters=None):

476 if not isinstance(filters, dict) and filters:	488 if filters is not None and not isinstance(filters, dict):

477 raise TypeError('Filters are not a dictionary')	489 raise TypeError('Filters are not a dictionary')

478	490

479 return_data = []	491 return_data = []

480 for page_name, _format in self._params['source'].list_pages():	492 for page_name, _format in self._params['source'].list_pages():

481 data, filename = self._params['source'].read_page(page_name,	493 data, filename = self._params['source'].read_page(page_name,

482 _format)	494 _format)

483 page_data = self.parse_page_metadata(data, page_name)	495 page_data = parse_page_content(page_name, data)[0]

484

485 if self.filter_metadata(filters, page_data) is True:	496 if self.filter_metadata(filters, page_data) is True:

486 return_data.append(page_data)	497 return_data.append(page_data)

487

488 return return_data	498 return return_data

489	499

490 def parse_page_metadata(self, data, page):
Jon Sonesen 2017/06/23 10:09:26 This code is essentially duplicating the logic in This code is essentially duplicating the logic in the init function of the Converter class. Vasily and I discussed this and the options were to break the logic out into a class function of Converters, make it a utils.py function or use it as a function in the converters namespace. We chose to put it in the converters.py namespace as a function because it makes no sense in utils since it is page-specific logic, but it is not specific enough to a given page's instance of its own converter class to be a class function. I will break this out into its own function in the next patch set if everyone agrees this makes sense. Vasily Kuznetsov 2017/06/23 14:15:24 As discussed, this approach sounds good. Now look Show quoted text On 2017/06/23 10:09:26, Jon Sonesen wrote: > This code is essentially duplicating the logic in the init function of the > Converter class, Vasily and I discussed this and the options were to break the > logic out into a class function of Converters, make it an utils.py function or > use it as a function in the converters namespace. > > We chose to put it in the converters.py namespace as a function because it makes > no sense in utils since it is page specific logic, but it is not specific enough > to a given page's instance of its own converter class to be a class function. > > I will break this out into its own function in the next patch set if everyone > agrees this makes sense As discussed, this approach sounds good. Now looking at these 3 functions that we're adding to `TemplateConverter`, it starts looking like we should separate all the default globals out into their own file(s). They are not really part of the converter logic but are more like a set of services that we provide to the template -- it doesn't seem right to pollute the converter class with this stuff. 
The globals often access `self._params`, which technically is a private attribute of the converter, but logically that thing is a rendering context and it actually becomes the context (in the Jinja sense) of the templates, so we will be able to get it using the `contextfunction` decorator. There's also `self._get_locale_data()` that is used by the globals, but I'm actually wondering if `self._params['localedata']` should be used instead (it wouldn't load the locale file from disk every time and it also supports locale overrides...). I guess we should ask Wladimir why it's done this way (it's from this change: https://hg.adblockplus.org/cms/rev/b022896ef69a). Anyway, you can do the metadata loading refactoring already and perhaps the separation of the globals will land as a separate change. Jon Sonesen 2017/06/26 07:22:43 Yeah I totally agree here, and actually we talked Show quoted text On 2017/06/23 14:15:24, Vasily Kuznetsov wrote: > On 2017/06/23 10:09:26, Jon Sonesen wrote: > > This code is essentially duplicating the logic in the init function of the > > Converter class, Vasily and I discussed this and the options were to break the > > logic out into a class function of Converters, make it an utils.py function or > > use it as a function in the converters namespace. > > > > We chose to put it in the converters.py namespace as a function because it > makes > > no sense in utils since it is page specific logic, but it is not specific > enough > > to a given page's instance of its own converter class to be a class function. > > > > I will break this out into its own function in the next patch set if everyone > > agrees this makes sense > > As discussed, this approach sounds good. > > Now looking at these 3 functions that we're adding to `TemplateConverter` it > starts looking like we should separate all the default globals out into their > own file(s). 
They are not really part of the converter logic but are more like a > set of services that we provide to the template -- it doesn't seem right to > pollute the converter class with this stuff. The globals often access > `self._params`, which technically is a private attribute of the converter, but > logically that thing is a rendering context and it actually becomes the context > (in jinja sense) of the templates so we will be able to get it using > `contextfunction` decorator. There's also `self._get_locale_data()` that is used > by the globals, but I'm actually wondering if `self._params['localedata']` > should be used instead (it wouldn't load file from the disk the locale every > time and it also supports locale overrides...). I guess we should ask Wladimir > why it's done this way (it's from this change: > https://hg.adblockplus.org/cms/rev/b022896ef69a). > > Anyway, you can do the metadata loading refactoring already and perhaps the > separation of the globals will land as a separate change. Yeah I totally agree here, and actually we talked about this in the past (though not in this much detail): the fact that we could break globals and/or filters out of the converters file to make it cleaner to extend in the future. Regarding the locale_data changes I agree here, since instantiating any converter will override the locale data with user-specified parameters. But maybe there is a side effect we are not considering, or are unaware of.
491 page_metadata = {'page': page}

492 lines = data.splitlines(True)

493 for i, line in enumerate(lines):

494 if not re.search(r'^\s[\w\-]+\s=', line):

495 break

496 name, value = line.split('=', 1)

497 value = value.strip()

498 if value.startswith('[') and value.endswith(']'):

499 value = [element.strip() for element in value[1:-1].split(',')]

500 page_metadata[name.strip()] = value

501 return page_metadata

502

503 def filter_metadata(self, filters, metadata):	500 def filter_metadata(self, filters, metadata):

	501 # if only the page key is in the metadata then there

	502 # was no user defined metadata

	503 if metadata.keys() == ['page']:

	504 return False

504 if filters is None:	505 if filters is None:

505 return True	506 return True

506 for filter_name, filter_value in filters.items():	507 for filter_name, filter_value in filters.items():

507 if filter_name not in metadata:	508 if filter_name not in metadata:

508 return False	509 return False

509 if isinstance(metadata[filter_name], list):	510 if isinstance(metadata[filter_name], list):

510 if isinstance(filter_value, basestring):	511 if isinstance(filter_value, basestring):

511 filter_value = [filter_value]	512 filter_value = [filter_value]

512 for option in filter_value:	513 for option in filter_value:

513 if str(option) not in metadata[filter_name]:	514 if str(option) not in metadata[filter_name]:

(...skipping 20 matching lines...) Expand all Loading...
534 stack.pop()	535 stack.pop()

535 stack[-1]['subitems'].append(item)	536 stack[-1]['subitems'].append(item)

536 stack.append(item)	537 stack.append(item)

537 return structured	538 return structured

538	539

539 converters = {	540 converters = {

540 'html': RawConverter,	541 'html': RawConverter,

541 'md': MarkdownConverter,	542 'md': MarkdownConverter,

542 'tmpl': TemplateConverter,	543 'tmpl': TemplateConverter,

543 }	544 }

LEFT	RIGHT