cms/converters.py - Issue 29472555: Issue 4867 - Add global get_pages_metadata to template converters

Side by Side Diff: cms/converters.py

Issue 29472555: Issue 4867 - Add global get_pages_metadata to template converters (Closed)

Patch Set: Created June 23, 2017, 9:54 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of the Adblock Plus web scripts,	1 # This file is part of the Adblock Plus web scripts,

2 # Copyright (C) 2006-2017 eyeo GmbH	2 # Copyright (C) 2006-2017 eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

(...skipping 365 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
376 filters = {	376 filters = {

377 'translate': self.translate,	377 'translate': self.translate,

378 'linkify': self.linkify,	378 'linkify': self.linkify,

379 'toclist': self.toclist,	379 'toclist': self.toclist,

380 }	380 }

381	381

382 globals = {	382 globals = {

383 'get_string': self.get_string,	383 'get_string': self.get_string,

384 'has_string': self.has_string,	384 'has_string': self.has_string,

385 'get_page_content': self.get_page_content,	385 'get_page_content': self.get_page_content,

	386 'get_pages_metadata': self.get_pages_metadata,

386 }	387 }

387	388

388 for dirname, dictionary in [('filters', filters),	389 for dirname, dictionary in [('filters', filters),

389 ('globals', globals)]:	390 ('globals', globals)]:

390 for filename in self._params['source'].list_files(dirname):	391 for filename in self._params['source'].list_files(dirname):

391 root, ext = os.path.splitext(filename)	392 root, ext = os.path.splitext(filename)

392 if ext.lower() != '.py':	393 if ext.lower() != '.py':

393 continue	394 continue

394	395

395 path = os.path.join(dirname, filename)	396 path = os.path.join(dirname, filename)

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
464 locale = self._params['locale']	465 locale = self._params['locale']

465	466

466 locale, url = self._params['source'].resolve_link(page, locale)	467 locale, url = self._params['source'].resolve_link(page, locale)

467 return jinja2.Markup('<a{}>'.format(''.join(	468 return jinja2.Markup('<a{}>'.format(''.join(

468 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [	469 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [

469 ('href', url),	470 ('href', url),

470 ('hreflang', locale)	471 ('hreflang', locale)

471 ] + attrs.items()	472 ] + attrs.items()

472 )))	473 )))

473	474

	475 def get_pages_metadata(self, filters=None):

	476 if not isinstance(filters, dict) and filters:

	477 raise TypeError('Filters are not a dictionary')

	478

	479 return_data = []

	480 for page_name, _format in self._params['source'].list_pages():

	481 data, filename = self._params['source'].read_page(page_name,

	482 _format)

	483 page_data = self.parse_page_metadata(data, page_name)

	484

	485 if self.filter_metadata(filters, page_data) is True:

	486 return_data.append(page_data)

	487

	488 return return_data

	489

	490 def parse_page_metadata(self, data, page):
	Jon Sonesen 2017/06/23 10:09:26 This code is essentially duplicating the logic in This code is essentially duplicating the logic in the init function of the Converter class. Vasily and I discussed this, and the options were to break the logic out into a class function of Converters, make it a utils.py function, or use it as a function in the converters namespace. We chose to put it in the converters.py namespace as a function because it makes no sense in utils, since it is page-specific logic, but it is not specific enough to a given page's instance of its own converter class to be a class function. I will break this out into its own function in the next patch set if everyone agrees this makes sense. Vasily Kuznetsov 2017/06/23 14:15:24 As discussed, this approach sounds good. Now look Show quoted text On 2017/06/23 10:09:26, Jon Sonesen wrote: > This code is essentially duplicating the logic in the init function of the > Converter class, Vasily and I discussed this and the options were to break the > logic out into a class function of Converters, make it an utils.py function or > use it as a function in the converters namespace. > > We chose to put it in the converters.py namespace as a function because it makes > no sense in utils since it is page specific logic, but it is not specific enough > to a given page's instance of its own converter class to be a class function. > > I will break this out into its own function in the next patch set if everyone > agrees this makes sense As discussed, this approach sounds good. Now, looking at these 3 functions that we're adding to `TemplateConverter`, it starts looking like we should separate all the default globals out into their own file(s). They are not really part of the converter logic but are more like a set of services that we provide to the template -- it doesn't seem right to pollute the converter class with this stuff. 
The globals often access `self._params`, which technically is a private attribute of the converter, but logically that thing is a rendering context and it actually becomes the context (in the Jinja sense) of the templates, so we will be able to get it using the `contextfunction` decorator. There's also `self._get_locale_data()` that is used by the globals, but I'm actually wondering if `self._params['localedata']` should be used instead (it wouldn't load the locale file from disk every time, and it also supports locale overrides...). I guess we should ask Wladimir why it's done this way (it's from this change: https://hg.adblockplus.org/cms/rev/b022896ef69a). Anyway, you can do the metadata loading refactoring already and perhaps the separation of the globals will land as a separate change. Jon Sonesen 2017/06/26 07:22:43 Yeah I totally agree here, and actually we talked Show quoted text On 2017/06/23 14:15:24, Vasily Kuznetsov wrote: > On 2017/06/23 10:09:26, Jon Sonesen wrote: > > This code is essentially duplicating the logic in the init function of the > > Converter class, Vasily and I discussed this and the options were to break the > > logic out into a class function of Converters, make it an utils.py function or > > use it as a function in the converters namespace. > > > > We chose to put it in the converters.py namespace as a function because it > makes > > no sense in utils since it is page specific logic, but it is not specific > enough > > to a given page's instance of its own converter class to be a class function. > > > > I will break this out into its own function in the next patch set if everyone > > agrees this makes sense > > As discussed, this approach sounds good. > > Now looking at these 3 functions that we're adding to `TemplateConverter` it > starts looking like we should separate all the default globals out into their > own file(s). 
They are not really part of the converter logic but are more like a > set of services that we provide to the template -- it doesn't seem right to > pollute the converter class with this stuff. The globals often access > `self._params`, which technically is a private attribute of the converter, but > logically that thing is a rendering context and it actually becomes the context > (in jinja sense) of the templates so we will be able to get it using > `contextfunction` decorator. There's also `self._get_locale_data()` that is used > by the globals, but I'm actually wondering if `self._params['localedata']` > should be used instead (it wouldn't load file from the disk the locale every > time and it also supports locale overrides...). I guess we should ask Wladimir > why it's done this way (it's from this change: > https://hg.adblockplus.org/cms/rev/b022896ef69a). > > Anyway, you can do the metadata loading refactoring already and perhaps the > separation of the globals will land as a separate change. Yeah, I totally agree here, and actually we talked about this in the past (though not in this much detail): we could break the globals and/or filters out of the converters file to make it cleaner to extend in the future. Regarding the locale_data changes, I agree here too, since instantiating any converter will override the locale data with user-specified parameters. But maybe there is a side effect we are not considering or are unaware of.
	491 page_metadata = {'page': page}

	492 lines = data.splitlines(True)

	493 for i, line in enumerate(lines):

	494 if not re.search(r'^\s*[\w\-]+\s*=', line):

	495 break

	496 name, value = line.split('=', 1)

	497 value = value.strip()

	498 if value.startswith('[') and value.endswith(']'):

	499 value = [element.strip() for element in value[1:-1].split(',')]

	500 page_metadata[name.strip()] = value

	501 return page_metadata

	502

	503 def filter_metadata(self, filters, metadata):

	504 if filters is None:

	505 return True

	506 for filter_name, filter_value in filters.items():

	507 if filter_name not in metadata:

	508 return False

	509 if isinstance(metadata[filter_name], list):

	510 if isinstance(filter_value, basestring):

	511 filter_value = [filter_value]

	512 for option in filter_value:

	513 if str(option) not in metadata[filter_name]:

	514 return False

	515 elif filter_value != metadata[filter_name]:

	516 return False

	517 return True

	518

474 def toclist(self, content):	519 def toclist(self, content):

475 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'	520 toc_re = r'<h(\d)\s[^<>]*\bid="([^<>"]+)"[^<>]*>(.*?)</h\1>'

476 flat = []	521 flat = []

477 for match in re.finditer(toc_re, content, re.S):	522 for match in re.finditer(toc_re, content, re.S):

478 flat.append({	523 flat.append({

479 'level': int(match.group(1)),	524 'level': int(match.group(1)),

480 'anchor': jinja2.Markup(match.group(2)).unescape(),	525 'anchor': jinja2.Markup(match.group(2)).unescape(),

481 'title': jinja2.Markup(match.group(3)).unescape(),	526 'title': jinja2.Markup(match.group(3)).unescape(),

482 'subitems': [],	527 'subitems': [],

483 })	528 })

484	529

485 structured = []	530 structured = []

486 stack = [{'level': 0, 'subitems': structured}]	531 stack = [{'level': 0, 'subitems': structured}]

487 for item in flat:	532 for item in flat:

488 while stack[-1]['level'] >= item['level']:	533 while stack[-1]['level'] >= item['level']:

489 stack.pop()	534 stack.pop()

490 stack[-1]['subitems'].append(item)	535 stack[-1]['subitems'].append(item)

491 stack.append(item)	536 stack.append(item)

492 return structured	537 return structured

493	538

494 converters = {	539 converters = {

495 'html': RawConverter,	540 'html': RawConverter,

496 'md': MarkdownConverter,	541 'md': MarkdownConverter,

497 'tmpl': TemplateConverter,	542 'tmpl': TemplateConverter,

498 }	543 }

OLD	NEW

« no previous file with comments | « no previous file | tests/conftest.py » ('j') | tests/conftest.py » ('J')