cms/utils.py - Issue 29590620: Noissue - Refactor Converter.__init__ and __call__

Unified Diff: cms/utils.py

Issue 29590620: Noissue - Refactor Converter.__init__ and __call__ (Closed)

Patch Set: Created Oct. 27, 2017, 5:54 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: cms/utils.py

===================================================================

--- a/cms/utils.py

+++ b/cms/utils.py

@@ -8,84 +8,155 @@

# Adblock Plus is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

-from cms.converters import converters, TemplateConverter

+import re

+__all__ = [

+ 'get_page_params',

+ 'process_page',

+ 'split_head_body',

+ 'extract_page_metadata'

+def split_head_body(html):

Vasily Kuznetsov 2017/10/27 18:35:01 Maybe this function should be called `decapitate`.

mathias 2017/10/30 15:37:00 Funny, but no. The name is perfect, there's nothin

+ """Split HTML page into head and remaining content.

+ This is used to pass head and body of the page to the template as two

+ separate variables.

+ Parameters

+ ----------

+ html: str

+ Source HTML to split.

+ Returns

+ -------

+ (str, str)

+ Everything inside of <head> tags found in the page,

+ the rest of the page text with <head> tags removed.

+ """

+ head = []

+ def add_to_head(match):

+ head.append(match.group(1))

+ return ''

+ body = re.sub(r'<head>(.*?)</head>', add_to_head, html, flags=re.S)

+ return ''.join(head), body

+def extract_page_metadata(source):

+ """Extract metadata variables from source text of the page.

+ Parameters

+ ----------

+ source: str

+ Source text of the page.

+ Returns

+ -------

+ (dict, str)

+ Metadata of the page, remaining source text without metadata.

+ """

+ metadata = {}

+ lines = source.splitlines(True)

+ for i, line in enumerate(lines):

+ if line.strip() in {''}:

+ lines[i] = ''

+ continue

+ if not re.search(r'^\s*[\w\-]+\s*=', line):

+ break

+ name, value = line.split('=', 1)

+ value = value.strip()

+ if value.startswith('[') and value.endswith(']'):

+ value = [element.strip() for element in value[1:-1].split(',')]

+ lines[i] = '\n'

+ metadata[name.strip()] = value

+ return metadata, ''.join(lines)

def get_page_params(source, locale, page, format=None, site_url_override=None,

localized_string_callback=None):

+ from cms.converters import converters

Vasily Kuznetsov 2017/10/27 18:35:02 It makes more sense to do contrived imports on thi

mathias 2017/10/30 15:37:00 Acknowledged.

# Guess page format if omitted, but default to Markdown for friendlier exceptions

if format is None:

for format in converters.iterkeys():

if source.has_page(page, format):

break

else:

format = 'md'

params = {

'source': source,

'template': 'default',

'locale': locale,

'page': page,

- 'pagedata': source.read_page(page, format),

'config': source.read_config(),

'localized_string_callback': localized_string_callback,

}

localefile = page

if params['config'].has_option('locale_overrides', page):

localefile = params['config'].get('locale_overrides', page)

params['localedata'] = source.read_locale(params['locale'], localefile)

if params['config'].has_option('general', 'siteurl'):

if site_url_override:

params['site_url'] = site_url_override

else:

params['site_url'] = params['config'].get('general', 'siteurl')

- try:

- converter_class = converters[format]

- except KeyError:

- raise Exception('Page %s uses unknown format %s' % (page, format))

+ data, filename = source.read_page(page, format)

Vasily Kuznetsov 2017/10/27 18:35:01 This was done in the constructor of `Converter` be

mathias 2017/10/30 15:37:00 Acknowledged.

+ metadata, body = extract_page_metadata(data)

+ params['pagedata'] = body, filename

+ params.update(metadata)

- converter = converter_class(params)

- # Note: The converter might change some parameters so we can only read in

- # template data here.

params['templatedata'] = source.read_template(params['template'])

defaultlocale = params['config'].get('general', 'defaultlocale')

params['defaultlocale'] = defaultlocale

locales = [

for l in source.list_locales()

if source.has_locale(l, localefile)

]

if defaultlocale not in locales:

locales.append(defaultlocale)

locales.sort()

params['available_locales'] = locales

- params['head'], params['body'] = converter()

+ try:

Vasily Kuznetsov 2017/10/27 18:35:02 Lines 137-143 should probably be in `converters.py

mathias 2017/10/30 15:37:00 Acknowledged.

+ converter_class = converters[format]

+ converter = converter_class(body, filename, params)

mathias 2017/10/30 15:37:00 This line should not be part of this try-block any

Vasily Kuznetsov 2017/11/07 17:08:29 Done.

+ except KeyError:

+ raise Exception('Page %s uses unknown format %s' % (page, format))

+ converted = converter()

+ params['head'], params['body'] = split_head_body(converted)

if converter.total_translations > 0:

params['translation_ratio'] = (

1 - float(converter.missing_translations) / converter.total_translations

)

else:

params['translation_ratio'] = 1

return params

def process_page(source, locale, page, format=None, site_url_override=None,

localized_string_callback=None):

- return TemplateConverter(

- get_page_params(source, locale, page, format,

- site_url_override, localized_string_callback),

- key='templatedata'

- )()

+ from cms.converters import TemplateConverter

+ params = get_page_params(source, locale, page, format, site_url_override,

+ localized_string_callback)

+ return TemplateConverter(*params['templatedata'], params=params)()

« cms/converters.py ('K') | « cms/converters.py ('k') | tests/expected_output/en/includer » ('j') | tests/test_site/includes/example.html » ('J')