| Index: cms/utils.py |
| =================================================================== |
| --- a/cms/utils.py |
| +++ b/cms/utils.py |
| @@ -8,84 +8,155 @@ |
| # Adblock Plus is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| -from cms.converters import converters, TemplateConverter |
| +import re |
| + |
| +__all__ = [ |
| + 'get_page_params', |
| + 'process_page', |
| + 'split_head_body', |
| + 'extract_page_metadata' |
| +] |
| + |
| + |
| +def split_head_body(html): |
|
Vasily Kuznetsov
2017/10/27 18:35:01
Maybe this function should be called `decapitate`.
mathias
2017/10/30 15:37:00
Funny, but no. The name is perfect, there's nothin
|
| + """Split HTML page into head and remaining content. |
| + |
| + This is used to pass head and body of the page to the template as two |
| + separate variables. |
| + |
| + Parameters |
| + ---------- |
| + html: str |
| + Source HTML to split. |
| + |
| + Returns |
| + ------- |
| + (str, str) |
| + Everything inside of <head> tags found in the page, |
| + the rest of the page text with <head> tags removed. |
| + |
| + """ |
| + head = [] |
| + |
| + def add_to_head(match): |
| + head.append(match.group(1)) |
| + return '' |
| + |
| + body = re.sub(r'<head>(.*?)</head>', add_to_head, html, flags=re.S) |
| + return ''.join(head), body |
| + |
| + |
| +def extract_page_metadata(source): |
| + """Extract metadata variables from source text of the page. |
| + |
| + Parameters |
| + ---------- |
| + source: str |
| + Source text of the page. |
| + |
| + Returns |
| + ------- |
| + (dict, str) |
| + Metadata of the page, remaining source text without metadata. |
| + |
| + """ |
| + metadata = {} |
| + lines = source.splitlines(True) |
| + for i, line in enumerate(lines): |
| + if line.strip() in {'<!--', '-->'}: |
| + lines[i] = '' |
| + continue |
| + if not re.search(r'^\s*[\w\-]+\s*=', line): |
| + break |
| + name, value = line.split('=', 1) |
| + value = value.strip() |
| + if value.startswith('[') and value.endswith(']'): |
| + value = [element.strip() for element in value[1:-1].split(',')] |
| + lines[i] = '\n' |
| + metadata[name.strip()] = value |
| + return metadata, ''.join(lines) |
| def get_page_params(source, locale, page, format=None, site_url_override=None, |
| localized_string_callback=None): |
| + from cms.converters import converters |
|
Vasily Kuznetsov
2017/10/27 18:35:02
It makes more sense to do contrived imports on thi
mathias
2017/10/30 15:37:00
Acknowledged.
|
| + |
| # Guess page format if omitted, but default to Markdown for friendlier exceptions |
| if format is None: |
| for format in converters.iterkeys(): |
| if source.has_page(page, format): |
| break |
| else: |
| format = 'md' |
| params = { |
| 'source': source, |
| 'template': 'default', |
| 'locale': locale, |
| 'page': page, |
| - 'pagedata': source.read_page(page, format), |
| 'config': source.read_config(), |
| 'localized_string_callback': localized_string_callback, |
| } |
| localefile = page |
| if params['config'].has_option('locale_overrides', page): |
| localefile = params['config'].get('locale_overrides', page) |
| params['localedata'] = source.read_locale(params['locale'], localefile) |
| if params['config'].has_option('general', 'siteurl'): |
| if site_url_override: |
| params['site_url'] = site_url_override |
| else: |
| params['site_url'] = params['config'].get('general', 'siteurl') |
| - try: |
| - converter_class = converters[format] |
| - except KeyError: |
| - raise Exception('Page %s uses unknown format %s' % (page, format)) |
| + data, filename = source.read_page(page, format) |
|
Vasily Kuznetsov
2017/10/27 18:35:01
This was done in the constructor of `Converter` be
mathias
2017/10/30 15:37:00
Acknowledged.
|
| + metadata, body = extract_page_metadata(data) |
| + params['pagedata'] = body, filename |
| + params.update(metadata) |
| - converter = converter_class(params) |
| - |
| - # Note: The converter might change some parameters so we can only read in |
| - # template data here. |
| params['templatedata'] = source.read_template(params['template']) |
| defaultlocale = params['config'].get('general', 'defaultlocale') |
| params['defaultlocale'] = defaultlocale |
| locales = [ |
| l |
| for l in source.list_locales() |
| if source.has_locale(l, localefile) |
| ] |
| if defaultlocale not in locales: |
| locales.append(defaultlocale) |
| locales.sort() |
| params['available_locales'] = locales |
| - params['head'], params['body'] = converter() |
| + try: |
|
Vasily Kuznetsov
2017/10/27 18:35:02
Lines 137-143 should probably be in `converters.py
mathias
2017/10/30 15:37:00
Acknowledged.
|
| + converter_class = converters[format] |
| + converter = converter_class(body, filename, params) |
|
mathias
2017/10/30 15:37:00
This line should not be part of this try-block any
Vasily Kuznetsov
2017/11/07 17:08:29
Done.
|
| + except KeyError: |
| + raise Exception('Page %s uses unknown format %s' % (page, format)) |
| + |
| + converted = converter() |
| + params['head'], params['body'] = split_head_body(converted) |
| + |
| if converter.total_translations > 0: |
| params['translation_ratio'] = ( |
| 1 - float(converter.missing_translations) / converter.total_translations |
| ) |
| else: |
| params['translation_ratio'] = 1 |
| return params |
| def process_page(source, locale, page, format=None, site_url_override=None, |
| localized_string_callback=None): |
| - return TemplateConverter( |
| - get_page_params(source, locale, page, format, |
| - site_url_override, localized_string_callback), |
| - key='templatedata' |
| - )() |
| + from cms.converters import TemplateConverter |
| + |
| + params = get_page_params(source, locale, page, format, site_url_override, |
| + localized_string_callback) |
| + return TemplateConverter(*params['templatedata'], params=params)() |