cms/converters.py - Issue 29590620: Noissue - Refactor Converter.__init__ and __call__

Side by Side Diff: cms/converters.py

Issue 29590620: Noissue - Refactor Converter.__init__ and __call__ (Closed)

Patch Set: Address comments on PS1 Created Nov. 7, 2017, 5:01 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This file is part of the Adblock Plus web scripts,	1 # This file is part of the Adblock Plus web scripts,

2 # Copyright (C) 2006-present eyeo GmbH	2 # Copyright (C) 2006-present eyeo GmbH

3 #	3 #

4 # Adblock Plus is free software: you can redistribute it and/or modify	4 # Adblock Plus is free software: you can redistribute it and/or modify

5 # it under the terms of the GNU General Public License version 3 as	5 # it under the terms of the GNU General Public License version 3 as

6 # published by the Free Software Foundation.	6 # published by the Free Software Foundation.

7 #	7 #

8 # Adblock Plus is distributed in the hope that it will be useful,	8 # Adblock Plus is distributed in the hope that it will be useful,

9 # but WITHOUT ANY WARRANTY; without even the implied warranty of	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11 # GNU General Public License for more details.	11 # GNU General Public License for more details.

12 #	12 #

13 # You should have received a copy of the GNU General Public License	13 # You should have received a copy of the GNU General Public License

14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.	14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

15	15

16 from __future__ import unicode_literals	16 from __future__ import unicode_literals

17	17

18 import os	18 import os

19 import HTMLParser	19 import HTMLParser

20 import re	20 import re

21 import urlparse	21 import urlparse

22	22

23 import jinja2	23 import jinja2

24 import markdown	24 import markdown

25	25

	26 from cms import utils

26	27

27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs	28 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs

28 # are inserted into the <head> tag	29 # are inserted into the <head> tag

29 orig_isBlockLevel = markdown.util.isBlockLevel	30 orig_isBlockLevel = markdown.util.isBlockLevel

30	31

31	32

32 def isBlockLevel(tag):	33 def isBlockLevel(tag):

33 if tag == 'head':	34 if tag == 'head':

34 return True	35 return True

35 return orig_isBlockLevel(tag)	36 return orig_isBlockLevel(tag)

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
111 # the document.	112 # the document.

112 self._append_text(data)	113 self._append_text(data)

113	114

114 def handle_entityref(self, name):	115 def handle_entityref(self, name):

115 self._append_text(self.unescape('&{};'.format(name)))	116 self._append_text(self.unescape('&{};'.format(name)))

116	117

117 def handle_charref(self, name):	118 def handle_charref(self, name):

118 self._append_text(self.unescape('&#{};'.format(name)))	119 self._append_text(self.unescape('&#{};'.format(name)))

119	120

120	121

121 def parse_page_content(page, data):

122 """Separate page content into metadata (dict) and body text (str)"""

123 page_data = {'page': page}

124 lines = data.splitlines(True)

125 for i, line in enumerate(lines):

126 if line.strip() in {'<!--', '-->'}:

127 lines[i] = ''

128 continue

129 if not re.search(r'^\s[\w\-]+\s=', line):

130 break

131 name, value = line.split('=', 1)

132 value = value.strip()

133 if value.startswith('[') and value.endswith(']'):

134 value = [element.strip() for element in value[1:-1].split(',')]

135 lines[i] = '\n'

136 page_data[name.strip()] = value

137 return page_data, ''.join(lines)

138

139

140 class Converter:	122 class Converter:

141 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}	123 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}

142 missing_translations = 0	124 missing_translations = 0

143 total_translations = 0	125 total_translations = 0

144	126

145 def __init__(self, params, key='pagedata'):	127 def __init__(self, data, filename, params):

	128 self._data = data

	129 self._filename = filename

146 self._params = params	130 self._params = params

147 self._key = key

148 self._attribute_parser = AttributeParser(self.whitelist)	131 self._attribute_parser = AttributeParser(self.whitelist)

149 self._seen_defaults = {}	132 self._seen_defaults = {}

150	133

151 # Read in any parameters specified at the beginning of the file

152 # and override converter defaults with page specific params

153 data, filename = params[key]

154 page_data, body_text = parse_page_content(params['page'], data)

155 params.update(page_data)

156 params[key] = (body_text, filename)

157

158 def localize_string(	134 def localize_string(

159 self, page, name, default, comment, localedata, escapes):	135 self, page, name, default, comment, localedata, escapes):

160	136

161 def escape(s):	137 def escape(s):

162 return re.sub(r'.',	138 return re.sub(r'.',

163 lambda match: escapes.get(match.group(0),	139 lambda match: escapes.get(match.group(0),

164 match.group(0)),	140 match.group(0)),

165 s, flags=re.S)	141 s, flags=re.S)

166	142

167 def re_escape(s):	143 def re_escape(s):

(...skipping 120 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
288 return text	264 return text

289	265

290 include_start_regex = '<'	266 include_start_regex = '<'

291 include_end_regex = '>'	267 include_end_regex = '>'

292	268

293 def resolve_includes(self, text):	269 def resolve_includes(self, text):

294 def resolve_include(match):	270 def resolve_include(match):

295 name = match.group(1)	271 name = match.group(1)

296 for format_, converter_class in converters.iteritems():	272 for format_, converter_class in converters.iteritems():

297 if self._params['source'].has_include(name, format_):	273 if self._params['source'].has_include(name, format_):

298 self._params['includedata'] = (	274 data, filename = (

299 self._params['source'].read_include(name, format_))	275 self._params['source'].read_include(name, format_))

300	276

301 converter = converter_class(self._params,	277 # XXX: allowing includes to modify params of the whole page

302 key='includedata')	278 # seems like a bad idea but we have to support this because

	279 # it's used by www.adblockplus.org.

	280 metadata, rest = utils.extract_page_metadata(data)

	281 self._params.update(metadata)

	282

	283 converter = converter_class(rest, filename, self._params)

303 result = converter()	284 result = converter()

304 self.missing_translations += converter.missing_translations	285 self.missing_translations += converter.missing_translations

305 self.total_translations += converter.total_translations	286 self.total_translations += converter.total_translations

306 return result	287 return result

307 raise Exception('Failed to resolve include {}'	288 raise Exception('Failed to resolve include {}'

308 ' on page {}'.format(name, self._params['page']))	289 ' on page {}'.format(name, self._params['page']))

309	290

310 return re.sub(	291 return re.sub(

311 r'{}\?\sinclude\s+([^\s<>"]+)\s\?{}'.format(	292 r'{}\?\sinclude\s+([^\s<>"]+)\s\?{}'.format(

312 self.include_start_regex,	293 self.include_start_regex,

313 self.include_end_regex	294 self.include_end_regex

314 ),	295 ),

315 resolve_include,	296 resolve_include,

316 text	297 text

317 )	298 )

318	299

319 def __call__(self):	300 def __call__(self):

320 result = self.get_html(*self._params[self._key])	301 result = self.get_html(self._data, self._filename)

321 result = self.resolve_includes(result)	302 return self.resolve_includes(result)

322 if self._key == 'pagedata':

323 head = []

324

325 def add_to_head(match):

326 head.append(match.group(1))

327 return ''

328 body = re.sub(r'<head>(.*?)</head>', add_to_head, result,

329 flags=re.S)

330 return ''.join(head), body

331 return result

332	303

333	304

334 class RawConverter(Converter):	305 class RawConverter(Converter):

335 def get_html(self, source, filename):	306 def get_html(self, source, filename):

336 result = self.insert_localized_strings(source, html_escapes)	307 result = self.insert_localized_strings(source, html_escapes)

337 result = self.process_links(result)	308 result = self.process_links(result)

338 return result	309 return result

339	310

340	311

341 class MarkdownConverter(Converter):	312 class MarkdownConverter(Converter):

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
465 ))	436 ))

466	437

467 def has_string(self, name, page=None):	438 def has_string(self, name, page=None):

468 if page is None:	439 if page is None:

469 page = self._params['page']	440 page = self._params['page']

470	441

471 localedata = self._get_locale_data(page)	442 localedata = self._get_locale_data(page)

472 return name in localedata	443 return name in localedata

473	444

474 def get_page_content(self, page, locale=None):	445 def get_page_content(self, page, locale=None):

475 from cms.utils import get_page_params

476

477 if locale is None:	446 if locale is None:

478 locale = self._params['locale']	447 locale = self._params['locale']

479 return get_page_params(self._params['source'], locale, page)	448 return utils.get_page_params(self._params['source'], locale, page)

480	449

481 def linkify(self, page, locale=None, **attrs):	450 def linkify(self, page, locale=None, **attrs):

482 if locale is None:	451 if locale is None:

483 locale = self._params['locale']	452 locale = self._params['locale']

484	453

485 locale, url = self._params['source'].resolve_link(page, locale)	454 locale, url = self._params['source'].resolve_link(page, locale)

486 return jinja2.Markup('<a{}>'.format(''.join(	455 return jinja2.Markup('<a{}>'.format(''.join(

487 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [	456 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [

488 ('href', url),	457 ('href', url),

489 ('hreflang', locale)	458 ('hreflang', locale)

490 ] + attrs.items()	459 ] + attrs.items()

491 )))	460 )))

492	461

493 def get_pages_metadata(self, filters=None):	462 def get_pages_metadata(self, filters=None):

494 if filters is not None and not isinstance(filters, dict):	463 if filters is not None and not isinstance(filters, dict):

495 raise TypeError('Filters are not a dictionary')	464 raise TypeError('Filters are not a dictionary')

496	465

497 return_data = []	466 return_data = []

498 for page_name, _format in self._params['source'].list_pages():	467 for page_name, _format in self._params['source'].list_pages():

499 data, filename = self._params['source'].read_page(page_name,	468 data, filename = self._params['source'].read_page(page_name,

500 _format)	469 _format)

501 page_data = parse_page_content(page_name, data)[0]	470 page_data = utils.extract_page_metadata(data)[0]

	471 page_data.setdefault('page', page_name)

502 if self.filter_metadata(filters, page_data) is True:	472 if self.filter_metadata(filters, page_data) is True:

503 return_data.append(page_data)	473 return_data.append(page_data)

504 return return_data	474 return return_data

505	475

506 def filter_metadata(self, filters, metadata):	476 def filter_metadata(self, filters, metadata):

507 # if only the page key is in the metadata then there	477 # if only the page key is in the metadata then there

508 # was no user defined metadata	478 # was no user defined metadata

509 if metadata.keys() == ['page']:	479 if metadata.keys() == ['page']:

510 return False	480 return False

511 if filters is None:	481 if filters is None:

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
558 stack[-1]['subitems'].append(item)	528 stack[-1]['subitems'].append(item)

559 stack.append(item)	529 stack.append(item)

560 return structured	530 return structured

561	531

562	532

563 converters = {	533 converters = {

564 'html': RawConverter,	534 'html': RawConverter,

565 'md': MarkdownConverter,	535 'md': MarkdownConverter,

566 'tmpl': TemplateConverter,	536 'tmpl': TemplateConverter,

567 }	537 }

OLD	NEW

« no previous file with comments | « no previous file | cms/utils.py » ('j') | no next file with comments »