Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: cms/converters.py

Issue 29590620: Noissue - Refactor Converter.__init__ and __call__ (Closed)
Patch Set: Created Oct. 27, 2017, 5:54 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | cms/utils.py » ('j') | cms/utils.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # This file is part of the Adblock Plus web scripts, 1 # This file is part of the Adblock Plus web scripts,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details. 11 # GNU General Public License for more details.
12 # 12 #
13 # You should have received a copy of the GNU General Public License 13 # You should have received a copy of the GNU General Public License
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
15 15
16 from __future__ import unicode_literals 16 from __future__ import unicode_literals
17 17
18 import os 18 import os
19 import HTMLParser 19 import HTMLParser
20 import re 20 import re
21 import urlparse 21 import urlparse
22 22
23 import jinja2 23 import jinja2
24 import markdown 24 import markdown
25 25
26 from cms import utils
Vasily Kuznetsov 2017/10/27 18:35:01 It makes more sense for `utils` to be imported int
mathias 2017/10/30 15:37:00 Acknowledged.
26 27
27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs 28 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs
28 # are inserted into the <head> tag 29 # are inserted into the <head> tag
29 orig_isBlockLevel = markdown.util.isBlockLevel 30 orig_isBlockLevel = markdown.util.isBlockLevel
30 31
31 32
32 def isBlockLevel(tag): 33 def isBlockLevel(tag):
33 if tag == 'head': 34 if tag == 'head':
34 return True 35 return True
35 return orig_isBlockLevel(tag) 36 return orig_isBlockLevel(tag)
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
111 # the document. 112 # the document.
112 self._append_text(data) 113 self._append_text(data)
113 114
114 def handle_entityref(self, name): 115 def handle_entityref(self, name):
115 self._append_text(self.unescape('&{};'.format(name))) 116 self._append_text(self.unescape('&{};'.format(name)))
116 117
117 def handle_charref(self, name): 118 def handle_charref(self, name):
118 self._append_text(self.unescape('&#{};'.format(name))) 119 self._append_text(self.unescape('&#{};'.format(name)))
119 120
120 121
121 def parse_page_content(page, data):
Vasily Kuznetsov 2017/10/27 18:35:01 This function has nothing to do with converters, i
122 """Separate page content into metadata (dict) and body text (str)"""
123 page_data = {'page': page}
124 lines = data.splitlines(True)
125 for i, line in enumerate(lines):
126 if line.strip() in {'<!--', '-->'}:
127 lines[i] = ''
128 continue
129 if not re.search(r'^\s*[\w\-]+\s*=', line):
130 break
131 name, value = line.split('=', 1)
132 value = value.strip()
133 if value.startswith('[') and value.endswith(']'):
134 value = [element.strip() for element in value[1:-1].split(',')]
135 lines[i] = '\n'
136 page_data[name.strip()] = value
137 return page_data, ''.join(lines)
138
139
140 class Converter: 122 class Converter:
141 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} 123 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'}
142 missing_translations = 0 124 missing_translations = 0
143 total_translations = 0 125 total_translations = 0
144 126
145 def __init__(self, params, key='pagedata'): 127 def __init__(self, data, filename, params):
Vasily Kuznetsov 2017/10/27 18:35:01 This signature is rather cryptic, what we want to
128 self._data = data
129 self._filename = filename
146 self._params = params 130 self._params = params
147 self._key = key
148 self._attribute_parser = AttributeParser(self.whitelist) 131 self._attribute_parser = AttributeParser(self.whitelist)
149 self._seen_defaults = {} 132 self._seen_defaults = {}
150 133
151 # Read in any parameters specified at the beginning of the file
152 # and override converter defaults with page specific params
153 data, filename = params[key]
Vasily Kuznetsov 2017/10/27 18:35:01 This is a side effect that is not related to conve
154 page_data, body_text = parse_page_content(params['page'], data)
155 params.update(page_data)
156 params[key] = (body_text, filename)
157
158 def localize_string( 134 def localize_string(
159 self, page, name, default, comment, localedata, escapes): 135 self, page, name, default, comment, localedata, escapes):
160 136
161 def escape(s): 137 def escape(s):
162 return re.sub(r'.', 138 return re.sub(r'.',
163 lambda match: escapes.get(match.group(0), 139 lambda match: escapes.get(match.group(0),
164 match.group(0)), 140 match.group(0)),
165 s, flags=re.S) 141 s, flags=re.S)
166 142
167 def re_escape(s): 143 def re_escape(s):
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
288 return text 264 return text
289 265
290 include_start_regex = '<' 266 include_start_regex = '<'
291 include_end_regex = '>' 267 include_end_regex = '>'
292 268
293 def resolve_includes(self, text): 269 def resolve_includes(self, text):
294 def resolve_include(match): 270 def resolve_include(match):
295 name = match.group(1) 271 name = match.group(1)
296 for format_, converter_class in converters.iteritems(): 272 for format_, converter_class in converters.iteritems():
297 if self._params['source'].has_include(name, format_): 273 if self._params['source'].has_include(name, format_):
298 self._params['includedata'] = ( 274 data, filename = (
Vasily Kuznetsov 2017/10/27 18:35:01 This key is not used anywhere, so we don't need to
299 self._params['source'].read_include(name, format_)) 275 self._params['source'].read_include(name, format_))
300 276
301 converter = converter_class(self._params, 277 # XXX: allowing includes to modify params of the whole page
302 key='includedata') 278 # seems like a bad idea but we have to support this because
279 # it's used by www.adblockplus.org.
280 metadata, rest = utils.extract_page_metadata(data)
Vasily Kuznetsov 2017/10/27 18:35:01 We have to maintain the ability of includes to wri
mathias 2017/10/30 15:37:00 Acknowledged.
281 self._params.update(metadata)
282
283 converter = converter_class(rest, filename, self._params)
303 result = converter() 284 result = converter()
304 self.missing_translations += converter.missing_translations 285 self.missing_translations += converter.missing_translations
305 self.total_translations += converter.total_translations 286 self.total_translations += converter.total_translations
306 return result 287 return result
307 raise Exception('Failed to resolve include {}' 288 raise Exception('Failed to resolve include {}'
308 ' on page {}'.format(name, self._params['page'])) 289 ' on page {}'.format(name, self._params['page']))
309 290
310 return re.sub( 291 return re.sub(
311 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format( 292 r'{}\?\s*include\s+([^\s<>"]+)\s*\?{}'.format(
312 self.include_start_regex, 293 self.include_start_regex,
313 self.include_end_regex 294 self.include_end_regex
314 ), 295 ),
315 resolve_include, 296 resolve_include,
316 text 297 text
317 ) 298 )
318 299
319 def __call__(self): 300 def __call__(self):
320 result = self.get_html(*self._params[self._key]) 301 result = self.get_html(self._data, self._filename)
321 result = self.resolve_includes(result) 302 return self.resolve_includes(result)
322 if self._key == 'pagedata':
Vasily Kuznetsov 2017/10/27 18:35:01 The return types of the two branches of if are dif
323 head = []
324
325 def add_to_head(match):
326 head.append(match.group(1))
327 return ''
328 body = re.sub(r'<head>(.*?)</head>', add_to_head, result,
329 flags=re.S)
330 return ''.join(head), body
331 return result
332 303
333 304
334 class RawConverter(Converter): 305 class RawConverter(Converter):
335 def get_html(self, source, filename): 306 def get_html(self, source, filename):
336 result = self.insert_localized_strings(source, html_escapes) 307 result = self.insert_localized_strings(source, html_escapes)
337 result = self.process_links(result) 308 result = self.process_links(result)
338 return result 309 return result
339 310
340 311
341 class MarkdownConverter(Converter): 312 class MarkdownConverter(Converter):
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 )) 436 ))
466 437
467 def has_string(self, name, page=None): 438 def has_string(self, name, page=None):
468 if page is None: 439 if page is None:
469 page = self._params['page'] 440 page = self._params['page']
470 441
471 localedata = self._get_locale_data(page) 442 localedata = self._get_locale_data(page)
472 return name in localedata 443 return name in localedata
473 444
474 def get_page_content(self, page, locale=None): 445 def get_page_content(self, page, locale=None):
475 from cms.utils import get_page_params
476
477 if locale is None: 446 if locale is None:
478 locale = self._params['locale'] 447 locale = self._params['locale']
479 return get_page_params(self._params['source'], locale, page) 448 return utils.get_page_params(self._params['source'], locale, page)
480 449
481 def linkify(self, page, locale=None, **attrs): 450 def linkify(self, page, locale=None, **attrs):
482 if locale is None: 451 if locale is None:
483 locale = self._params['locale'] 452 locale = self._params['locale']
484 453
485 locale, url = self._params['source'].resolve_link(page, locale) 454 locale, url = self._params['source'].resolve_link(page, locale)
486 return jinja2.Markup('<a{}>'.format(''.join( 455 return jinja2.Markup('<a{}>'.format(''.join(
487 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [ 456 ' {}="{}"'.format(name, jinja2.escape(value)) for name, value in [
488 ('href', url), 457 ('href', url),
489 ('hreflang', locale) 458 ('hreflang', locale)
490 ] + attrs.items() 459 ] + attrs.items()
491 ))) 460 )))
492 461
493 def get_pages_metadata(self, filters=None): 462 def get_pages_metadata(self, filters=None):
494 if filters is not None and not isinstance(filters, dict): 463 if filters is not None and not isinstance(filters, dict):
495 raise TypeError('Filters are not a dictionary') 464 raise TypeError('Filters are not a dictionary')
496 465
497 return_data = [] 466 return_data = []
498 for page_name, _format in self._params['source'].list_pages(): 467 for page_name, _format in self._params['source'].list_pages():
499 data, filename = self._params['source'].read_page(page_name, 468 data, filename = self._params['source'].read_page(page_name,
500 _format) 469 _format)
501 page_data = parse_page_content(page_name, data)[0] 470 page_data = utils.extract_page_metadata(data)[0]
471 page_data['page'] = page_name
mathias 2017/10/30 15:37:00 Shouldn't this use setdefault(), in order to allow
Vasily Kuznetsov 2017/11/07 17:08:29 Yeah, you're right, this would be needed to preser
502 if self.filter_metadata(filters, page_data) is True: 472 if self.filter_metadata(filters, page_data) is True:
503 return_data.append(page_data) 473 return_data.append(page_data)
504 return return_data 474 return return_data
505 475
506 def filter_metadata(self, filters, metadata): 476 def filter_metadata(self, filters, metadata):
507 # if only the page key is in the metadata then there 477 # if only the page key is in the metadata then there
508 # was no user defined metadata 478 # was no user defined metadata
509 if metadata.keys() == ['page']: 479 if metadata.keys() == ['page']:
510 return False 480 return False
511 if filters is None: 481 if filters is None:
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
558 stack[-1]['subitems'].append(item) 528 stack[-1]['subitems'].append(item)
559 stack.append(item) 529 stack.append(item)
560 return structured 530 return structured
561 531
562 532
563 converters = { 533 converters = {
564 'html': RawConverter, 534 'html': RawConverter,
565 'md': MarkdownConverter, 535 'md': MarkdownConverter,
566 'tmpl': TemplateConverter, 536 'tmpl': TemplateConverter,
567 } 537 }
OLDNEW
« no previous file with comments | « no previous file | cms/utils.py » ('j') | cms/utils.py » ('J')

Powered by Google App Engine
This is Rietveld