| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 # This file is part of the Adblock Plus web scripts, | 1 # This file is part of the Adblock Plus web scripts, |
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
| 3 # | 3 # |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
| 7 # | 7 # |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
| 12 # | 12 # |
| 13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
| 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 15 | 15 |
| 16 from __future__ import unicode_literals | 16 from __future__ import unicode_literals |
| 17 | 17 |
| 18 import os | 18 import os |
| 19 import HTMLParser | 19 import HTMLParser |
| 20 import re | 20 import re |
| 21 import urlparse | 21 import urlparse |
| 22 import json | |
| 22 | 23 |
| 23 import jinja2 | 24 import jinja2 |
| 24 import markdown | 25 import markdown |
| 25 | 26 |
| 26 | 27 |
| 27 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs | 28 # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs |
| 28 # are inserted into the <head> tag | 29 # are inserted into the <head> tag |
| 29 orig_isBlockLevel = markdown.util.isBlockLevel | 30 orig_isBlockLevel = markdown.util.isBlockLevel |
| 30 | 31 |
| 31 | 32 |
| 32 def isBlockLevel(tag): | 33 def isBlockLevel(tag): |
| 33 if tag == 'head': | 34 if tag == 'head': |
| 34 return True | 35 return True |
| 35 return orig_isBlockLevel(tag) | 36 return orig_isBlockLevel(tag) |
| 36 | 37 |
| 38 | |
|
Sebastian Noack
2017/08/29 22:49:39
Adding this blank line is unrelated.
rosie
2018/03/26 02:32:21
True. My linter was showing a warning because ther
Sebastian Noack
2018/03/26 02:57:47
Strictly, there should be two blank lines surround
| |
| 37 markdown.util.isBlockLevel = isBlockLevel | 39 markdown.util.isBlockLevel = isBlockLevel |
| 38 | 40 |
| 39 html_escapes = { | 41 html_escapes = { |
| 40 '<': '&lt;', | 42 '<': '&lt;', |
| 41 '>': '&gt;', | 43 '>': '&gt;', |
| 42 '&': '&amp;', | 44 '&': '&amp;', |
| 43 '"': '&quot;', | 45 '"': '&quot;', |
| 44 "'": '&#39;', | 46 "'": '&#39;', |
| 45 } | 47 } |
| 46 | 48 |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 110 # the document. | 112 # the document. |
| 111 self._append_text(data) | 113 self._append_text(data) |
| 112 | 114 |
| 113 def handle_entityref(self, name): | 115 def handle_entityref(self, name): |
| 114 self._append_text(self.unescape('&{};'.format(name))) | 116 self._append_text(self.unescape('&{};'.format(name))) |
| 115 | 117 |
| 116 def handle_charref(self, name): | 118 def handle_charref(self, name): |
| 117 self._append_text(self.unescape('&#{};'.format(name))) | 119 self._append_text(self.unescape('&#{};'.format(name))) |
| 118 | 120 |
| 119 | 121 |
| 122 def parse_metadata(page, data): | |
| 123 metadata = {'page': page} | |
| 124 try: | |
| 125 decoder = json.JSONDecoder() | |
| 126 json_data, index = decoder.raw_decode(data) | |
| 127 metadata.update(json_data) | |
| 128 page_data = data[index:].strip() | |
|
Sebastian Noack
2017/08/29 22:49:39
Note that when parsing the legacy non-JSON format,
Vasily Kuznetsov
2017/08/30 10:50:00
This consideration is rather non-obvious, for exam
rosie
2018/03/26 02:32:21
(This function was moved to utils.py.) Now, the li
| |
| 129 except ValueError: | |
|
Sebastian Noack
2017/08/29 22:49:39
In case the data can be interpreted as JSON, but r
rosie
2018/03/26 02:32:21
The check to make sure a dict is returned now happ
| |
| 130 lines = data.splitlines(True) | |
| 131 for i, line in enumerate(lines): | |
| 132 if not re.search(r'^\s*[\w\-]+\s*=', line): | |
|
Sebastian Noack
2017/08/29 22:49:39
There is some redundancy in the regular expression
rosie
2018/03/26 02:32:20
Done.
| |
| 133 break | |
| 134 name, value = line.split('=', 1) | |
| 135 value = value.strip() | |
| 136 if value.startswith('[') and value.endswith(']'): | |
| 137 value = [element.strip() for element in value[1:-1].split(',')] | |
| 138 lines[i] = '' | |
| 139 metadata[name.strip()] = value | |
| 140 page_data = ''.join(lines) | |
| 141 return metadata, page_data | |
| 142 | |
| 143 | |
| 120 def parse_page_content(page, data): | 144 def parse_page_content(page, data): |
| 121 """Separate page content into metadata (dict) and body text (str)""" | 145 """Separate page content into metadata (dict) and body text (str)""" |
| 122 page_data = {'page': page} | 146 # If metadata is in a comment block, extract it |
| 123 lines = data.splitlines(True) | 147 comment_start = '<!--' |
|
Sebastian Noack
2017/08/29 22:49:39
It seems much simpler to use a regular expression
Vasily Kuznetsov
2017/08/30 10:50:00
I think it was my advice to not use regexps here b
rosie
2018/03/26 02:32:20
Done.
| |
| 124 for i, line in enumerate(lines): | 148 comment_end = '-->' |
| 125 if line.strip() in {'<!--', '-->'}: | 149 if data.lstrip().startswith(comment_start): |
| 126 lines[i] = '' | 150 start_index = data.index(comment_start) + len(comment_start) |
| 127 continue | 151 end_index = data.index(comment_end) |
| 128 if not re.search(r'^\s*[\w\-]+\s*=', line): | 152 comment = data[start_index:end_index] |
| 129 break | 153 page_tail = data[end_index + len(comment_end):] |
| 130 name, value = line.split('=', 1) | 154 metadata, comment_data = parse_metadata(page, comment.strip()) |
|
Sebastian Noack
2017/08/29 22:49:39
Does stripping the comment have any effect here? I
Vasily Kuznetsov
2017/08/30 10:50:01
JSON parsing fails on leading space, but if we wan
rosie
2018/03/26 02:32:21
Done.
| |
| 131 value = value.strip() | 155 page_data = '{}\n{}\n{}\n\n{}'.format(comment_start, comment_data, |
| 132 if value.startswith('[') and value.endswith(']'): | 156 comment_end, page_tail.strip()) |
| 133 value = [element.strip() for element in value[1:-1].split(',')] | 157 else: |
| 134 lines[i] = '\n' | 158 metadata, page_data = parse_metadata(page, data.strip()) |
| 135 page_data[name.strip()] = value | 159 return metadata, page_data |
| 136 return page_data, ''.join(lines) | |
| 137 | 160 |
| 138 | 161 |
| 139 class Converter: | 162 class Converter: |
| 140 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} | 163 whitelist = {'a', 'em', 'sup', 'strong', 'code', 'span'} |
| 141 missing_translations = 0 | 164 missing_translations = 0 |
| 142 total_translations = 0 | 165 total_translations = 0 |
| 143 | 166 |
| 144 def __init__(self, params, key='pagedata'): | 167 def __init__(self, params, key='pagedata'): |
| 145 self._params = params | 168 self._params = params |
| 146 self._key = key | 169 self._key = key |
| (...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 556 stack.pop() | 579 stack.pop() |
| 557 stack[-1]['subitems'].append(item) | 580 stack[-1]['subitems'].append(item) |
| 558 stack.append(item) | 581 stack.append(item) |
| 559 return structured | 582 return structured |
| 560 | 583 |
| 561 converters = { | 584 converters = { |
| 562 'html': RawConverter, | 585 'html': RawConverter, |
| 563 'md': MarkdownConverter, | 586 'md': MarkdownConverter, |
| 564 'tmpl': TemplateConverter, | 587 'tmpl': TemplateConverter, |
| 565 } | 588 } |
| OLD | NEW |