| Index: cms/converters.py | 
| =================================================================== | 
| --- a/cms/converters.py | 
| +++ b/cms/converters.py | 
| @@ -10,17 +10,24 @@ | 
| # Adblock Plus is distributed in the hope that it will be useful, | 
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| # GNU General Public License for more details. | 
| # | 
| # You should have received a copy of the GNU General Public License | 
| # along with Adblock Plus.  If not, see <http://www.gnu.org/licenses/>. | 
|  | 
| -import os, imp, re, jinja2, markdown | 
| +import os | 
| +import HTMLParser | 
| +import imp | 
| +import re | 
| + | 
| +import jinja2 | 
| +import markdown | 
| + | 
|  | 
| # Monkey-patch Markdown's isBlockLevel function to ensure that no paragraphs are | 
| # inserted into the <head> tag | 
| orig_isBlockLevel = markdown.util.isBlockLevel | 
| def isBlockLevel(tag): | 
| if tag == "head": | 
| return True | 
| else: | 
| @@ -30,76 +37,121 @@ markdown.util.isBlockLevel = isBlockLeve | 
| +# Maps characters that are special in HTML to the entity emitted in their | 
| +# place. Passed as the `escapes` table when localized strings are inserted. | 
| html_escapes = { | 
| "<": "&lt;", | 
| ">": "&gt;", | 
| "&": "&amp;", | 
| "\"": "&quot;", | 
| "'": "&#39;", | 
| } | 
|  | 
| +class AttributeParser(HTMLParser.HTMLParser): | 
| +  # Splits HTML markup into a string whose tags carry no attributes and the | 
| +  # attribute lists that were taken out, grouped by tag name. Tags that are not | 
| +  # in the whitelist are rejected. | 
| + | 
| +  # Per-parse state, only set while parse() is running | 
| +  _string = None | 
| +  _attrs = None | 
| +  _pagename = None | 
| + | 
| +  def __init__(self, whitelist): | 
| +    # whitelist: container of the tag names that may appear in parsed text. | 
| +    # The base initializer is called explicitly so that the parser state | 
| +    # exists from construction on, not only after the first parse() call. | 
| +    HTMLParser.HTMLParser.__init__(self) | 
| +    self._whitelist = whitelist | 
| + | 
| +  def parse(self, text, pagename): | 
| +    # Parses `text` and returns a tuple (string, attrs): `string` is the text | 
| +    # with attribute-free tags and resolved character references, `attrs` | 
| +    # maps each tag name to a list holding one attribute list per occurrence | 
| +    # of that tag. `pagename` is only used in error messages. | 
| +    # Raises an Exception if a tag outside the whitelist is opened. | 
| +    self.reset() | 
| +    self._string = [] | 
| +    self._attrs = {} | 
| +    self._pagename = pagename | 
| + | 
| +    try: | 
| +      self.feed(text) | 
| +      return "".join(self._string), self._attrs | 
| +    finally: | 
| +      # Drop the per-parse state even if parsing failed | 
| +      self._string = None | 
| +      self._attrs = None | 
| +      self._pagename = None | 
| + | 
| +  def handle_starttag(self, tag, attrs): | 
| +    # Records the attributes of an allowed tag and emits the bare tag | 
| +    if tag not in self._whitelist: | 
| +      raise Exception("Unexpected HTML tag '%s' in localizable string on page %s" % (tag, self._pagename)) | 
| +    self._attrs.setdefault(tag, []).append(attrs) | 
| +    self._string.append("<%s>" % tag) | 
| + | 
| +  def handle_endtag(self, tag): | 
| +    # Closing tags are emitted unchanged, they are not checked here | 
| +    self._string.append("</%s>" % tag) | 
| + | 
| +  def handle_data(self, data): | 
| +    # Note: lack of escaping here is intentional. The result is a locale string; | 
| +    # HTML escaping is applied when this string is inserted into the document. | 
| +    self._string.append(data) | 
| + | 
| +  def handle_entityref(self, name): | 
| +    # Named reference such as &amp;, stored as the character it stands for | 
| +    self._string.append(self.unescape("&%s;" % name)) | 
| + | 
| +  def handle_charref(self, name): | 
| +    # Numeric reference such as &#38;, stored as the character it stands for | 
| +    self._string.append(self.unescape("&#%s;" % name)) | 
| + | 
| class Converter: | 
| +  whitelist = set(["a", "em", "strong"]) | 
| + | 
| def __init__(self, params, key="pagedata"): | 
| +    # params: dictionary with the page parameters, params[key] holds the text | 
| +    # to convert. Leading "name = value" lines of that text are moved into | 
| +    # params, the remaining text is stored back under params[key]. | 
| self._params = params | 
| self._key = key | 
| +    self._attribute_parser = AttributeParser(self.whitelist) | 
|  | 
| # Read in any parameters specified at the beginning of the file | 
| lines = params[key].splitlines(True) | 
| while lines and re.search(r"^\s*[\w\-]+\s*=", lines[0]): | 
| name, value = lines.pop(0).split("=", 1) | 
| params[name.strip()] = value.strip() | 
| params[key] = "".join(lines) | 
|  | 
| -  def localize_string(self, name, localedata, escapes, links=[]): | 
| +  def localize_string(self, name, default, localedata, escapes): | 
| +    # Returns the string `name` as markup for the current locale. | 
| +    #   default:    text of the string in the default locale, may contain | 
| +    #               whitelisted tags with attributes | 
| +    #   localedata: mapping of string names to translations | 
| +    #   escapes:    mapping of single characters to their escaped form | 
| +    # The chosen text is escaped completely, afterwards the whitelisted tags | 
| +    # are restored and receive the attributes found in `default`. | 
| +    # Raises an Exception if `default` contains a tag outside the whitelist. | 
| def escape(s): | 
| return re.sub(r".", | 
| lambda match: escapes.get(match.group(0), match.group(0)), | 
| s, flags=re.S) | 
| def re_escape(s): | 
| return re.escape(escape(s)) | 
|  | 
| -    try: | 
| +    # Extract tag attributes from default string | 
| +    default, saved_attributes = self._attribute_parser.parse(default, self._params["page"]) | 
| + | 
| +    # Get translation | 
| +    if self._params["locale"] != self._params["defaultlocale"] and name in localedata: | 
| result = localedata[name].strip() | 
| -    except KeyError: | 
| -      raise Exception("Lookup failed for string %s used on page %s" % (name, self._params["page"])) | 
| +    else: | 
| +      result = default | 
|  | 
| -    # Insert links | 
| +    # Insert attributes | 
| result = escape(result) | 
| -    while links: | 
| +    for tag in self.whitelist: | 
| +      saved = saved_attributes.get(tag, []) | 
| +      # Each saved attribute list goes to the first occurrence of the tag that | 
| +      # is still in its escaped form | 
| +      for attrs in saved: | 
| +        # Attributes without a value are reported as None, emit them as "" | 
| +        attrs = " ".join( | 
| +          '%s="%s"' % (escape(key), escape(value or "")) for key, value in attrs | 
| +        ) | 
| +        start_tag = "<%s %s>" % (tag, attrs) | 
| +        # The replacement is a function rather than a template so that | 
| +        # backslashes in attribute values are inserted literally | 
| +        result = re.sub( | 
| +          r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)), | 
| +          lambda match: "%s%s</%s>" % (start_tag, match.group(1), tag), | 
| +          result, 1, flags=re.S | 
| +        ) | 
| result = re.sub( | 
| -        r"%s([^<>]*?)%s" % (re_escape("<a>"), re_escape("</a>")), | 
| -        r'<a href="%s">\1</a>' % links.pop(0), | 
| -        result, 1, flags=re.S | 
| +        r"%s([^<>]*?)%s" % (re_escape("<%s>" % tag), re_escape("</%s>" % tag)), | 
| +        r"<%s>\1</%s>" % (tag, tag), | 
| +        result, flags=re.S | 
| ) | 
| - | 
| -    # <strong> and <em> tags are allowed | 
| -    result = re.sub( | 
| -      r"%s([^<>]*?)%s" % (re_escape("<strong>"), re_escape("</strong>")), | 
| -      r"<strong>\1</strong>", | 
| -      result, flags=re.S | 
| -    ) | 
| -    result = re.sub( | 
| -      r"%s([^<>]*?)%s" % (re_escape("<em>"), re_escape("</em>")), | 
| -      r"<em>\1</em>", | 
| -      result, flags=re.S | 
| -    ) | 
| return result | 
|  | 
| -  def insert_localized_strings(self, text, escapes): | 
| +  def insert_localized_strings(self, text, escapes, to_html=lambda s: s): | 
| +    # Replaces every "{{name[comment] default text}}" placeholder in `text` by | 
| +    # the result of localize_string(). `escapes` is handed on unchanged, | 
| +    # `to_html` is applied to the default text first and leaves it as it is | 
| +    # unless the caller supplies a converter. | 
| def lookup_string(match): | 
| -      name, links = match.groups() | 
| -      if links: | 
| -        links = map(unicode.strip, links.strip("()").split(",")) | 
| -      else: | 
| -        links = [] | 
| -      return self.localize_string(name, self._params["localedata"], escapes, links) | 
| +      name, comment, default = match.groups() | 
| +      default = to_html(default).strip() | 
| + | 
| +      # Note: We currently ignore the comment; it is only relevant when | 
| +      # generating the master translation. | 
| +      return self.localize_string(name, default, self._params["localedata"], escapes) | 
|  | 
| +    # Groups: string name, optional comment in square brackets, default text. | 
| +    # re.S lets the comment and the default text span several lines. | 
| return re.sub( | 
| -      r"\$([\w\-]+)(\([^()$]+\))?\$", | 
| +      r"\{\{\s*([\w\-]+)(?:\[(.*?)\])?\s+(.*?)\}\}", | 
| lookup_string, | 
| -      text | 
| +      text, | 
| +      flags=re.S | 
| ) | 
|  | 
| def process_links(self, text): | 
| def process_link(match): | 
| pre, attr, url, post = match.groups() | 
| url = jinja2.Markup(url).unescape() | 
|  | 
| locale, new_url = self._params["source"].resolve_link(url, self._params["locale"]) | 
| @@ -121,17 +173,17 @@ class Converter: | 
| def resolve_include(match): | 
| global converters | 
| name = match.group(1) | 
| for format, converter_class in converters.iteritems(): | 
| if self._params["source"].has_include(name, format): | 
| self._params["includedata"] = self._params["source"].read_include(name, format) | 
| converter = converter_class(self._params, key="includedata") | 
| return converter() | 
| -      raise Exception("Failed to resolve include %s in page %s" % (name, self._params["page"])) | 
| +      raise Exception("Failed to resolve include %s on page %s" % (name, self._params["page"])) | 
|  | 
| return re.sub( | 
| r'%s\?\s*include\s+([^\s<>"]+)\s*\?%s' % ( | 
| self.include_start_regex, | 
| self.include_end_regex | 
| ), | 
| resolve_include, | 
| text | 
| @@ -178,17 +230,20 @@ class MarkdownConverter(Converter): | 
| for char in markdown.Markdown.ESCAPED_CHARS: | 
| escapes[char] = "&#" + str(ord(char)) + ";" | 
| for key, value in html_escapes.iteritems(): | 
| escapes[key] = value | 
|  | 
| md = markdown.Markdown(output="html5", extensions=["attr_list"]) | 
| md.preprocessors["html_block"].markdown_in_raw = True | 
|  | 
| -    result = self.insert_localized_strings(source, escapes) | 
| +    def to_html(s): | 
| +      return re.sub(r'</?p>', '', md.convert(s)) | 
| + | 
| +    result = self.insert_localized_strings(source, escapes, to_html) | 
| result = md.convert(result) | 
| result = re.sub(r"&#(\d+);", remove_unnecessary_entities, result) | 
| result = self.process_links(result) | 
| return result | 
|  | 
| class TemplateConverter(Converter): | 
| class _SourceLoader(jinja2.BaseLoader): | 
| def __init__(self, source): | 
| @@ -204,16 +259,20 @@ class TemplateConverter(Converter): | 
| Converter.__init__(self, *args, **kwargs) | 
|  | 
| filters = { | 
| "translate": self.translate, | 
| "linkify": self.linkify, | 
| "toclist": self.toclist, | 
| } | 
|  | 
| +    globals = { | 
| +      "get_string": self.get_string, | 
| +    } | 
| + | 
| for filename in self._params["source"].list_files("filters"): | 
| root, ext = os.path.splitext(filename) | 
| if ext.lower() != ".py": | 
| continue | 
|  | 
| path = "%s/%s" % ("filters", filename) | 
| code = self._params["source"].read_file(path) | 
| module = imp.new_module(root.replace("/", ".")) | 
| @@ -222,30 +281,35 @@ class TemplateConverter(Converter): | 
| func = os.path.basename(root) | 
| if not hasattr(module, func): | 
| raise Exception("Expected function %s not found in filter file %s" % (func, filename)) | 
| filters[func] = getattr(module, func) | 
| filters[func].module_ref = module  # Prevent garbage collection | 
|  | 
| self._env = jinja2.Environment(loader=self._SourceLoader(self._params["source"]), autoescape=True) | 
| self._env.filters.update(filters) | 
| +    self._env.globals.update(globals) | 
|  | 
| def get_html(self, source): | 
| +    # Renders `source` as a Jinja2 template with the page parameters as context | 
| template = self._env.from_string(source) | 
| return template.render(self._params) | 
|  | 
| -  def translate(self, name, page=None, links=[]): | 
| -    if page == None: | 
| -      localedata = self._params["localedata"] | 
| -    else: | 
| -      localedata = self._params["source"].read_locale(self._params["locale"], page) | 
| -    return jinja2.Markup(self.localize_string(name, localedata, html_escapes, links=links)) | 
| +  def translate(self, default, name, comment=None): | 
| +    # Template filter "translate": the filtered value is the default text, | 
| +    # `name` identifies the string in the locale data of the current page. | 
| +    # The result is wrapped in jinja2.Markup as it is escaped already. | 
| +    # Note: We currently ignore the comment; it is only relevant when | 
| +    # generating the master translation. | 
| +    localedata = self._params["localedata"] | 
| +    return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes)) | 
| + | 
| +  def get_string(self, name, page): | 
| +    # Template global "get_string": returns the string `name` from the locale | 
| +    # data of another page, read for the current locale. The stored text | 
| +    # doubles as the default text handed to localize_string(). | 
| +    # NOTE(review): a name missing from that locale data surfaces as a bare | 
| +    # KeyError without the page name - confirm whether read_locale() falls | 
| +    # back to the default locale. | 
| +    localedata = self._params["source"].read_locale(self._params["locale"], page) | 
| +    default = localedata[name] | 
| +    return jinja2.Markup(self.localize_string(name, default, localedata, html_escapes)) | 
|  | 
| def linkify(self, page, locale=None, **attrs): | 
| -    if locale == None: | 
| +    if locale is None: | 
| locale = self._params["locale"] | 
|  | 
| locale, url = self._params["source"].resolve_link(page, locale) | 
| return jinja2.Markup('<a%s>' % ''.join( | 
| ' %s="%s"' % (name, jinja2.escape(value)) for name, value in [ | 
| ('href', url), | 
| ('hreflang', locale) | 
| ] + attrs.items() | 
|  |