| Index: compiled/RegExpFilter.cpp | 
| =================================================================== | 
| new file mode 100644 | 
| --- /dev/null | 
| +++ b/compiled/RegExpFilter.cpp | 
| @@ -0,0 +1,301 @@ | 
| +#include <climits> | 
| +#include <unordered_map> | 
| + | 
| +#include <emscripten.h> | 
| + | 
| +#include "RegExpFilter.h" | 
| +#include "WhiteListFilter.h" | 
| +#include "InvalidFilter.h" | 
| +#include "StringScanner.h" | 
| + | 
| +namespace | 
| +{ | 
| +  // Bit flags for the content types a filter can apply to. ProcessOption() | 
| +  // combines them into the contentType mask of a RegExpFilter. | 
| +  enum | 
| +  { | 
| +    TYPE_OTHER = 0x1, | 
| +    TYPE_SCRIPT = 0x2, | 
| +    TYPE_IMAGE = 0x4, | 
| +    TYPE_STYLESHEET = 0x8, | 
| +    TYPE_OBJECT = 0x10, | 
| +    TYPE_SUBDOCUMENT = 0x20, | 
| +    TYPE_DOCUMENT = 0x40, | 
| +    TYPE_PING = 0x400, | 
| +    TYPE_XMLHTTPREQUEST = 0x800, | 
| +    TYPE_OBJECT_SUBREQUEST = 0x1000, | 
| +    TYPE_MEDIA = 0x4000, | 
| +    TYPE_FONT = 0x8000, | 
| +    TYPE_POPUP = 0x8000000, | 
| +    TYPE_GENERICBLOCK = 0x10000000, | 
| +    TYPE_GENERICHIDE = 0x20000000, | 
| +    TYPE_ELEMHIDE = 0x40000000, | 
| +  }; | 
| + | 
| +  // Maps option names (upper-case, dashes replaced by underscores, as | 
| +  // produced by ProcessOption()) to their content type bit. The table is only | 
| +  // ever read, so it is const: that rules out accidental insertion through | 
| +  // operator[] or any other modification after start-up. | 
| +  const std::unordered_map<std::u16string,int> typeMap({ | 
| +    {u"OTHER", TYPE_OTHER}, | 
| +    {u"SCRIPT", TYPE_SCRIPT}, | 
| +    {u"IMAGE", TYPE_IMAGE}, | 
| +    {u"STYLESHEET", TYPE_STYLESHEET}, | 
| +    {u"OBJECT", TYPE_OBJECT}, | 
| +    {u"SUBDOCUMENT", TYPE_SUBDOCUMENT}, | 
| +    {u"DOCUMENT", TYPE_DOCUMENT}, | 
| +    {u"XBL", TYPE_OTHER},          // Backwards compat | 
| +    {u"PING", TYPE_PING}, | 
| +    {u"XMLHTTPREQUEST", TYPE_XMLHTTPREQUEST}, | 
| +    {u"OBJECT_SUBREQUEST", TYPE_OBJECT_SUBREQUEST}, | 
| +    {u"DTD", TYPE_OTHER},          // Backwards compat | 
| +    {u"MEDIA", TYPE_MEDIA}, | 
| +    {u"FONT", TYPE_FONT}, | 
| +    {u"BACKGROUND", TYPE_IMAGE},   // Backwards compat | 
| + | 
| +    {u"POPUP", TYPE_POPUP}, | 
| +    {u"GENERICBLOCK", TYPE_GENERICBLOCK}, | 
| +    {u"GENERICHIDE", TYPE_GENERICHIDE}, | 
| +    {u"ELEMHIDE", TYPE_ELEMHIDE}, | 
| +  }); | 
| + | 
| +  // Content type mask used when a filter names no type option: every bit of | 
| +  // INT_MAX except the types listed below. Const because it is never changed. | 
| +  const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | | 
| +      TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 
| + | 
| +  // Translates a filter pattern into regular expression source and hands it | 
| +  // to regexps.create() on the JavaScript side. | 
| +  // | 
| +  //   source     filter pattern using '*', '^' and '|' as special characters | 
| +  //   matchCase  forwarded unchanged as second argument of regexps.create() | 
| +  // | 
| +  // Returns whatever regexps.create() returns; Matches() stores it as the | 
| +  // regexp ID. | 
| +  int GenerateRegExp(const std::u16string& source, bool matchCase) | 
| +  { | 
| +    // Note: This doesn't remove trailing wildcards, otherwise the result should | 
| +    // be identical to Filter.toRegExp(). | 
| +    std::u16string result; | 
| +    // The output is at least as long as the input once leading wildcards are | 
| +    // ignored, so this avoids the first few reallocations. | 
| +    result.reserve(source.length()); | 
| + | 
| +    // Pretending that a wildcard precedes the pattern makes leading wildcards | 
| +    // produce no output at all. | 
| +    char16_t prevChar = u'*'; | 
| +    for (size_t i = 0, l = source.length(); i < l; ++i) | 
| +    { | 
| +      const char16_t currChar = source[i]; | 
| +      switch (currChar) | 
| +      { | 
| +        case u'*': | 
| +          // A run of wildcards collapses into a single ".*" | 
| +          if (prevChar != u'*') | 
| +            result += u".*"; | 
| +          break; | 
| +        case u'^': | 
| +          // Separator placeholder: a character from the class below or the | 
| +          // end of the string | 
| +          result += u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"; | 
| +          break; | 
| +        case u'|': | 
| +          if (i == 0) | 
| +          { | 
| +            // Anchor at expression start, maybe extended anchor? | 
| +            if (i + 1 < l && source[i + 1] == u'|') | 
| +            { | 
| +              result += u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"; | 
| +              // Skip the second '|' of the extended anchor | 
| +              ++i; | 
| +            } | 
| +            else | 
| +              result += u"^"; | 
| +          } | 
| +          else if (i == l - 1) | 
| +          { | 
| +            // Anchor at expression end, ignore if following separator placeholder | 
| +            // (the placeholder already accepts the end of the string) | 
| +            if (prevChar != u'^') | 
| +              result += u"$"; | 
| +          } | 
| +          else | 
| +          { | 
| +            // Not actually an anchor, escape it | 
| +            result += u"\\|"; | 
| +          } | 
| +          break; | 
| +        default: | 
| +          // ASCII letters, digits and everything outside ASCII are copied | 
| +          // verbatim, any other character gets a backslash in front. | 
| +          if ((currChar >= u'a' && currChar <= u'z') || | 
| +              (currChar >= u'A' && currChar <= u'Z') || | 
| +              (currChar >= u'0' && currChar <= u'9') || | 
| +              currChar >= 128) | 
| +          { | 
| +            result += currChar; | 
| +          } | 
| +          else | 
| +          { | 
| +            result += u"\\"; | 
| +            result.append(1, currChar); | 
| +          } | 
| +      } | 
| +      prevChar = currChar; | 
| +    } | 
| +    return EM_ASM_INT(return regexps.create($0, $1), &result, matchCase); | 
| +  } | 
| +} | 
| + | 
| +// Builds a filter from its pattern and its comma-separated option list. | 
| +// | 
| +//   text     complete filter text, passed on to ActiveFilter | 
| +//   pattern  part of the filter to match against; a pattern enclosed in | 
| +//            slashes is compiled as a regular expression right away, anything | 
| +//            else is stored and only converted on the first Matches() call | 
| +//   options  option list without the leading '$', may be empty | 
| +// | 
| +// Throws std::u16string describing the problem when ProcessOption() rejects | 
| +// an option or regexps.getError() reports an error for the expression. | 
| +RegExpFilter::RegExpFilter(const std::u16string& text, | 
| +    const std::u16string& pattern, const std::u16string& options) | 
| +    : ActiveFilter(text, true), regexpId(0), contentType(-1), matchCase(false), | 
| +      thirdParty(TrippleState::ANY) | 
| +{ | 
| +  // Boundaries of the option currently being scanned, -1 meaning "not seen | 
| +  // yet". NOTE(review): this relies on scanner.position() being the index of | 
| +  // the character next() just returned - confirm against StringScanner. | 
| +  int optionStart = 0; | 
| +  int optionEnd = -1; | 
| +  int valueStart = -1; | 
| +  // The appended comma terminates the last option, so that it is processed | 
| +  // by the same code path as all the others. | 
| +  StringScanner scanner(options + u","); | 
| +  while (!scanner.done()) | 
| +  { | 
| +    switch (scanner.next()) | 
| +    { | 
| +      case u'=': | 
| +        // Only the first '=' separates name and value, later ones belong to | 
| +        // the value. | 
| +        if (optionEnd < 0) | 
| +        { | 
| +          optionEnd = scanner.position(); | 
| +          valueStart = optionEnd + 1; | 
| +        } | 
| +        break; | 
| +      case u',': | 
| +        if (optionEnd < 0) | 
| +          optionEnd = scanner.position(); | 
| +        ProcessOption(options, optionStart, optionEnd, valueStart, scanner.position()); | 
| +        optionStart = scanner.position() + 1; | 
| +        optionEnd = -1; | 
| +        valueStart = -1; | 
| +        break; | 
| +    } | 
| +  } | 
| +  // No type option seen at all | 
| +  if (contentType < 0) | 
| +    contentType = defaultTypeMask; | 
| + | 
| +  size_t len = pattern.length(); | 
| +  if (len >= 2 && pattern[0] == u'/' && pattern[len - 1] == u'/') | 
| +  { | 
| +    // Regular expression given literally, strip the slashes | 
| +    std::u16string param = pattern.substr(1, len - 2); | 
| +    regexpId = EM_ASM_INT(return regexps.create($0, $1), &param, matchCase); | 
| + | 
| +    std::u16string* error = reinterpret_cast<std::u16string*>(EM_ASM_INT(return regexps.getError($0), regexpId)); | 
| +    if (error) | 
| +    { | 
| +      // Copy the message before releasing the regexp: whether the string | 
| +      // stays valid after regexps.delete() cannot be told from here. | 
| +      std::u16string reason(*error); | 
| +      // The destructor does not run when a constructor throws, so the regexp | 
| +      // has to be released explicitly. | 
| +      EM_ASM_ARGS(regexps.delete($0), regexpId); | 
| +      throw reason; | 
| +    } | 
| +  } | 
| +  else | 
| +    regexpSource = pattern; | 
| +} | 
| + | 
| +// Releases the JavaScript-side regexp if one has been created for this | 
| +// filter; a zero ID means there is nothing to release. | 
| +RegExpFilter::~RegExpFilter() | 
| +{ | 
| +  if (!regexpId) | 
| +    return; | 
| + | 
| +  EM_ASM_ARGS(regexps.delete($0), regexpId); | 
| +} | 
| + | 
| +// Applies a single option from the option list to this filter. | 
| +// | 
| +//   options      the complete option list | 
| +//   optionStart  index of the first character of the option name | 
| +//   optionEnd    index one past the option name | 
| +//   valueStart   index of the first character of the value, negative if the | 
| +//                option has no value | 
| +//   valueEnd     index one past the value | 
| +// | 
| +// Empty options are ignored. Throws std::u16string for unknown option names. | 
| +void RegExpFilter::ProcessOption(const std::u16string& options, | 
| +    int optionStart, int optionEnd, int valueStart, int valueEnd) | 
| +{ | 
| +  if (optionEnd <= optionStart) | 
| +    return; | 
| + | 
| +  // A leading tilde inverts the meaning of the option | 
| +  const bool reverse = (options[optionStart] == u'~'); | 
| +  if (reverse) | 
| +    ++optionStart; | 
| + | 
| +  // Normalize the name: ASCII letters to upper case, dashes to underscores | 
| +  std::u16string name(options.substr(optionStart, optionEnd - optionStart)); | 
| +  for (char16_t& ch : name) | 
| +  { | 
| +    if (ch >= u'a' && ch <= u'z') | 
| +      ch = ch + u'A' - u'a'; | 
| +    else if (ch == u'-') | 
| +      ch = u'_'; | 
| +  } | 
| + | 
| +  const bool hasValue = (valueStart >= 0 && valueEnd > valueStart); | 
| + | 
| +  auto typeEntry = typeMap.find(name); | 
| +  if (typeEntry != typeMap.end()) | 
| +  { | 
| +    // The first type option decides the starting point: an inverted type | 
| +    // removes its bit from the default mask, a regular one starts from an | 
| +    // empty mask. | 
| +    if (reverse) | 
| +    { | 
| +      if (contentType < 0) | 
| +        contentType = defaultTypeMask; | 
| +      contentType &= ~typeEntry->second; | 
| +    } | 
| +    else | 
| +    { | 
| +      if (contentType < 0) | 
| +        contentType = 0; | 
| +      contentType |= typeEntry->second; | 
| +    } | 
| +    return; | 
| +  } | 
| + | 
| +  if (name == u"DOMAIN") | 
| +  { | 
| +    // Without a value the option is silently ignored | 
| +    if (hasValue) | 
| +      ParseDomains(options.substr(valueStart, valueEnd - valueStart), u'|'); | 
| +    return; | 
| +  } | 
| + | 
| +  if (name == u"SITEKEY") | 
| +  { | 
| +    if (!hasValue) | 
| +      return; | 
| + | 
| +    // Keys are separated by '|'; the appended separator terminates the last | 
| +    // key. Scanner positions are relative to the value, hence the valueStart | 
| +    // offset when cutting the key out of the option list. | 
| +    StringScanner scanner(options.substr(valueStart, valueEnd - valueStart) + u"|"); | 
| +    size_t start = 0; | 
| +    while (!scanner.done()) | 
| +    { | 
| +      if (scanner.next() != u'|') | 
| +        continue; | 
| + | 
| +      // Skip empty keys | 
| +      if (scanner.position() > start) | 
| +        sitekeys.insert(options.substr(valueStart + start, scanner.position() - start)); | 
| +      start = scanner.position() + 1; | 
| +    } | 
| +    return; | 
| +  } | 
| + | 
| +  if (name == u"MATCH_CASE") | 
| +  { | 
| +    matchCase = !reverse; | 
| +    return; | 
| +  } | 
| + | 
| +  if (name == u"THIRD_PARTY") | 
| +  { | 
| +    thirdParty = reverse ? TrippleState::NO : TrippleState::YES; | 
| +    return; | 
| +  } | 
| + | 
| +  if (name == u"COLLAPSE") | 
| +  { | 
| +    collapse = reverse ? TrippleState::NO : TrippleState::YES; | 
| +    return; | 
| +  } | 
| + | 
| +  throw std::u16string(u"Unknown option " + name); | 
| +} | 
| + | 
| +// Creates the filter object for a filter text: a WhiteListFilter if the text | 
| +// starts with "@@", a RegExpFilter otherwise, or an InvalidFilter carrying | 
| +// the reason if either constructor throws std::u16string. The returned | 
| +// object is allocated with new. | 
| +Filter* RegExpFilter::Create(const std::u16string& text) | 
| +{ | 
| +  const bool whitelisted = (text.compare(0, 2, u"@@") == 0); | 
| +  const size_t patternStart = (whitelisted ? 2 : 0); | 
| + | 
| +  // NOTE(review): the split happens at the first '$' after the prefix, so a | 
| +  // pattern that itself contains '$' has its remainder treated as options. | 
| +  const size_t separator = text.find(u'$', patternStart); | 
| + | 
| +  std::u16string pattern; | 
| +  std::u16string options; | 
| +  if (separator == std::u16string::npos) | 
| +  { | 
| +    // No options, everything after the prefix is the pattern | 
| +    pattern = text.substr(patternStart); | 
| +  } | 
| +  else | 
| +  { | 
| +    pattern = text.substr(patternStart, separator - patternStart); | 
| +    options = text.substr(separator + 1); | 
| +  } | 
| + | 
| +  try | 
| +  { | 
| +    if (whitelisted) | 
| +      return new WhiteListFilter(text, pattern, options); | 
| + | 
| +    return new RegExpFilter(text, pattern, options); | 
| +  } | 
| +  catch (const std::u16string& reason) | 
| +  { | 
| +    return new InvalidFilter(text, reason); | 
| +  } | 
| +} | 
| + | 
| +// Copies every entry of typeMap into Module.RegExpFilter_typeMap on the | 
| +// JavaScript side, keyed by the type name. | 
| +void RegExpFilter::InitJSTypes() | 
| +{ | 
| +  for (const auto& entry : typeMap) | 
| +  { | 
| +    EM_ASM_ARGS(Module.RegExpFilter_typeMap[getStringData($0)] = $1, &(entry.first), entry.second); | 
| +  } | 
| +} | 
| + | 
| +// Reports this filter as a blocking filter. | 
| +Filter::Type RegExpFilter::GetType() const | 
| +{ | 
| +  return Filter::Type::BLOCKING; | 
| +} | 
| + | 
| +// Checks whether this filter applies to a request. | 
| +// | 
| +//   location    address the regular expression is tested against | 
| +//   typeMask    content type bits of the request | 
| +//   docDomain   passed to IsActiveOnDomain() | 
| +//   thirdParty  whether the request is a third-party request | 
| +//   sitekey     passed to IsActiveOnDomain() | 
| +// | 
| +// Returns false without testing the expression if the content type, the | 
| +// third-party restriction or the domain check rules the filter out. | 
| +bool RegExpFilter::Matches(const std::u16string& location, int typeMask, | 
| +    const std::u16string& docDomain, bool thirdParty, | 
| +    const std::u16string& sitekey) | 
| +{ | 
| +  if ((this->contentType & typeMask) == 0) | 
| +    return false; | 
| + | 
| +  if (this->thirdParty == TrippleState::YES && !thirdParty) | 
| +    return false; | 
| + | 
| +  if (this->thirdParty == TrippleState::NO && thirdParty) | 
| +    return false; | 
| + | 
| +  if (!IsActiveOnDomain(docDomain, sitekey)) | 
| +    return false; | 
| + | 
| +  // The regular expression is created on first use only; the pattern source | 
| +  // is emptied afterwards as it is not needed again. | 
| +  if (regexpId == 0) | 
| +  { | 
| +    regexpId = GenerateRegExp(regexpSource, matchCase); | 
| +    regexpSource.clear(); | 
| +  } | 
| + | 
| +  return EM_ASM_INT(return regexps.test($0, $1), regexpId, &location); | 
| +} | 
|  |