| Index: compiled/RegExpFilter.cpp |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/compiled/RegExpFilter.cpp |
| @@ -0,0 +1,141 @@ |
| +#include <emscripten.h> |
| + |
| +#include "RegExpFilter.h" |
| +#include "WhiteListFilter.h" |
| +#include "InvalidFilter.h" |
| + |
| +namespace |
| +{ |
| +  // Translates Adblock Plus filter syntax in `source` into regular expression |
| +  // source text, hands it to the JavaScript-side regexps.create() and returns |
| +  // the integer that call yields (presumably the regexp's ID - TODO confirm). |
| +  // |
| +  // Translation rules, applied character by character: |
| +  //  - a run of '*' becomes a single ".*"; leading wildcards are dropped |
| +  //  - '^' becomes the separator character class |
| +  //  - a leading "||" becomes the extended (scheme + optional subdomain) anchor |
| +  //  - a leading '|' becomes "^", a trailing '|' becomes "$" unless it directly |
| +  //    follows '^'; any other '|' is escaped |
| +  //  - ASCII letters, digits and code units >= 128 are copied verbatim, |
| +  //    everything else is backslash-escaped |
| +  // |
| +  // Note: This doesn't remove trailing wildcards, otherwise the result should |
| +  // be identical to Filter.toRegExp(). |
| +  int GenerateRegExp(const std::u16string& source) |
| +  { |
| +    static const char16_t separatorClass[] = |
| +        u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"; |
| +    static const char16_t extendedAnchor[] = |
| +        u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"; |
| + |
| +    const size_t length = source.length(); |
| +    std::u16string regexp; |
| + |
| +    // Starting out as '*' makes wildcards at the very beginning get skipped. |
| +    char16_t previous = u'*'; |
| +    size_t pos = 0; |
| +    while (pos < length) |
| +    { |
| +      const char16_t current = source[pos]; |
| +      if (current == u'*') |
| +      { |
| +        // Collapse consecutive wildcards into one. |
| +        if (previous != u'*') |
| +          regexp.append(u".*"); |
| +      } |
| +      else if (current == u'^') |
| +      { |
| +        regexp.append(separatorClass); |
| +      } |
| +      else if (current == u'|') |
| +      { |
| +        const bool atStart = (pos == 0); |
| +        const bool atEnd = (pos + 1 == length); |
| +        if (atStart && !atEnd && source[1] == u'|') |
| +        { |
| +          // Extended anchor, consumes both pipe characters. |
| +          regexp.append(extendedAnchor); |
| +          ++pos; |
| +        } |
| +        else if (atStart) |
| +        { |
| +          // Plain anchor at expression start. |
| +          regexp.append(u"^"); |
| +        } |
| +        else if (atEnd) |
| +        { |
| +          // Anchor at expression end, ignore if following separator placeholder |
| +          if (previous != u'^') |
| +            regexp.append(u"$"); |
| +        } |
| +        else |
| +        { |
| +          // Not actually an anchor, escape it |
| +          regexp.append(u"\\|"); |
| +        } |
| +      } |
| +      else |
| +      { |
| +        const bool isLower = (current >= u'a' && current <= u'z'); |
| +        const bool isUpper = (current >= u'A' && current <= u'Z'); |
| +        const bool isDigit = (current >= u'0' && current <= u'9'); |
| +        const bool isNonAscii = (current >= 128); |
| +        if (!(isLower || isUpper || isDigit || isNonAscii)) |
| +          regexp.append(u"\\"); |
| +        regexp.append(1, current); |
| +      } |
| +      previous = current; |
| +      ++pos; |
| +    } |
| + |
| +    // The second argument's meaning isn't visible here (presumably a |
| +    // match-case flag - TODO confirm). |
| +    return EM_ASM_INT(return regexps.create($0, $1), &regexp, false); |
| +  } |
| +} |
| + |
| +// Constructs a filter from its full text and the already extracted pattern. |
| +// |
| +// text:    complete filter text, forwarded to ActiveFilter. |
| +// pattern: the pattern part. If it is enclosed in slashes ("/.../") the part |
| +//          between them is compiled as a regular expression right away; |
| +//          otherwise it is stored in regexpSource and regexpId stays 0 until |
| +//          a regexp is actually generated from it. |
| +// options: not used by this constructor. |
| +// |
| +// Throws std::u16string with the error text if regexps.getError() reports a |
| +// problem for the freshly created regular expression. |
| +RegExpFilter::RegExpFilter(const std::u16string& text, |
| +    const std::u16string& pattern, const std::u16string& options) |
| +    : ActiveFilter(text), regexpId(0) |
| +{ |
| +  size_t len = pattern.length(); |
| +  if (len >= 2 && pattern[0] == u'/' && pattern[len - 1] == u'/') |
| +  { |
| +    // Strip the enclosing slashes, the remainder is regexp source. |
| +    std::u16string param = pattern.substr(1, len - 2); |
| +    regexpId = EM_ASM_INT(return regexps.create($0, $1), &param, false); |
| + |
| +    // getError() yields a pointer to the error text, or null if there is none. |
| +    std::u16string* error = reinterpret_cast<std::u16string*>( |
| +        EM_ASM_INT(return regexps.getError($0), regexpId)); |
| +    if (error) |
| +    { |
| +      // Copy the message before releasing the regexp: who owns the string |
| +      // behind `error` isn't visible here, so don't rely on it outliving the |
| +      // regexps.delete() call. |
| +      std::u16string reason(*error); |
| +      EM_ASM_ARGS(regexps.delete($0), regexpId); |
| +      throw reason; |
| +    } |
| +  } |
| +  else |
| +    regexpSource = pattern; |
| +} |
| + |
| +// Releases the regexp registered under regexpId, if one has been created. |
| +RegExpFilter::~RegExpFilter() |
| +{ |
| +  // An ID of zero means no regexp was ever created for this filter. |
| +  if (!regexpId) |
| +    return; |
| + |
| +  EM_ASM_ARGS(regexps.delete($0), regexpId); |
| +} |
| + |
| +// Parses `text` and returns a newly allocated filter for it: |
| +//  - WhiteListFilter if the text starts with "@@", |
| +//  - RegExpFilter otherwise, |
| +//  - InvalidFilter (carrying the reason) if the constructor throws a |
| +//    std::u16string. |
| +// |
| +// The pattern runs from after the optional "@@" up to the first '$'; the |
| +// options string, if any, starts at that '$' (the '$' itself is included). |
| +// NOTE(review): the split happens at the first '$', so a "/.../" pattern that |
| +// contains '$' is cut there as well - confirm this is intended. |
| +Filter* RegExpFilter::Create(const std::u16string& text) |
| +{ |
| +  const bool isException = (text.compare(0, 2, u"@@") == 0); |
| +  const size_t patternStart = (isException ? 2 : 0); |
| + |
| +  std::u16string pattern; |
| +  std::u16string options; |
| +  const size_t optionsStart = text.find(u'$', patternStart); |
| +  if (optionsStart == std::u16string::npos) |
| +  { |
| +    // No options, everything after the prefix is the pattern. |
| +    pattern = text.substr(patternStart); |
| +  } |
| +  else |
| +  { |
| +    pattern = text.substr(patternStart, optionsStart - patternStart); |
| +    options = text.substr(optionsStart); |
| +  } |
| + |
| +  try |
| +  { |
| +    if (isException) |
| +      return new WhiteListFilter(text, pattern, options); |
| + |
| +    return new RegExpFilter(text, pattern, options); |
| +  } |
| +  catch (const std::u16string& reason) |
| +  { |
| +    return new InvalidFilter(text, reason); |
| +  } |
| +} |
| + |
| +// Identifies this filter as a blocking filter. |
| +Filter::Type RegExpFilter::GetType() const |
| +{ |
| +  return Filter::Type::BLOCKING; |
| +} |
| + |
| +// Tests `location` against this filter's regular expression via |
| +// regexps.test() and returns the outcome. If no regexp exists yet |
| +// (regexpId is zero), it is generated from regexpSource first. |
| +bool RegExpFilter::Matches(const std::u16string& location) |
| +{ |
| +  const bool needsCompilation = !regexpId; |
| +  if (needsCompilation) |
| +  { |
| +    regexpId = GenerateRegExp(regexpSource); |
| +    // The source text is no longer needed once the regexp exists. |
| +    regexpSource.clear(); |
| +  } |
| + |
| +  const int outcome = |
| +      EM_ASM_INT(return regexps.test($0, $1), regexpId, &location); |
| +  return outcome; |
| +} |