Index: compiled/RegExpFilter.cpp |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/compiled/RegExpFilter.cpp |
@@ -0,0 +1,141 @@ |
+#include <emscripten.h> |
+ |
+#include "RegExpFilter.h" |
+#include "WhiteListFilter.h" |
+#include "InvalidFilter.h" |
+ |
+namespace |
+{ |
+  // Converts a filter pattern into regular expression source and passes it to |
+  // regexps.create() on the JavaScript side; the integer returned by that call |
+  // is handed back to the caller. |
+  // |
+  // Translation rules, applied per UTF-16 code unit: |
+  //   '*'   becomes ".*"; runs of '*' collapse into one and a leading '*' is |
+  //         dropped (the "previous" character starts out as '*'). |
+  //   '^'   becomes the separator placeholder character class. |
+  //   '|'   is an anchor at the very start ("||" being the extended anchor) |
+  //         or at the very end, and an escaped literal anywhere else. |
+  //   ASCII letters, digits and code units >= 128 are copied verbatim, |
+  //   everything else is escaped with a backslash. |
+  // |
+  // Note: This doesn't remove trailing wildcards, otherwise the result should |
+  // be identical to Filter.toRegExp(). |
+  int GenerateRegExp(const std::u16string& source) |
+  { |
+    std::u16string expression; |
+    const size_t length = source.length(); |
+    char16_t previous = u'*'; |
+    size_t pos = 0; |
+    while (pos < length) |
+    { |
+      const char16_t current = source[pos]; |
+      if (current == u'*') |
+      { |
+        if (previous != u'*') |
+          expression += u".*"; |
+      } |
+      else if (current == u'^') |
+      { |
+        expression += u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"; |
+      } |
+      else if (current == u'|') |
+      { |
+        if (pos == 0) |
+        { |
+          // Anchor at expression start, maybe extended anchor? |
+          const bool extendedAnchor = (length > 1 && source[1] == u'|'); |
+          if (extendedAnchor) |
+          { |
+            expression += u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"; |
+            // Consume the second '|' as well. |
+            ++pos; |
+          } |
+          else |
+            expression += u"^"; |
+        } |
+        else if (pos == length - 1) |
+        { |
+          // Anchor at expression end, ignore if following separator placeholder |
+          if (previous != u'^') |
+            expression += u"$"; |
+        } |
+        else |
+        { |
+          // Not actually an anchor, escape it |
+          expression += u"\\|"; |
+        } |
+      } |
+      else |
+      { |
+        const bool isLower = (current >= u'a' && current <= u'z'); |
+        const bool isUpper = (current >= u'A' && current <= u'Z'); |
+        const bool isDigit = (current >= u'0' && current <= u'9'); |
+        const bool isNonAscii = (current >= 128); |
+        // Anything that is not alphanumeric or non-ASCII gets a backslash. |
+        if (!(isLower || isUpper || isDigit || isNonAscii)) |
+          expression += u"\\"; |
+        expression.append(1, current); |
+      } |
+      previous = current; |
+      ++pos; |
+    } |
+    return EM_ASM_INT(return regexps.create($0, $1), &expression, false); |
+  } |
+} |
+ |
+// Constructs a filter from its full text, its pattern part and its options |
+// part. |
+// |
+// A pattern of the form /.../ (at least two characters, starting and ending |
+// with a slash) is treated as a regular expression: the text between the |
+// slashes is passed to regexps.create() immediately. If regexps.getError() |
+// reports an error for the new regexp, the regexp is deleted again and the |
+// error string is thrown as std::u16string. Any other pattern is only stored |
+// in regexpSource and regexpId stays zero. |
+// |
+// NOTE(review): options is not used in this constructor. |
+RegExpFilter::RegExpFilter(const std::u16string& text, |
+    const std::u16string& pattern, const std::u16string& options) |
+    : ActiveFilter(text), regexpId(0) |
+{ |
+  size_t len = pattern.length(); |
+  if (len >= 2 && pattern[0] == u'/' && pattern[len - 1] == u'/') |
+  { |
+    // Strip the enclosing slashes and hand the address of the string to the |
+    // JavaScript side. |
+    std::u16string param = pattern.substr(1, len - 2); |
+    regexpId = EM_ASM_INT(return regexps.create($0, $1), &param, false); |
+ |
+    // NOTE(review): ownership of the string returned by regexps.getError() is |
+    // not visible here - confirm whether it has to be released. |
+    std::u16string* error = reinterpret_cast<std::u16string*>(EM_ASM_INT(return regexps.getError($0), regexpId)); |
+    if (error) |
+    { |
+      // The destructor does not run when a constructor throws, so the regexp |
+      // has to be released explicitly before throwing. |
+      EM_ASM_ARGS(regexps.delete($0), regexpId); |
+      throw std::u16string(*error); |
+    } |
+  } |
+  else |
+    regexpSource = pattern; |
+} |
+ |
+// Releases the regular expression held by this filter, if there is one. |
+RegExpFilter::~RegExpFilter() |
+{ |
+  // A zero id means no regexp was created for this filter, nothing to do. |
+  if (!regexpId) |
+    return; |
+ |
+  EM_ASM_ARGS(regexps.delete($0), regexpId); |
+} |
+ |
+// Creates a filter object from filter text. |
+// |
+// Text starting with "@@" produces a WhiteListFilter, anything else a |
+// RegExpFilter. The pattern is everything after the optional "@@" prefix up |
+// to the first '$'; the options are the remainder of the text starting at |
+// (and including) that '$', or an empty string if there is no '$'. If the |
+// filter constructor throws a std::u16string, an InvalidFilter carrying that |
+// string as reason is returned instead. |
+Filter* RegExpFilter::Create(const std::u16string& text) |
+{ |
+  const bool isException = (text.compare(0, 2, u"@@") == 0); |
+  const size_t start = isException ? 2 : 0; |
+  const size_t optionsStart = text.find(u'$', start); |
+ |
+  std::u16string pattern; |
+  std::u16string options; |
+  if (optionsStart == std::u16string::npos) |
+  { |
+    // No options separator, the pattern extends to the end of the text. |
+    pattern = text.substr(start); |
+  } |
+  else |
+  { |
+    pattern = text.substr(start, optionsStart - start); |
+    options = text.substr(optionsStart); |
+  } |
+ |
+  try |
+  { |
+    if (isException) |
+      return new WhiteListFilter(text, pattern, options); |
+    return new RegExpFilter(text, pattern, options); |
+  } |
+  catch (const std::u16string& reason) |
+  { |
+    return new InvalidFilter(text, reason); |
+  } |
+} |
+ |
+// Reports the type of this filter, which is always BLOCKING for this class. |
+Filter::Type RegExpFilter::GetType() const |
+{ |
+  return Filter::Type::BLOCKING; |
+} |
+ |
+// Tests the given location against this filter's regular expression and |
+// returns the result of regexps.test(). |
+// |
+// If no regexp exists yet (regexpId is zero), it is generated from |
+// regexpSource on this first call and the stored source is emptied |
+// afterwards. |
+bool RegExpFilter::Matches(const std::u16string& location) |
+{ |
+  const bool regexpMissing = !regexpId; |
+  if (regexpMissing) |
+  { |
+    regexpId = GenerateRegExp(regexpSource); |
+    // The source text is not used again by this method once the regexp exists. |
+    regexpSource.clear(); |
+  } |
+ |
+  return EM_ASM_INT(return regexps.test($0, $1), regexpId, &location); |
+} |