Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Minor improvements Created Jan. 20, 2016, 2:41 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/StringScanner.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: compiled/RegExpFilter.cpp
===================================================================
new file mode 100644
--- /dev/null
+++ b/compiled/RegExpFilter.cpp
@@ -0,0 +1,305 @@
+#include <climits>
+#include <unordered_map>
+
+#include <emscripten.h>
+
+#include "RegExpFilter.h"
+#include "WhiteListFilter.h"
+#include "InvalidFilter.h"
+#include "StringScanner.h"
+
+namespace
+{
+  // Content type bit flags. The enum is unnamed so that the values can be
+  // combined and stored as plain int masks.
+  enum
+  {
+    TYPE_OTHER = 0x1,
+    TYPE_SCRIPT = 0x2,
+    TYPE_IMAGE = 0x4,
+    TYPE_STYLESHEET = 0x8,
+    TYPE_OBJECT = 0x10,
+    TYPE_SUBDOCUMENT = 0x20,
+    TYPE_DOCUMENT = 0x40,
+    TYPE_PING = 0x400,
+    TYPE_XMLHTTPREQUEST = 0x800,
+    TYPE_OBJECT_SUBREQUEST = 0x1000,
+    TYPE_MEDIA = 0x4000,
+    TYPE_FONT = 0x8000,
+    TYPE_POPUP = 0x8000000,
+    TYPE_GENERICBLOCK = 0x10000000,
+    TYPE_GENERICHIDE = 0x20000000,
+    TYPE_ELEMHIDE = 0x40000000,
+  };
+
+  // Maps upper-case option names to content type flags. The table is only
+  // ever read after construction, so it is const to rule out accidental
+  // modification of file-level state.
+  const std::unordered_map<std::u16string,int> typeMap({
+    {u"OTHER", TYPE_OTHER},
+    {u"SCRIPT", TYPE_SCRIPT},
+    {u"IMAGE", TYPE_IMAGE},
+    {u"STYLESHEET", TYPE_STYLESHEET},
+    {u"OBJECT", TYPE_OBJECT},
+    {u"SUBDOCUMENT", TYPE_SUBDOCUMENT},
+    {u"DOCUMENT", TYPE_DOCUMENT},
+    {u"XBL", TYPE_OTHER}, // Backwards compat
+    {u"PING", TYPE_PING},
+    {u"XMLHTTPREQUEST", TYPE_XMLHTTPREQUEST},
+    {u"OBJECT_SUBREQUEST", TYPE_OBJECT_SUBREQUEST},
+    {u"DTD", TYPE_OTHER}, // Backwards compat
+    {u"MEDIA", TYPE_MEDIA},
+    {u"FONT", TYPE_FONT},
+    {u"BACKGROUND", TYPE_IMAGE}, // Backwards compat
+
+    {u"POPUP", TYPE_POPUP},
+    {u"GENERICBLOCK", TYPE_GENERICBLOCK},
+    {u"GENERICHIDE", TYPE_GENERICHIDE},
+    {u"ELEMHIDE", TYPE_ELEMHIDE},
+  });
+
+  // Type mask used when a filter names no content type: every bit of INT_MAX
+  // except the flags listed below, which therefore have to be requested
+  // explicitly.
+  const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
+      TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
+
+  // Passes the address of the regular expression source and the case
+  // sensitivity flag to the JavaScript function regexps.create() and returns
+  // the integer it produces.
+  int GenerateRegExp(const std::u16string& regexp, bool matchCase)
+  {
+    return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
+  }
+}
+
+// Builds a filter from its text, its pattern and its comma-separated option
+// list. Each option is handed to ProcessOption(). A pattern enclosed in
+// slashes is compiled immediately through GenerateRegExp(); any other pattern
+// is stored in regexpSource. Throws std::u16string when ProcessOption()
+// rejects an option or when regexps.getError() returns an error string.
+RegExpFilter::RegExpFilter(const std::u16string& text,
+    const std::u16string& pattern, const std::u16string& options)
+    : ActiveFilter(text, true), regexpId(0), contentType(-1), matchCase(false),
+      thirdParty(TrippleState::ANY)
+{
+  // A comma is appended so that the final option is terminated like all the
+  // others. The recorded offsets are applied to the original options string.
+  StringScanner scanner(options + u",");
+  int nameStart = 0;
+  int nameEnd = -1;
+  int valueBegin = -1;
+  while (!scanner.done())
+  {
+    const auto current = scanner.next();
+    if (current == u'=')
+    {
+      // Only the first '=' within an option ends the option name
+      if (nameEnd < 0)
+      {
+        nameEnd = scanner.position();
+        valueBegin = nameEnd + 1;
+      }
+    }
+    else if (current == u',')
+    {
+      const int separator = scanner.position();
+      if (nameEnd < 0)
+        nameEnd = separator;
+      ProcessOption(options, nameStart, nameEnd, valueBegin, separator);
+
+      // Reset the state for the next option
+      nameStart = separator + 1;
+      nameEnd = -1;
+      valueBegin = -1;
+    }
+  }
+
+  // No option set a content type, fall back to the default mask
+  if (contentType < 0)
+    contentType = defaultTypeMask;
+
+  const size_t length = pattern.length();
+  const bool enclosedInSlashes =
+      length >= 2 && pattern[0] == u'/' && pattern[length - 1] == u'/';
+  if (!enclosedInSlashes)
+  {
+    regexpSource = pattern;
+    return;
+  }
+
+  regexpId = GenerateRegExp(pattern.substr(1, length - 2), matchCase);
+
+  std::u16string* error = reinterpret_cast<std::u16string*>(EM_ASM_INT(return regexps.getError($0), regexpId));
+  if (!error)
+    return;
+
+  // Release the regular expression before reporting the failure
+  EM_ASM_ARGS(regexps.delete($0), regexpId);
+  throw std::u16string(*error);
+}
+
+// Releases the regular expression on the JavaScript side, if one was created.
+RegExpFilter::~RegExpFilter()
+{
+  // regexpId stays zero until a regular expression has been generated
+  if (!regexpId)
+    return;
+
+  EM_ASM_ARGS(regexps.delete($0), regexpId);
+}
+
+// Applies one option to this filter. The option name is
+// options[optionStart, optionEnd) and its value, if any, is
+// options[valueStart, valueEnd). A leading '~' reverses the option. Throws
+// std::u16string for option names that are not recognized.
+void RegExpFilter::ProcessOption(const std::u16string& options,
+    int optionStart, int optionEnd, int valueStart, int valueEnd)
+{
+  // Nothing to do for an empty option
+  if (optionEnd <= optionStart)
+    return;
+
+  const bool reverse = (options[optionStart] == u'~');
+  if (reverse)
+    ++optionStart;
+
+  // Normalize the name: ASCII letters to upper case, dashes to underscores
+  std::u16string name(options.substr(optionStart, optionEnd - optionStart));
+  for (char16_t& ch : name)
+  {
+    if (ch == u'-')
+      ch = u'_';
+    else if (ch >= u'a' && ch <= u'z')
+      ch = ch + u'A' - u'a';
+  }
+
+  const auto knownType = typeMap.find(name);
+  if (knownType != typeMap.end())
+  {
+    // The first content type option decides the starting mask: reversed
+    // options remove flags from the default mask, regular ones add flags to
+    // an empty mask.
+    if (contentType < 0)
+      contentType = reverse ? defaultTypeMask : 0;
+
+    const int flag = knownType->second;
+    contentType = reverse ? (contentType & ~flag) : (contentType | flag);
+    return;
+  }
+
+  const bool hasValue = valueStart >= 0 && valueEnd > valueStart;
+  if (name == u"DOMAIN")
+  {
+    if (hasValue)
+      ParseDomains(options.substr(valueStart, valueEnd - valueStart), u'|');
+  }
+  else if (name == u"SITEKEY")
+  {
+    if (hasValue)
+    {
+      // A trailing '|' is appended so that the last key is terminated like
+      // the others. Empty keys are skipped.
+      StringScanner scanner(options.substr(valueStart, valueEnd - valueStart) + u"|");
+      size_t keyStart = 0;
+      while (!scanner.done())
+      {
+        if (scanner.next() != u'|')
+          continue;
+
+        if (scanner.position() > keyStart)
+          sitekeys.insert(options.substr(valueStart + keyStart, scanner.position() - keyStart));
+        keyStart = scanner.position() + 1;
+      }
+    }
+  }
+  else if (name == u"MATCH_CASE")
+    matchCase = !reverse;
+  else if (name == u"THIRD_PARTY")
+    thirdParty = reverse ? TrippleState::NO : TrippleState::YES;
+  else if (name == u"COLLAPSE")
+    collapse = reverse ? TrippleState::NO : TrippleState::YES;
+  else
+    throw std::u16string(u"Unknown option " + name);
+}
+
+// Creates a filter object for the given filter text. Text starting with "@@"
+// produces a WhiteListFilter, any other text a RegExpFilter. If the
+// constructor throws a std::u16string, an InvalidFilter carrying that reason
+// is returned instead. The returned object is allocated with new.
+Filter* RegExpFilter::Create(const std::u16string& text)
+{
+  const bool isException = (text.compare(0, 2, u"@@") == 0);
+  const size_t patternStart = isException ? 2 : 0;
+
+  // Everything after the first '$' is treated as the option list.
+  // NOTE(review): a '$' inside a /regexp/ pattern is also treated as the
+  // options separator here - confirm that this is intended.
+  const size_t separator = text.find(u'$', patternStart);
+  std::u16string pattern;
+  std::u16string options;
+  if (separator == std::u16string::npos)
+    pattern = text.substr(patternStart);
+  else
+  {
+    pattern = text.substr(patternStart, separator - patternStart);
+    options = text.substr(separator + 1);
+  }
+
+  try
+  {
+    if (isException)
+      return new WhiteListFilter(text, pattern, options);
+    return new RegExpFilter(text, pattern, options);
+  }
+  catch (const std::u16string& reason)
+  {
+    return new InvalidFilter(text, reason);
+  }
+}
+
+// Copies every entry of typeMap into Module.RegExpFilter_typeMap on the
+// JavaScript side, passing the address of the name and the flag value.
+void RegExpFilter::InitJSTypes()
+{
+  for (const auto& entry : typeMap)
+    EM_ASM_ARGS(Module.RegExpFilter_typeMap[getStringData($0)] = $1, &entry.first, entry.second);
+}
+
+// Translates a filter pattern into regular expression source text: runs of
+// '*' become ".*", '^' becomes a separator character class, '|' is an anchor
+// at the start or end of the pattern, and all other characters except ASCII
+// letters, ASCII digits and characters >= 128 are escaped with a backslash.
+const std::u16string RegExpFilter::RegExpFromSource(const std::u16string& source)
+{
+  // Note: This doesn't remove trailing wildcards, otherwise the result should
+  // be identical to Filter.toRegExp().
+  const size_t length = source.length();
+  std::u16string regexp;
+
+  // Pretending that a wildcard precedes the pattern drops leading wildcards
+  char16_t previous = u'*';
+  for (size_t pos = 0; pos < length; ++pos)
+  {
+    const char16_t current = source[pos];
+    if (current == u'*')
+    {
+      // Consecutive wildcards collapse into a single one
+      if (previous != u'*')
+        regexp += u".*";
+    }
+    else if (current == u'^')
+    {
+      regexp += u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)";
+    }
+    else if (current == u'|')
+    {
+      if (pos == 0)
+      {
+        // Anchor at expression start, maybe extended anchor?
+        const bool extended = (pos + 1 < length && source[pos + 1] == u'|');
+        if (extended)
+        {
+          regexp += u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?";
+          // Skip the second '|' of the extended anchor
+          ++pos;
+        }
+        else
+          regexp += u"^";
+      }
+      else if (pos + 1 == length)
+      {
+        // Anchor at expression end, ignore if following separator placeholder
+        if (previous != u'^')
+          regexp += u"$";
+      }
+      else
+      {
+        // Not actually an anchor, escape it
+        regexp += u"\\|";
+      }
+    }
+    else
+    {
+      const bool needsNoEscape =
+          (current >= u'a' && current <= u'z') ||
+          (current >= u'A' && current <= u'Z') ||
+          (current >= u'0' && current <= u'9') ||
+          current >= 128;
+      if (!needsNoEscape)
+        regexp += u"\\";
+      regexp += current;
+    }
+    previous = current;
+  }
+  return regexp;
+}
+
+// Reports this filter's type as Type::BLOCKING.
+Filter::Type RegExpFilter::GetType() const
+{
+  return Filter::Type::BLOCKING;
+}
+
+// Checks whether this filter applies to a request: the content type mask, the
+// third-party flag and the document domain/sitekey are checked first, then
+// the location is tested against the regular expression via regexps.test().
+// The regular expression is generated from regexpSource on first use.
+bool RegExpFilter::Matches(const std::u16string& location, int typeMask,
+    const std::u16string& docDomain, bool thirdParty,
+    const std::u16string& sitekey)
+{
+  // Note: the thirdParty parameter shadows the member of the same name, the
+  // member is always accessed through this-> below.
+  if (!(this->contentType & typeMask))
+    return false;
+  if (this->thirdParty == TrippleState::YES && !thirdParty)
+    return false;
+  if (this->thirdParty == TrippleState::NO && thirdParty)
+    return false;
+  if (!IsActiveOnDomain(docDomain, sitekey))
+    return false;
+
+  if (!regexpId)
+  {
+    // Generate the regular expression lazily, the source text is no longer
+    // needed afterwards.
+    regexpId = GenerateRegExp(RegExpFromSource(regexpSource), matchCase);
+    regexpSource.resize(0);
+  }
+  return EM_ASM_INT(return regexps.test($0, $1), regexpId, &location);
+}
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/StringScanner.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld