Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Addressed comments, made String class slightly more sane, slightly cleaned up bindings.cpp Created Feb. 2, 2016, 5:48 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/String.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: compiled/RegExpFilter.cpp
===================================================================
new file mode 100644
--- /dev/null
+++ b/compiled/RegExpFilter.cpp
@@ -0,0 +1,307 @@
+#include <climits>
+
+#include <emscripten.h>
+
+#include "RegExpFilter.h"
+#include "WhitelistFilter.h"
+#include "InvalidFilter.h"
+#include "StringScanner.h"
+#include "StringMap.h"
+
+namespace
+{
+  // Bit flags for the content types a filter can apply to. They are combined
+  // into bit masks (RegExpFilter::mContentType, the typeMask argument of
+  // RegExpFilter::Matches) and exported to JavaScript by
+  // RegExpFilter::InitJSTypes(), so the numeric values are part of the
+  // interface with the JS side.
+  enum
+  {
+    TYPE_OTHER = 0x1,
+    TYPE_SCRIPT = 0x2,
+    TYPE_IMAGE = 0x4,
+    TYPE_STYLESHEET = 0x8,
+    TYPE_OBJECT = 0x10,
+    TYPE_SUBDOCUMENT = 0x20,
+    TYPE_DOCUMENT = 0x40,
+    TYPE_PING = 0x400,
+    TYPE_XMLHTTPREQUEST = 0x800,
+    TYPE_OBJECT_SUBREQUEST = 0x1000,
+    TYPE_MEDIA = 0x4000,
+    TYPE_FONT = 0x8000,
+    TYPE_POPUP = 0x8000000,
+    TYPE_GENERICBLOCK = 0x10000000,
+    TYPE_GENERICHIDE = 0x20000000,
+    TYPE_ELEMHIDE = 0x40000000,
+  };
+
+  // Maps normalized option names (lower case, '-' as separator, see
+  // RegExpFilter::ProcessOption()) to the content type flag they stand for.
+  // Several legacy names are aliases of another type's flag.
+  StringMap<int> typeMap {
+    {u"other"_str, TYPE_OTHER},
+    {u"script"_str, TYPE_SCRIPT},
+    {u"image"_str, TYPE_IMAGE},
+    {u"stylesheet"_str, TYPE_STYLESHEET},
+    {u"object"_str, TYPE_OBJECT},
+    {u"subdocument"_str, TYPE_SUBDOCUMENT},
+    {u"document"_str, TYPE_DOCUMENT},
+    {u"xbl"_str, TYPE_OTHER}, // Backwards compat
+    {u"ping"_str, TYPE_PING},
+    {u"xmlhttprequest"_str, TYPE_XMLHTTPREQUEST},
+    {u"object-subrequest"_str, TYPE_OBJECT_SUBREQUEST},
+    {u"dtd"_str, TYPE_OTHER}, // Backwards compat
+    {u"media"_str, TYPE_MEDIA},
+    {u"font"_str, TYPE_FONT},
+    {u"background"_str, TYPE_IMAGE}, // Backwards compat
+
+    {u"popup"_str, TYPE_POPUP},
+    {u"genericblock"_str, TYPE_GENERICBLOCK},
+    {u"generichide"_str, TYPE_GENERICHIDE},
+    {u"elemhide"_str, TYPE_ELEMHIDE},
+  };
+
+  // Content type mask used when a filter lists no content type at all, and
+  // the starting point when its first type option is a negated one: every
+  // bit except document, elemhide, popup, genericblock and generichide.
+  // Never modified after initialization, hence const.
+  const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
+      TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
+
+  // Asks the JavaScript-side regexps object to create a regular expression.
+  // The address of regexp and the matchCase flag are handed to
+  // regexps.create(); whatever integer it returns is passed back to the
+  // caller, which uses it as the regexp ID.
+  int GenerateRegExp(const String& regexp, bool matchCase)
+  {
+    return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
+  }
+}
+
+// Parses the pattern and the options of a filter.
+//
+// text: complete filter text.
+// patternStart, patternEnd: the pattern is the range
+//   [patternStart, patternEnd) of the filter text. The character at
+//   patternEnd is skipped (Create() passes the position of '$' or the text
+//   length), everything after it is parsed as a comma-separated option list.
+//
+// Throws a String describing the problem if an option is unknown (see
+// ProcessOption()) or if a pattern of the form /.../ cannot be compiled.
+RegExpFilter::RegExpFilter(const String& text,
+    String::size_type patternStart, String::size_type patternEnd)
+    : ActiveFilter(text, true), mRegexpId(0),
+      mRegexpSource(String(mText, patternStart, patternEnd - patternStart)),
+      mContentType(-1), mMatchCase(false), mThirdParty(TrippleState::ANY)
+{
+  // NOTE(review): if the filter has no options then patternEnd equals the
+  // text length and patternEnd + 1 lies past the end; this relies on the
+  // String constructor tolerating such a position - confirm in String.h.
+  String options(mText, patternEnd + 1);
+  StringScanner scanner(options, u',');
+  int optionStart = 0;
+  int optionEnd = -1;   // -1: end of the current option name not seen yet
+  int valueStart = -1;  // -1: current option has no "=value" part
+  bool done = false;
+  while (!done)
+  {
+    // done is sampled before next() so that the loop body still runs once
+    // after the scanner reported being done.
+    // NOTE(review): presumably next() then yields the u',' given to the
+    // scanner above, which flushes the last option - confirm in
+    // StringScanner.h.
+    done = scanner.done();
+    switch (scanner.next())
+    {
+      case u'=':
+        // Only the first '=' separates the option name from its value.
+        if (optionEnd < 0)
+        {
+          optionEnd = scanner.position();
+          valueStart = optionEnd + 1;
+        }
+        break;
+      case u',':
+        if (optionEnd < 0)
+          optionEnd = scanner.position();
+        ProcessOption(options, optionStart, optionEnd, valueStart, scanner.position());
+        optionStart = scanner.position() + 1;
+        optionEnd = -1;
+        valueStart = -1;
+        break;
+    }
+  }
+
+  // A negative content type means that no type option was given.
+  if (mContentType < 0)
+    mContentType = defaultTypeMask;
+
+  // A pattern enclosed in slashes is a literal regular expression: strip the
+  // slashes and compile it right away so that errors surface immediately.
+  // Other patterns are compiled lazily in Matches().
+  size_t len = mRegexpSource.length();
+  if (len >= 2 && mRegexpSource[0] == u'/' && mRegexpSource[len - 1] == u'/')
+  {
+    mRegexpSource.reset(mRegexpSource, 1, len - 2);
+    mRegexpId = GenerateRegExp(mRegexpSource, mMatchCase);
+
+    // A non-negative error length is treated as a compilation failure.
+    int errorLength = EM_ASM_INT(return regexps.getErrorLength($0), mRegexpId);
+    if (errorLength >= 0)
+    {
+      String error(errorLength);
+      EM_ASM_ARGS(regexps.getError($0, $1), mRegexpId, error.data());
+
+      // The destructor does not run for an object whose constructor throws,
+      // so the JavaScript-side entry has to be released here or it leaks.
+      // NOTE(review): assumes regexps.delete() accepts the ID of a failed
+      // compilation - confirm in the JS bindings.
+      EM_ASM_ARGS(regexps.delete($0), mRegexpId);
+      mRegexpId = 0;
+      throw error;
+    }
+  }
+}
+
+// Releases the JavaScript-side regular expression, if one was created.
+RegExpFilter::~RegExpFilter()
+{
+  // An ID of zero means that no regular expression has been generated.
+  if (mRegexpId == 0)
+    return;
+
+  EM_ASM_ARGS(regexps.delete($0), mRegexpId);
+}
+
+// Applies a single filter option to this filter.
+//
+// options: the complete option list of the filter.
+// optionStart, optionEnd: the option name is the range
+//   [optionStart, optionEnd) of options; a leading '~' negates the option.
+// valueStart, valueEnd: the option value is the range
+//   [valueStart, valueEnd) of options; valueStart is negative if the option
+//   has no value.
+//
+// Empty options are ignored. Throws a String ("Unknown option <name>") if
+// the option name is not recognized.
+void RegExpFilter::ProcessOption(String& options, int optionStart,
+    int optionEnd, int valueStart, int valueEnd)
+{
+  if (optionEnd <= optionStart)
+    return;
+
+  bool reverse = false;
+  if (options[optionStart] == u'~')
+  {
+    reverse = true;
+    optionStart++;
+  }
+
+  // Normalize the name the way typeMap keys are spelled: ASCII lower case
+  // with '-' instead of '_'.
+  String name(options, optionStart, optionEnd - optionStart);
+  for (size_t i = 0; i < name.length(); ++i)
+  {
+    char16_t currChar = name[i];
+    if (currChar >= u'A' && currChar <= u'Z')
+      name[i] = currChar + u'a' - u'A';
+    else if (currChar == u'_')
+      name[i] = u'-';
+  }
+
+  auto it = typeMap.find(name);
+  if (it != typeMap.end())
+  {
+    // The first type option decides the starting mask: negated options
+    // remove types from the default mask, regular ones add to an empty mask.
+    if (mContentType < 0)
+      mContentType = reverse ? defaultTypeMask : 0;
+    if (reverse)
+      mContentType &= ~it->second;
+    else
+      mContentType |= it->second;
+  }
+  else if (name.equals(u"domain"_str))
+  {
+    if (valueStart >= 0 && valueEnd > valueStart)
+      ParseDomains(options, valueStart, valueEnd - valueStart, u'|');
+  }
+  else if (name.equals(u"sitekey"_str))
+  {
+    if (valueStart >= 0 && valueEnd > valueStart)
+    {
+      // The scanned string is a named local that outlives the scanner.
+      // NOTE(review): StringScanner is not visible here; if it only keeps a
+      // reference to its input, scanning a temporary would leave that
+      // reference dangling for the whole loop below.
+      String sitekeys(options, valueStart, valueEnd - valueStart);
+      StringScanner scanner(sitekeys, u'|');
+      size_t start = 0;
+      bool done = false;
+      while (!done)
+      {
+        // Same pattern as in the constructor: done is sampled before next()
+        // so that the body runs once more after the input is exhausted.
+        done = scanner.done();
+        if (scanner.next() == u'|')
+        {
+          // Positions are relative to the value, hence the valueStart
+          // offset; empty sitekeys are skipped.
+          if (scanner.position() > start)
+            AddSitekey(String(options, valueStart + start, scanner.position() - start));
+          start = scanner.position() + 1;
+        }
+      }
+    }
+  }
+  else if (name.equals(u"match-case"_str))
+    mMatchCase = !reverse;
+  else if (name.equals(u"third-party"_str))
+    mThirdParty = reverse ? TrippleState::NO : TrippleState::YES;
+  else if (name.equals(u"collapse"_str))
+    mCollapse = reverse ? TrippleState::NO : TrippleState::YES;
+  else
+  {
+    String error(u"Unknown option "_str);
+    error.append(name);
+    throw std::move(error.ensure_own_buffer());
+  }
+}
+
+// Creates a filter object from a filter text.
+//
+// A text starting with "@@" produces a WhitelistFilter, any other text a
+// RegExpFilter. The pattern ends at the first '$' found after the optional
+// "@@" prefix, or at the end of the text if there is none. If constructing
+// the filter throws a String, an InvalidFilter carrying that String as the
+// reason is returned instead. The returned object is allocated with new.
+Filter* RegExpFilter::Create(const String& text)
+{
+  const bool isWhitelist =
+      text.length() >= 2 && text[0] == u'@' && text[1] == u'@';
+  const String::size_type patternStart = isWhitelist ? 2 : 0;
+
+  String::size_type patternEnd = text.find(u'$', patternStart);
+  if (patternEnd == text.npos)
+    patternEnd = text.length();
+
+  try
+  {
+    if (isWhitelist)
+      return new WhitelistFilter(text, patternStart, patternEnd);
+
+    return new RegExpFilter(text, patternStart, patternEnd);
+  }
+  catch (const String& reason)
+  {
+    return new InvalidFilter(text, reason);
+  }
+}
+
+// Publishes the content type flags to JavaScript: resets
+// exports.RegExpFilter.typeMap to an empty object and adds one property per
+// typeMap entry. The property name is the option name in upper case after
+// replace("-", "_"), which with a string pattern replaces only the first
+// '-'; the property value is the content type flag.
+void RegExpFilter::InitJSTypes()
+{
+  EM_ASM(exports.RegExpFilter.typeMap = {};);
+  for (auto entry = typeMap.begin(), last = typeMap.end(); entry != last; ++entry)
+  {
+    EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(entry->first), entry->second);
+  }
+}
+
+// Translates a filter pattern into the source of a regular expression.
+//
+// '*' becomes ".*" (runs of wildcards are collapsed and leading wildcards
+// dropped), '^' becomes a separator placeholder, '|' is an anchor at the
+// start or the end of the pattern ("||" at the start is the extended
+// anchor) and a literal pipe elsewhere. Any other character below 128 that
+// is not an ASCII letter or digit is escaped with a backslash.
+String RegExpFilter::RegExpFromSource(const String& source)
+{
+  /* TODO: this is very inefficient */
+
+  // Note: This doesn't remove trailing wildcards, otherwise the result should
+  // be identical to Filter.toRegExp().
+  String regexp;
+  const String::size_type length = source.length();
+
+  // Starting out with '*' as the previous character makes leading wildcards
+  // disappear.
+  String::value_type previous = u'*';
+  for (String::size_type pos = 0; pos < length; ++pos)
+  {
+    const String::value_type current = source[pos];
+    if (current == u'*')
+    {
+      if (previous != u'*')
+        regexp.append(u".*"_str);
+    }
+    else if (current == u'^')
+    {
+      regexp.append(u"(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)"_str);
+    }
+    else if (current == u'|')
+    {
+      if (pos == 0)
+      {
+        // Anchor at expression start, maybe extended anchor?
+        if (pos + 1 < length && source[pos + 1] == u'|')
+        {
+          regexp.append(u"^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?"_str);
+          // Consume the second pipe as well.
+          ++pos;
+        }
+        else
+          regexp.append(u'^');
+      }
+      else if (pos == length - 1)
+      {
+        // Anchor at expression end, ignore if following separator placeholder
+        if (previous != u'^')
+          regexp.append(u'$');
+      }
+      else
+      {
+        // Not actually an anchor, escape it
+        regexp.append(u"\\|"_str);
+      }
+    }
+    else
+    {
+      const bool isAlphanumeric =
+          (current >= u'a' && current <= u'z') ||
+          (current >= u'A' && current <= u'Z') ||
+          (current >= u'0' && current <= u'9');
+      if (!isAlphanumeric && current < 128)
+        regexp.append(u'\\');
+      regexp.append(current);
+    }
+    previous = current;
+  }
+  return std::move(regexp.ensure_own_buffer());
+}
+
+// Reports the type of this filter, which is always Type::BLOCKING here.
+Filter::Type RegExpFilter::GetType() const
+{
+  return Filter::Type::BLOCKING;
+}
+
+// Checks whether this filter applies to a request.
+//
+// location: string the regular expression is tested against.
+// typeMask: content type flags of the request; at least one of them has to
+//   be set in the filter's content type mask.
+// docDomain, sitekey: passed on to IsActiveOnDomain().
+// thirdParty: has to agree with the filter's third-party option unless that
+//   option was not given.
+//
+// Returns the result of the JavaScript-side regexps.test() call if all of
+// the checks above pass, false otherwise.
+bool RegExpFilter::Matches(const String& location, int typeMask,
+    String& docDomain, bool thirdParty, const String& sitekey) const
+{
+  if ((mContentType & typeMask) == 0)
+    return false;
+
+  if (mThirdParty == TrippleState::YES && !thirdParty)
+    return false;
+
+  if (mThirdParty == TrippleState::NO && thirdParty)
+    return false;
+
+  if (!IsActiveOnDomain(docDomain, sitekey))
+    return false;
+
+  // The regular expression is generated on first use; an ID of zero means
+  // that this hasn't happened yet.
+  if (mRegexpId == 0)
+    mRegexpId = GenerateRegExp(RegExpFromSource(mRegexpSource), mMatchCase);
+
+  return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location);
+}
« no previous file with comments | « compiled/RegExpFilter.h ('k') | compiled/String.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld