compiled/ActiveFilter.cpp - Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++

Unified Diff: compiled/ActiveFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)

Patch Set: Addressed Sergei's comments again and added some asserts Created Feb. 23, 2016, 12:30 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: compiled/ActiveFilter.cpp

===================================================================

new file mode 100644

--- /dev/null

+++ b/compiled/ActiveFilter.cpp

@@ -0,0 +1,209 @@

+#include <cstdio>

+#include "ActiveFilter.h"

+#include "StringScanner.h"

+namespace

+ // Converts an unsigned integer to its decimal text form as an OwnedString:
+ // the number is formatted into a stack buffer with sprintf and the resulting
+ // characters are then copied one by one into the returned string.

+ OwnedString to_string(unsigned int i)

+ {

+ // 11 chars hold up to 10 decimal digits plus the terminating NUL, which is
+ // sufficient as long as unsigned int is no wider than 32 bits.

+ char buffer[11];

+ int len = sprintf(buffer, "%u", i);

+ OwnedString result(len);

+ // NOTE(review): the loop index below shadows the parameter `i`, and the
+ // signed `len` is compared against a String::size_type index.

+ for (String::size_type i = 0; i < len; i++)

+ result[i] = buffer[i];

+ return result;

+ }

+// Constructs an active filter: forwards `text` to the Filter base class,
+// starts with mDisabled = false and zeroed mHitCount / mLastHit, and stores
+// `ignoreTrailingDot`, which the domain handling in this file consults before
+// stripping a trailing '.' from domain names.
+// NOTE(review): the constructor body is not visible in this rendering.

+ActiveFilter::ActiveFilter(const String& text, bool ignoreTrailingDot)

+ : Filter(text), mDisabled(false), mHitCount(0), mLastHit(0),

+ mIgnoreTrailingDot(ignoreTrailingDot)

+// Lowercases `str` by overwriting its characters one at a time.
+// ASCII 'A'..'Z' are shifted by a fixed offset; characters with a value of
+// 128 or above are lowercased by calling into JavaScript; everything else is
+// left untouched.
+// NOTE(review): callers pass a temporary DependentString built from another
+// string; presumably it shares that string's buffer so the change is visible
+// to the caller - confirm against DependentString.

+void ActiveFilter::ToLower(DependentString&& str)

+ for (String::size_type i = 0; i < str.length(); ++i)

+ {

+ String::value_type currChar = str[i];

+ // This should be more efficient with a lookup table but I couldn't measure

+ // any performance difference.

+ if (currChar >= u'A' && currChar <= u'Z')

+ str[i] = currChar + u'a' - u'A';

+ else if (currChar >= 128)

+ {

+ // It seems that calling JS is the easiest solution for lowercasing

+ // Unicode characters.

+ // Only the first code unit of the JS result (charCodeAt(0)) is written
+ // back, so the string length never changes.

+ str[i] = EM_ASM_INT({

+ return String.fromCharCode($0).toLowerCase().charCodeAt(0);

+ }, currChar);

+ }

+// Returns the raw pointer held by mDomains. Callers in this file treat a null
+// result as "no domain restrictions set".

+ActiveFilter::DomainMap* ActiveFilter::GetDomains() const

+ return mDomains.get();

+// Returns the raw pointer held by mSitekeys. Callers in this file treat a
+// null result as "no sitekey restrictions set".

+ActiveFilter::SitekeySet* ActiveFilter::GetSitekeys() const

+ return mSitekeys.get();

+// Parses a `separator`-delimited list of domains into a freshly allocated
+// mDomains map. Each non-empty entry is stored with the value true (included)
+// or, when it is prefixed with '~', false (excluded). The empty-string key is
+// set to true only when no included entry was seen.
+//   domains   - the list to parse
+//   separator - the character that delimits entries
+// NOTE(review): the closing brace of the while loop is missing from this
+// rendering; the final empty-key assignment presumably follows the loop.

+void ActiveFilter::ParseDomains(const String& domains,

+ String::value_type separator) const

+ // Start at 2 and add one per separator found; the total is handed to the
+ // DomainMap constructor - presumably a size hint covering every entry plus
+ // the empty-string key; confirm against DomainMap.

+ DomainMap::size_type count = 2;

+ for (String::size_type i = 0; i < domains.length(); i++)

+ if (domains[i] == separator)

+ count++;

+ mDomains.reset(new DomainMap(count));

+ annotate_address(mDomains.get(), "DomainMap");

+ StringScanner scanner(domains, 0, separator);

+ // `start` is the index where the current entry begins, `reverse` is set
+ // when the current entry carries a '~' prefix, and `hasIncludes` records
+ // whether any entry was stored as included.

+ String::size_type start = 0;

+ bool reverse = false;

+ bool hasIncludes = false;

+ bool done = false;

+ while (!done)

+ {

+ // `done` is sampled before next(), so the iteration that begins with the
+ // scanner already exhausted still runs once. Presumably next() then
+ // yields the separator so that the last entry gets flushed - confirm
+ // against StringScanner.

+ done = scanner.done();

+ String::value_type currChar = scanner.next();

+ // A '~' in the first position of an entry marks it as excluded; skip it.

+ if (currChar == u'~' && scanner.position() == start)

+ {

+ start++;

+ reverse = true;

+ }

+ else if (currChar == separator)

+ {

+ String::size_type len = scanner.position() - start;

+ // Drop a single trailing '.' when the filter ignores trailing dots.

+ if (len > 0 && mIgnoreTrailingDot && domains[start + len - 1] == '.')

+ len--;

+ // Empty entries are skipped.

+ if (len > 0)

+ {

+ enter_context("Adding to ActiveFilter.mDomains");

+ (*mDomains)[DependentString(domains, start, len)] = !reverse;

+ exit_context();

+ if (!reverse)

+ hasIncludes = true;

+ }

+ // The next entry begins right after the separator.

+ start = scanner.position() + 1;

+ reverse = false;

+ }

+ // The empty-string key holds the default used when no listed domain
+ // matches: true unless at least one included domain was listed.

+ enter_context("Adding to ActiveFilter.mDomains");

+ (*mDomains)[u""_str] = !hasIncludes;

+ exit_context();

+// Inserts `sitekey` into mSitekeys, allocating the set on first use.

+void ActiveFilter::AddSitekey(const String& sitekey) const

+ // The set is only created once a sitekey is actually added, so mSitekeys
+ // stays null for filters without sitekeys.

+ if (!mSitekeys)

+ {

+ mSitekeys.reset(new SitekeySet());

+ annotate_address(mSitekeys.get(), "SitekeySet");

+ }

+ enter_context("Adding to ActiveFilter.mSitekeys");

+ mSitekeys->insert(sitekey);

+ exit_context();

+// Decides whether this filter applies to a document on `docDomain` that was
+// served with `sitekey`.
+// Returns false when sitekeys are set and `sitekey` is not among them, true
+// when no domains are set, and otherwise the value stored for the first match
+// found while trying `docDomain` and then each of its parent domains; if
+// nothing matches, the value of the empty-string entry is returned.
+// Note that `docDomain` is modified: it is reset to progressively shorter
+// suffixes during the search.

+bool ActiveFilter::IsActiveOnDomain(DependentString& docDomain, const String& sitekey) const

+ auto sitekeys = GetSitekeys();

+ if (sitekeys && !sitekeys->find(sitekey))

+ return false;

+ // If no domains are set the rule matches everywhere

+ auto domains = GetDomains();

+ if (!domains)

+ return true;

+ // If the document has no host name, match only if the filter isn't restricted

+ // to specific domains

+ if (docDomain.empty())

+ return (*domains)[u""_str];

+ // Lowercase the domain before the lookups and drop one trailing '.' when
+ // trailing dots are ignored.

+ ToLower(DependentString(docDomain));

+ String::size_type len = docDomain.length();

+ if (len > 0 && mIgnoreTrailingDot && docDomain[len - 1] == '.')

+ docDomain.reset(docDomain, 0, len - 1);

+ // Look up the domain; on a miss, cut everything up to and including the
+ // first '.' and retry with the remainder until no dot is left.

+ while (true)

+ {

+ auto it = domains->find(docDomain);

+ if (it)

+ return it->second;

+ String::size_type nextDot = docDomain.find(u'.');

+ if (nextDot == docDomain.npos)

+ break;

+ docDomain.reset(docDomain, nextDot + 1);

+ }

+ return (*domains)[u""_str];

+// Checks whether this filter is restricted to `docDomain` alone.
+// Returns false when no domains are set, when `docDomain` is empty, or when
+// the empty-string entry is true. Otherwise returns true only if every entry
+// stored as included is either equal to `docDomain` or ends with
+// "." followed by `docDomain`.
+// Note that `docDomain` may be modified (trailing dot removed).

+bool ActiveFilter::IsActiveOnlyOnDomain(DependentString& docDomain) const

+ auto domains = GetDomains();

+ if (!domains || docDomain.empty() || (*domains)[u""_str])

+ return false;

+ ToLower(DependentString(docDomain));

+ String::size_type len = docDomain.length();

+ if (len > 0 && mIgnoreTrailingDot && docDomain[len - 1] == '.')

+ docDomain.reset(docDomain, 0, len - 1);

+ for (auto it = domains->begin(); it != domains->end(); ++it)

+ {

+ // Entries stored as false and the domain itself never disqualify.

+ if (!it->second || it->first.equals(docDomain))

+ continue;

+ size_t len1 = it->first.length();

+ size_t len2 = docDomain.length();

+ // Accept entries that are longer than docDomain, whose tail equals
+ // docDomain, and where that tail is preceded by a '.'.
+ // Presumably DependentString(s, pos) is the suffix of s from pos -
+ // confirm against DependentString.

+ if (len1 > len2 &&

+ DependentString(it->first, len1 - len2).equals(docDomain) &&

+ it->first[len1 - len2 - 1] == u'.')

+ {

+ continue;

+ }

+ // Any other included entry means the filter is active elsewhere too.

+ return false;

+ }

+ return true;

+// Returns true when the filter has no sitekeys and is not limited to specific
+// domains, i.e. either no domains are set or the empty-string entry is true.

+bool ActiveFilter::IsGeneric() const

+ auto sitekeys = GetSitekeys();

+ auto domains = GetDomains();

+ return !sitekeys && (!domains || (*domains)[u""_str]);

+// Returns the output of Filter::Serialize() followed by one line for each
+// piece of state that differs from its initial value: "disabled=true",
+// "hitCount=<n>" and "lastHit=<n>". Each added line ends with '\n'.

+OwnedString ActiveFilter::Serialize() const

+ /* TODO this is very inefficient */

+ OwnedString result(Filter::Serialize());

+ if (mDisabled)

+ result.append(u"disabled=true\n"_str);

+ // A zero hit count is not written out.

+ if (mHitCount)

+ {

+ result.append(u"hitCount="_str);

+ result.append(to_string(mHitCount));

+ result.append(u'\n');

+ }

+ // A zero last-hit value is not written out.

+ if (mLastHit)

+ {

+ result.append(u"lastHit="_str);

+ result.append(to_string(mLastHit));

+ result.append(u'\n');

+ }

+ return result;

« no previous file with comments | « compiled/ActiveFilter.h ('k') | compiled/BlockingFilter.h » ('j') | compiled/RegExpFilter.cpp » ('J')