compiled/ActiveFilter.cpp - Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++

Unified Diff: compiled/ActiveFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)

Patch Set: Optimized hash lookup performance a bit Created Feb. 8, 2016, 7:11 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: compiled/ActiveFilter.cpp

===================================================================

new file mode 100644

--- /dev/null

+++ b/compiled/ActiveFilter.cpp

@@ -0,0 +1,204 @@

+#include <cstdio>

+#include "ActiveFilter.h"

+#include "StringScanner.h"

+namespace
+{
+  // Converts an unsigned integer into its decimal string representation.
+  OwnedString to_string(unsigned int i)
+  {
+    // Ten decimal digits plus the terminating NUL fit any value of an
+    // unsigned int that is at most four bytes wide (maximum 4294967295).
+    static_assert(sizeof(unsigned int) <= 4,
+        "buffer is sized for an unsigned int of at most four bytes");
+    char buffer[11];
+    int len = snprintf(buffer, sizeof(buffer), "%u", i);
+
+    // Return the temporary directly: wrapping it in std::move() would only
+    // prevent the compiler from eliding the copy.
+    return OwnedString(buffer, len);

sergei 2016/02/17 12:54:28 It's not necessary to use `std::move` for the retu

Wladimir Palant 2016/02/18 16:06:29 It got me confused that C++ generates different co

sergei 2016/02/22 12:45:32 Yes, the behavior of C++ compiler regarding the re

+  }
+}

+// Creates an active filter from the given filter text. The filter starts out
+// not disabled, with hit count and last-hit value set to zero;
+// ignoreTrailingDot is stored for the domain handling code.
+ActiveFilter::ActiveFilter(const String& text, bool ignoreTrailingDot)
+    : Filter(text),
+      mDisabled(false),
+      mHitCount(0),
+      mLastHit(0),
+      mIgnoreTrailingDot(ignoreTrailingDot)
+{
+}

+// Lowercases the characters of str at positions [start, end) in place.
+void ActiveFilter::ToLower(String& str, String::size_type start,
+    String::size_type end)
+{
+  for (String::size_type pos = start; pos < end; ++pos)
+  {
+    const String::value_type original = str[pos];
+    if (original >= u'A' && original <= u'Z')
+    {
+      // ASCII capitals are converted arithmetically. A lookup table should
+      // be more efficient, but no performance difference could be measured.
+      str[pos] = original + u'a' - u'A';
+      continue;
+    }
+
+    // Remaining ASCII characters need no conversion.
+    if (original < 128)
+      continue;
+
+    // Calling into JS seems to be the easiest solution for lowercasing
+    // Unicode characters.
+    str[pos] = EM_ASM_INT({
+      return String.fromCharCode($0).toLowerCase().charCodeAt(0);
+    }, original);

sergei 2016/02/17 12:54:27 Wouldn't it be better to call JS function on the r

Wladimir Palant 2016/02/18 16:06:30 Not sure about that. "Rest of the string" isn't a

sergei 2016/02/22 12:45:32 I thought rather about surrogate pairs, e.g. http:

Wladimir Palant 2016/02/23 12:37:20 Interesting. I was under the impression that JavaS

sergei 2016/02/23 15:07:23 Agree.

+  }
+}

+// Returns the domain map held by mDomains, or a null pointer if none has
+// been set.
+ActiveFilter::DomainMap* ActiveFilter::GetDomains() const
+{
+  DomainMap* const domainMap = mDomains.get();
+  return domainMap;
+}

+// Returns the sitekey set held by mSitekeys, or a null pointer if none has
+// been set.
+ActiveFilter::SitekeySet* ActiveFilter::GetSitekeys() const
+{
+  SitekeySet* const sitekeySet = mSitekeys.get();
+  return sitekeySet;
+}

+// Splits the given domain list at separator and stores the result in
+// mDomains: each named domain maps to true, or to false if it was prefixed
+// with '~'. The empty-string key receives the default value, which is true
+// only if no domain was explicitly included.
+void ActiveFilter::ParseDomains(const String& domains,
+    String::value_type separator) const
+{
+  // The number of separators plus two is handed to the DomainMap
+  // constructor (presumably as a size hint - confirm against DomainMap).
+  DomainMap::size_type expectedEntries = 2;
+  for (String::size_type pos = 0; pos < domains.length(); pos++)
+  {
+    if (domains[pos] == separator)
+      expectedEntries++;
+  }
+
+  mDomains.reset(new DomainMap(expectedEntries));
+  annotate_address(mDomains.get(), "DomainMap");
+
+  StringScanner scanner(domains, 0, separator);
+  String::size_type entryStart = 0;
+  bool isExclusion = false;
+  bool sawInclusion = false;
+  for (bool finished = false; !finished;)
+  {
+    // done() is sampled before next() so that the body still runs for the
+    // character returned by this final call (presumably the separator,
+    // which flushes the last entry - confirm against StringScanner).
+    finished = scanner.done();
+    const String::value_type current = scanner.next();
+
+    if (current == u'~' && scanner.position() == entryStart)
+    {
+      // A leading '~' marks an exclusion and is not part of the name.
+      entryStart++;
+      isExclusion = true;
+    }
+    else if (current == separator)
+    {
+      String::size_type entryLen = scanner.position() - entryStart;
+      if (entryLen > 0 && mIgnoreTrailingDot &&
+          domains[entryStart + entryLen - 1] == '.')
+      {
+        entryLen--;
+      }
+
+      // Empty entries are skipped.
+      if (entryLen > 0)
+      {
+        enter_context("Adding to ActiveFilter.mDomains");
+        (*mDomains)[DependentString(domains, entryStart, entryLen)] = !isExclusion;
+        exit_context();
+
+        sawInclusion = sawInclusion || !isExclusion;
+      }
+
+      entryStart = scanner.position() + 1;
+      isExclusion = false;
+    }
+  }
+
+  enter_context("Adding to ActiveFilter.mDomains");
+  (*mDomains)[u""_str] = !sawInclusion;
+  exit_context();
+}

+// Inserts sitekey into mSitekeys, creating the set first if it does not
+// exist yet.
+void ActiveFilter::AddSitekey(const String& sitekey) const
+{
+  if (!mSitekeys.get())
+  {
+    mSitekeys.reset(new SitekeySet());
+    annotate_address(mSitekeys.get(), "SitekeySet");
+  }
+
+  enter_context("Adding to ActiveFilter.mSitekeys");
+  mSitekeys->insert(sitekey);
+  exit_context();
+}

+// Checks whether the filter applies to the given document domain and
+// sitekey. Note that docDomain is modified: it is lowercased in place and
+// re-pointed at shorter suffixes of itself while searching.
+bool ActiveFilter::IsActiveOnDomain(DependentString& docDomain, const String& sitekey) const
+{
+  // If the filter has sitekeys, the given one has to be among them.
+  if (auto knownKeys = GetSitekeys())
+  {
+    if (knownKeys->find(sitekey) == knownKeys->end())
+      return false;
+  }
+
+  // If no domains are set the rule matches everywhere
+  auto domainMap = GetDomains();
+  if (!domainMap)
+    return true;
+
+  // If the document has no host name, match only if the filter isn't
+  // restricted to specific domains
+  if (docDomain.empty())
+    return (*domainMap)[u""_str];
+
+  const String::size_type docLen = docDomain.length();
+  ToLower(docDomain, 0, docLen);
+  if (docLen > 0 && mIgnoreTrailingDot && docDomain[docLen - 1] == '.')
+    docDomain.reset(docDomain, 0, docLen - 1);
+
+  // Look up the domain itself, then each parent domain in turn.
+  for (;;)
+  {
+    auto match = domainMap->find(docDomain);
+    if (match != domainMap->end())
+      return match->second;
+
+    const String::size_type dotPos = docDomain.find(u'.');
+    if (dotPos == docDomain.npos)
+    {
+      // Nothing matched, use the default stored under the empty key.
+      return (*domainMap)[u""_str];
+    }
+
+    docDomain.reset(docDomain, dotPos + 1);
+  }
+}

+// Checks whether every domain the filter is enabled for is either docDomain
+// itself or one of its subdomains. Returns false if the filter has no
+// domains, if docDomain is empty or if the entry under the empty key is
+// true. Note that docDomain is lowercased in place and may lose a trailing
+// dot.
+bool ActiveFilter::IsActiveOnlyOnDomain(DependentString& docDomain) const
+{
+  auto domainMap = GetDomains();
+  if (!domainMap)
+    return false;
+  if (docDomain.empty())
+    return false;
+  if ((*domainMap)[u""_str])
+    return false;
+
+  const String::size_type docLen = docDomain.length();
+  ToLower(docDomain, 0, docLen);
+  if (docLen > 0 && mIgnoreTrailingDot && docDomain[docLen - 1] == '.')
+    docDomain.reset(docDomain, 0, docLen - 1);
+
+  for (auto entry = domainMap->begin(); entry != domainMap->end(); ++entry)
+  {
+    // Excluded domains don't matter here.
+    if (!entry->second)
+      continue;
+
+    // Neither does the document domain itself.
+    if (entry->first.equals(docDomain))
+      continue;
+
+    // Any other enabled entry has to end with "." followed by docDomain.
+    const size_t entryLen = entry->first.length();
+    const size_t currentDocLen = docDomain.length();
+    const bool isSubdomain = entryLen > currentDocLen &&
+        DependentString(entry->first, entryLen - currentDocLen).equals(docDomain) &&
+        entry->first[entryLen - currentDocLen - 1] == u'.';
+    if (!isSubdomain)
+      return false;
+  }
+
+  return true;
+}

+// A filter is generic if it has no sitekeys and either has no domains at
+// all or has a true entry under the empty key.
+bool ActiveFilter::IsGeneric() const
+{
+  if (GetSitekeys())
+    return false;
+
+  auto domainMap = GetDomains();
+  if (!domainMap)
+    return true;
+
+  return (*domainMap)[u""_str];
+}

+// Serializes the filter: takes the output of Filter::Serialize() and
+// appends a "disabled=true" line if the filter is disabled, plus
+// "hitCount=<n>" and "lastHit=<n>" lines for non-zero values.
+OwnedString ActiveFilter::Serialize() const
+{
+  /* TODO this is very inefficient */
+  OwnedString result(Filter::Serialize());
+  if (mDisabled)
+    result.append(u"disabled=true\n"_str);
+  if (mHitCount)
+  {
+    result.append(u"hitCount="_str);
+    result.append(to_string(mHitCount));
+    result.append(u'\n');
+  }
+  if (mLastHit)
+  {
+    result.append(u"lastHit="_str);
+    result.append(to_string(mLastHit));
+    result.append(u'\n');
+  }
+
+  // Return the local by name: the compiler can then elide the copy or move
+  // the value implicitly, whereas an explicit std::move() inhibits elision.
+  return result;
+}

« compiled/ActiveFilter.h ('K') | « compiled/ActiveFilter.h ('k') | compiled/CSSPropertyFilter.h » ('j') | compiled/CSSPropertyFilter.h » ('J')