Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: compiled/ActiveFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Optimized hash lookup performance a bit Created Feb. 8, 2016, 7:11 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: compiled/ActiveFilter.cpp
===================================================================
new file mode 100644
--- /dev/null
+++ b/compiled/ActiveFilter.cpp
@@ -0,0 +1,204 @@
+#include <cstdio>
+
+#include "ActiveFilter.h"
+#include "StringScanner.h"
+
+namespace
+{
+  // Formats an unsigned integer as decimal text and returns it as an
+  // OwnedString.
+  //
+  // The buffer is sized from the width of unsigned int: every byte contributes
+  // at most three decimal digits, plus one byte for the terminating NUL, so
+  // the formatted value always fits regardless of the platform's int width.
+  OwnedString to_string(unsigned int i)
+  {
+    char buffer[sizeof(unsigned int) * 3 + 1];
+    int len = snprintf(buffer, sizeof(buffer), "%u", i);
+    // Return the temporary directly: wrapping a prvalue in std::move() only
+    // inhibits copy elision and gains nothing.
+    return OwnedString(buffer, len);
sergei 2016/02/17 12:54:28 It's not necessary to use `std::move` for the retu
Wladimir Palant 2016/02/18 16:06:29 It got me confused that C++ generates different co
sergei 2016/02/22 12:45:32 Yes, the behavior of C++ compiler regarding the re
+  }
+}
+
+// Constructs an active filter from its text. The filter starts out with
+// mDisabled false and with mHitCount and mLastHit set to zero;
+// ignoreTrailingDot is stored in mIgnoreTrailingDot.
+ActiveFilter::ActiveFilter(const String& text, bool ignoreTrailingDot)
+    : Filter(text),
+      mDisabled(false),
+      mHitCount(0),
+      mLastHit(0),
+      mIgnoreTrailingDot(ignoreTrailingDot)
+{
+}
+
+// Lowercases, in place, the characters of str at indexes [start, end).
+// ASCII capitals are converted arithmetically; code units with a value of 128
+// or more are passed to JavaScript one at a time. All other characters are
+// left untouched.
+void ActiveFilter::ToLower(String& str, String::size_type start,
+    String::size_type end)
+{
+  for (String::size_type pos = start; pos < end; ++pos)
+  {
+    String::value_type ch = str[pos];
+
+    if (ch >= u'A' && ch <= u'Z')
+    {
+      // A lookup table should in theory be more efficient here, but no
+      // performance difference could be measured.
+      str[pos] = ch + u'a' - u'A';
+      continue;
+    }
+
+    if (ch < 128)
+      continue;
+
+    // Calling into JS seems to be the easiest solution for lowercasing
+    // Unicode characters.
+    str[pos] = EM_ASM_INT({
+      return String.fromCharCode($0).toLowerCase().charCodeAt(0);
+    }, ch);
sergei 2016/02/17 12:54:27 Wouldn't it be better to call JS function on the r
Wladimir Palant 2016/02/18 16:06:30 Not sure about that. "Rest of the string" isn't a
sergei 2016/02/22 12:45:32 I thought rather about surrogate pairs, e.g. http:
Wladimir Palant 2016/02/23 12:37:20 Interesting. I was under the impression that JavaS
sergei 2016/02/23 15:07:23 Agree.
+  }
+}
+
+// Returns the pointer currently held by mDomains. Callers in this file treat
+// a null result as "no domain restrictions".
+ActiveFilter::DomainMap* ActiveFilter::GetDomains() const
+{
+  DomainMap* const domains = mDomains.get();
+  return domains;
+}
+
+// Returns the pointer currently held by mSitekeys. Callers in this file treat
+// a null result as "no sitekey restrictions".
+ActiveFilter::SitekeySet* ActiveFilter::GetSitekeys() const
+{
+  SitekeySet* const sitekeys = mSitekeys.get();
+  return sitekeys;
+}
+
+// Parses a separator-delimited domain list and rebuilds mDomains from it.
+//
+// An entry that starts with '~' is stored with the value false, any other
+// entry with the value true. Empty entries are skipped, and a trailing '.' is
+// dropped from an entry when mIgnoreTrailingDot is set. Finally the
+// empty-string key is set to true only if no non-negated entry was stored.
+void ActiveFilter::ParseDomains(const String& domains,
+    String::value_type separator) const
+{
+  // Value handed to the DomainMap constructor: two plus the number of
+  // separators found in the list.
+  DomainMap::size_type entryEstimate = 2;
+  for (String::size_type pos = 0; pos < domains.length(); ++pos)
+  {
+    if (domains[pos] == separator)
+      ++entryEstimate;
+  }
+
+  mDomains.reset(new DomainMap(entryEstimate));
+  annotate_address(mDomains.get(), "DomainMap");
+
+  StringScanner scanner(domains, 0, separator);
+  String::size_type entryStart = 0;
+  bool negated = false;
+  bool sawInclude = false;
+  for (bool finished = false; !finished;)
+  {
+    // done() is read before next(), so the iteration in which the scanner
+    // first reports done still processes the character returned by next().
+    finished = scanner.done();
+    String::value_type ch = scanner.next();
+
+    if (ch == u'~' && scanner.position() == entryStart)
+    {
+      // Leading '~' marks the entry as negated and is not part of the key.
+      ++entryStart;
+      negated = true;
+      continue;
+    }
+
+    if (ch != separator)
+      continue;
+
+    String::size_type entryLen = scanner.position() - entryStart;
+    if (entryLen > 0 && mIgnoreTrailingDot &&
+        domains[entryStart + entryLen - 1] == '.')
+    {
+      --entryLen;
+    }
+
+    if (entryLen > 0)
+    {
+      enter_context("Adding to ActiveFilter.mDomains");
+      (*mDomains)[DependentString(domains, entryStart, entryLen)] = !negated;
+      exit_context();
+
+      if (!negated)
+        sawInclude = true;
+    }
+
+    // Begin the next entry right after the separator.
+    entryStart = scanner.position() + 1;
+    negated = false;
+  }
+
+  enter_context("Adding to ActiveFilter.mDomains");
+  (*mDomains)[u""_str] = !sawInclude;
+  exit_context();
+}
+
+// Inserts sitekey into mSitekeys, creating (and annotating) the set first if
+// it does not exist yet.
+void ActiveFilter::AddSitekey(const String& sitekey) const
+{
+  const bool setMissing = !mSitekeys;
+  if (setMissing)
+  {
+    mSitekeys.reset(new SitekeySet());
+    annotate_address(mSitekeys.get(), "SitekeySet");
+  }
+
+  enter_context("Adding to ActiveFilter.mSitekeys");
+  mSitekeys->insert(sitekey);
+  exit_context();
+}
+
+// Decides whether the filter applies to docDomain given sitekey.
+//
+// Note that docDomain is modified: it is lowercased, optionally stripped of a
+// trailing dot, and then shortened label by label while searching the domain
+// map for the most specific matching entry.
+bool ActiveFilter::IsActiveOnDomain(DependentString& docDomain, const String& sitekey) const
+{
+  // When sitekeys are set, the given sitekey has to be one of them.
+  if (auto allowedKeys = GetSitekeys())
+  {
+    if (allowedKeys->find(sitekey) == allowedKeys->end())
+      return false;
+  }
+
+  // If no domains are set the rule matches everywhere
+  auto domainMap = GetDomains();
+  if (!domainMap)
+    return true;
+
+  // If the document has no host name, match only if the filter isn't
+  // restricted to specific domains
+  if (docDomain.empty())
+    return (*domainMap)[u""_str];
+
+  String::size_type fullLen = docDomain.length();
+  ToLower(docDomain, 0, fullLen);
+  if (fullLen > 0 && mIgnoreTrailingDot && docDomain[fullLen - 1] == '.')
+    docDomain.reset(docDomain, 0, fullLen - 1);
+
+  for (;;)
+  {
+    auto match = domainMap->find(docDomain);
+    if (match != domainMap->end())
+      return match->second;
+
+    // No entry for this name: drop everything up to and including the first
+    // dot and retry; once no dot is left, use the empty-string entry.
+    String::size_type dotPos = docDomain.find(u'.');
+    if (dotPos == docDomain.npos)
+      return (*domainMap)[u""_str];
+    docDomain.reset(docDomain, dotPos + 1);
+  }
+}
+
+// Returns true if every domain map entry with a true value is either equal to
+// docDomain or ends with "." followed by docDomain. Returns false when there
+// is no domain map, docDomain is empty, or the empty-string entry is true.
+//
+// Note that docDomain is modified: it is lowercased and optionally stripped
+// of a trailing dot.
+bool ActiveFilter::IsActiveOnlyOnDomain(DependentString& docDomain) const
+{
+  auto domainMap = GetDomains();
+  if (!domainMap)
+    return false;
+  if (docDomain.empty())
+    return false;
+  if ((*domainMap)[u""_str])
+    return false;
+
+  String::size_type fullLen = docDomain.length();
+  ToLower(docDomain, 0, fullLen);
+  if (fullLen > 0 && mIgnoreTrailingDot && docDomain[fullLen - 1] == '.')
+    docDomain.reset(docDomain, 0, fullLen - 1);
+
+  for (auto entry = domainMap->begin(); entry != domainMap->end(); ++entry)
+  {
+    // Entries with a false value and exact matches need no further checks.
+    if (!entry->second || entry->first.equals(docDomain))
+      continue;
+
+    size_t keyLen = entry->first.length();
+    size_t docLen = docDomain.length();
+    // The key must be longer than docDomain, its tail must equal docDomain
+    // and the character just before that tail must be a dot.
+    bool isSubdomain = keyLen > docLen &&
+        DependentString(entry->first, keyLen - docLen).equals(docDomain) &&
+        entry->first[keyLen - docLen - 1] == u'.';
+    if (!isSubdomain)
+      return false;
+  }
+  return true;
+}
+
+// Returns true if the filter has no sitekeys and either has no domain map or
+// its empty-string domain entry is true.
+bool ActiveFilter::IsGeneric() const
+{
+  // Both getters are queried unconditionally before any decision is made.
+  auto sitekeys = GetSitekeys();
+  auto domains = GetDomains();
+
+  if (sitekeys)
+    return false;
+  if (!domains)
+    return true;
+  return (*domains)[u""_str];
+}
+
+// Serializes the filter: starts from Filter::Serialize() and appends
+// "disabled=true" when mDisabled is set, "hitCount=<n>" when mHitCount is
+// non-zero and "lastHit=<n>" when mLastHit is non-zero, each terminated by a
+// newline.
+OwnedString ActiveFilter::Serialize() const
+{
+  /* TODO this is very inefficient */
+  OwnedString result(Filter::Serialize());
+  if (mDisabled)
+    result.append(u"disabled=true\n"_str);
+  if (mHitCount)
+  {
+    result.append(u"hitCount="_str);
+    result.append(to_string(mHitCount));
+    result.append(u'\n');
+  }
+  if (mLastHit)
+  {
+    result.append(u"lastHit="_str);
+    result.append(to_string(mLastHit));
+    result.append(u'\n');
+  }
+  // Return the local by name: it is moved or elided automatically, whereas an
+  // explicit std::move() would rule out copy elision.
+  return result;
+}

Powered by Google App Engine
This is Rietveld