Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: compiled/StringMap.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Optimized hash lookup performance a bit Created Feb. 8, 2016, 7:11 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: compiled/StringMap.h
===================================================================
new file mode 100644
--- /dev/null
+++ b/compiled/StringMap.h
@@ -0,0 +1,251 @@
+#pragma once
+
+#include <cstddef>
+#include <cmath>
+#include <initializer_list>
+
+#include "String.h"
+#include "debug.h"
+
+template<typename Entry>
+struct _HashContainerIterator
sergei 2016/02/17 12:54:57 What do you think about moving all internal struct
Wladimir Palant 2016/02/18 16:07:07 Done.
+{
+ typedef Entry entry_type;
+ typedef _HashContainerIterator<Entry> iterator;
+
+ const entry_type* mPos;
+ const entry_type* mEnd;
+
+ explicit _HashContainerIterator(const entry_type* start, const entry_type* end)
+ : mPos(start), mEnd(end)
+ {
+ if (mPos != mEnd && mPos->first.is_invalid())
+ ++(*this);
+ }
+
+ _HashContainerIterator()
+ {
sergei 2016/02/17 12:54:55 It seems this destructor code is not needed.
Wladimir Palant 2016/02/18 16:07:03 Done.
+ }
+
+ const entry_type& operator*() const
+ {
+ return *mPos;
+ }
+
+ const entry_type* operator->() const
+ {
+ return mPos;
+ }
+
+ iterator& operator++()
+ {
+ do {
+ ++mPos;
+ } while(mPos != mEnd && mPos->first.is_invalid());
+ return *this;
+ }
+
+ bool operator==(const iterator& it) const
+ {
+ return mPos == it.mPos;
+ }
+
+ bool operator!=(const iterator& it) const
+ {
+ return mPos != it.mPos;
+ }
+};
+
+template<typename Entry>
+class _HashContainer
+{
+public:
+ typedef Entry entry_type;
+ typedef size_t size_type;
+ typedef _HashContainerIterator<Entry> iterator;
sergei 2016/02/17 12:55:01 What do you think about renaming it to `const_iter
Wladimir Palant 2016/02/18 16:07:09 Done.
+
+private:
+ _HashContainer(const _HashContainer& other) {}
+ void operator=(const _HashContainer& other) {}
sergei 2016/02/17 12:54:59 I guess, it's "noncopyable", it would be better t
Wladimir Palant 2016/02/18 16:07:04 Done.
+
+protected:
+ static constexpr size_type MIN_BUCKETS = 1;
+ static constexpr double LOAD_FACTOR = 0.8;
+ entry_type* mBuckets;
sergei 2016/02/17 12:55:03 It can be std::unique_ptr<entry_type[]>, however I
Wladimir Palant 2016/02/18 16:07:05 Done.
+ size_type mBucketCount;
+ size_type mEntryCount;
+
+ explicit _HashContainer(size_type expectedEntries = 0)
+ : mEntryCount(0)
+ {
+ expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
+ mBucketCount = MIN_BUCKETS;
+ while (mBucketCount < expectedEntries)
+ mBucketCount <<= 1;
+
+ mBuckets = new entry_type[mBucketCount];
+ // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here
+ annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buffer");
+ }
+
+ ~_HashContainer()
+ {
+ delete[] mBuckets;
+ }
+
+ static size_type hash(const String& str)
+ {
+ // FNV-1a hash function
+ size_type result = 2166136261;
+ for (String::size_type i = 0; i < str.length(); i++)
+ result = (result ^ str[i]) * 16777619;
+ return result;
+ }
+
+ entry_type* find_entry(const String& key) const
+ {
+ size_type h = hash(key);
+
+ // This does quadratic probing, effectively the following formula is used:
+ // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
+ for (size_type i = 0; ; ++i)
+ {
+ // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
+ // h % mBucketCount but significantly faster.
+ entry_type* entry = mBuckets + (h & mBucketCount - 1);
+ if (entry->first.is_invalid() || entry->first.equals(key))
+ return entry;
+ h += i;
+ }
+ }
+
+ void resize(size_type bucketCount)
+ {
+ entry_type* oldBuckets = mBuckets;
sergei 2016/02/17 12:55:02 here we can also use std::unique_ptr<[]>
Wladimir Palant 2016/02/18 16:07:12 Done.
+ size_type oldCount = mBucketCount;
+
+ mEntryCount = 0;
sergei 2016/02/17 12:54:58 after executing of this method the member `mEntryC
Wladimir Palant 2016/02/18 16:07:08 It comes out identical right now, this is going to
sergei 2016/02/22 12:45:54 Acknowledged.
+ mBucketCount = bucketCount;
+ mBuckets = new entry_type[mBucketCount];
+ // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here
+ annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buffer");
+
+ for (size_type i = 0; i < oldCount; i++)
+ {
+ entry_type& entry = oldBuckets[i];
+ if (!entry.first.is_invalid())
+ {
+ *find_entry(entry.first) = entry;
sergei 2016/02/17 12:54:56 It would be nice to explain it somewhere in commen
Wladimir Palant 2016/02/18 16:07:10 Not sure what these comments should explain but I
+ mEntryCount++;
+ }
+ }
+
+ delete[] oldBuckets;
+ }
+
+ entry_type* assign(entry_type* existing, const entry_type& entry)
+ {
+ bool added = existing->first.is_invalid();
+ *existing = entry;
sergei 2016/02/17 12:55:00 Wouldn't it be better to firstly resize and only t
Wladimir Palant 2016/02/18 16:07:06 I've restructured the code to assign after resizin
sergei 2016/02/22 12:45:51 Acknowledged.
+ if (added)
+ {
+ mEntryCount++;
+ if (mEntryCount >= mBucketCount * LOAD_FACTOR)
+ {
+ resize(mBucketCount << 1);
+ existing = find_entry(entry.first);
+ }
+ }
+ return existing;
+ }
+
+public:
+ void insert(const entry_type& entry)
+ {
+ assign(find_entry(entry.first), entry);
+ }
+
+ iterator find(const String& key) const
+ {
+ entry_type* entry = find_entry(key);
+ if (entry->first.is_invalid())
+ return end();
+ else
+ return iterator(entry, mBuckets + mBucketCount);
+ }
+
+ iterator begin() const
+ {
+ return iterator(mBuckets, mBuckets + mBucketCount);
+ }
+
+ iterator end() const
+ {
+ return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount);
+ }
+};
+
+struct _StringSetEntry
+{
+ _StringSetEntry() {}
+ _StringSetEntry(const String& key)
+ : first(key)
+ {
+ }
+
+ DependentString first;
+};
+
+class StringSet : public _HashContainer<_StringSetEntry>
+{
+};
+
+template<typename T>
+struct _StringMapEntry
+{
+ _StringMapEntry() {}
+ _StringMapEntry(const String& key)
+ : first(key)
+ {
+ }
+ _StringMapEntry(const String& key, T value)
+ : first(key), second(value)
+ {
+ }
+
+ DependentString first;
+ T second;
+};
+
+template<typename T>
+class StringMap : public _HashContainer<_StringMapEntry<T>>
+{
+public:
+ typedef _HashContainer<_StringMapEntry<T>> super;
+ typedef typename super::size_type size_type;
+ typedef typename super::entry_type entry_type;
+
+ explicit StringMap(size_type expectedEntries = 0)
+ : super(expectedEntries)
+ {
+ }
+
+ StringMap(std::initializer_list<entry_type> list)
+ : super(list.size())
+ {
+ for (auto it = list.begin(); it != list.end(); ++it)
+ super::insert(*it);
+ }
+
+ ~StringMap()
+ {
+ }
+
+ T& operator[](const String& key)
sergei 2016/02/17 12:55:04 What do you think about having in addition a const
Wladimir Palant 2016/02/18 16:07:11 This is how STL does it, if you don't want to crea
sergei 2016/02/22 12:45:53 Acknowledged.
+ {
+ entry_type* entry = super::find_entry(key);
+ if (entry->first.is_invalid())
+ entry = super::assign(entry, key);
+ return entry->second;
+ }
+};

Powered by Google App Engine
This is Rietveld