| Index: compiled/StringMap.h |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/compiled/StringMap.h |
| @@ -0,0 +1,251 @@ |
| +#pragma once |
| + |
| +#include <cstddef> |
| +#include <cmath> |
| +#include <initializer_list> |
| + |
| +#include "String.h" |
| +#include "debug.h" |
| + |
| +template<typename Entry> |
| +struct _HashContainerIterator |
|
sergei
2016/02/17 12:54:57
What do you think about moving all internal struct
Wladimir Palant
2016/02/18 16:07:07
Done.
|
| +{ |
| + typedef Entry entry_type; |
| + typedef _HashContainerIterator<Entry> iterator; |
| + |
| + const entry_type* mPos; |
| + const entry_type* mEnd; |
| + |
| + explicit _HashContainerIterator(const entry_type* start, const entry_type* end) |
| + : mPos(start), mEnd(end) |
| + { |
| + if (mPos != mEnd && mPos->first.is_invalid()) |
| + ++(*this); |
| + } |
| + |
| + _HashContainerIterator() |
| + { |
|
sergei
2016/02/17 12:54:55
It seems this destructor code is not needed.
Wladimir Palant
2016/02/18 16:07:03
Done.
|
| + } |
| + |
| + const entry_type& operator*() const |
| + { |
| + return *mPos; |
| + } |
| + |
| + const entry_type* operator->() const |
| + { |
| + return mPos; |
| + } |
| + |
| + iterator& operator++() |
| + { |
| + do { |
| + ++mPos; |
| + } while(mPos != mEnd && mPos->first.is_invalid()); |
| + return *this; |
| + } |
| + |
| + bool operator==(const iterator& it) const |
| + { |
| + return mPos == it.mPos; |
| + } |
| + |
| + bool operator!=(const iterator& it) const |
| + { |
| + return mPos != it.mPos; |
| + } |
| +}; |
| + |
| +template<typename Entry> |
| +class _HashContainer |
| +{ |
| +public: |
| + typedef Entry entry_type; |
| + typedef size_t size_type; |
| + typedef _HashContainerIterator<Entry> iterator; |
|
sergei
2016/02/17 12:55:01
What do you think about renaming it to `const_iter
Wladimir Palant
2016/02/18 16:07:09
Done.
|
| + |
| +private: |
| + _HashContainer(const _HashContainer& other) {} |
| + void operator=(const _HashContainer& other) {} |
|
sergei
2016/02/17 12:54:59
I guess, it's "noncopyable", it would be better t
Wladimir Palant
2016/02/18 16:07:04
Done.
|
| + |
| +protected: |
| + static constexpr size_type MIN_BUCKETS = 1; |
| + static constexpr double LOAD_FACTOR = 0.8; |
| + entry_type* mBuckets; |
|
sergei
2016/02/17 12:55:03
It can be std::unique_ptr<entry_type[]>, however I
Wladimir Palant
2016/02/18 16:07:05
Done.
|
| + size_type mBucketCount; |
| + size_type mEntryCount; |
| + |
| + explicit _HashContainer(size_type expectedEntries = 0) |
| + : mEntryCount(0) |
| + { |
| + expectedEntries = ceil(expectedEntries / LOAD_FACTOR); |
| + mBucketCount = MIN_BUCKETS; |
| + while (mBucketCount < expectedEntries) |
| + mBucketCount <<= 1; |
| + |
| + mBuckets = new entry_type[mBucketCount]; |
| + // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here |
| + annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buffer"); |
| + } |
| + |
| + ~_HashContainer() |
| + { |
| + delete[] mBuckets; |
| + } |
| + |
| + static size_type hash(const String& str) |
| + { |
| + // FNV-1a hash function |
| + size_type result = 2166136261; |
| + for (String::size_type i = 0; i < str.length(); i++) |
| + result = (result ^ str[i]) * 16777619; |
| + return result; |
| + } |
| + |
| + entry_type* find_entry(const String& key) const |
| + { |
| + size_type h = hash(key); |
| + |
| + // This does quadratic probing, effectively the following formula is used: |
| + // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount |
| + for (size_type i = 0; ; ++i) |
| + { |
| + // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to |
| + // h % mBucketCount but significantly faster. |
| + entry_type* entry = mBuckets + (h & mBucketCount - 1); |
| + if (entry->first.is_invalid() || entry->first.equals(key)) |
| + return entry; |
| + h += i; |
| + } |
| + } |
| + |
| + void resize(size_type bucketCount) |
| + { |
| + entry_type* oldBuckets = mBuckets; |
|
sergei
2016/02/17 12:55:02
here we can also use std::unique_ptr<[]>
Wladimir Palant
2016/02/18 16:07:12
Done.
|
| + size_type oldCount = mBucketCount; |
| + |
| + mEntryCount = 0; |
|
sergei
2016/02/17 12:54:58
after executing of this method the member `mEntryC
Wladimir Palant
2016/02/18 16:07:08
It comes out identical right now, this is going to
sergei
2016/02/22 12:45:54
Acknowledged.
|
| + mBucketCount = bucketCount; |
| + mBuckets = new entry_type[mBucketCount]; |
| + // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here |
| + annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buffer"); |
| + |
| + for (size_type i = 0; i < oldCount; i++) |
| + { |
| + entry_type& entry = oldBuckets[i]; |
| + if (!entry.first.is_invalid()) |
| + { |
| + *find_entry(entry.first) = entry; |
|
sergei
2016/02/17 12:54:56
It would be nice to explain it somewhere in commen
Wladimir Palant
2016/02/18 16:07:10
Not sure what these comments should explain but I
|
| + mEntryCount++; |
| + } |
| + } |
| + |
| + delete[] oldBuckets; |
| + } |
| + |
| + entry_type* assign(entry_type* existing, const entry_type& entry) |
| + { |
| + bool added = existing->first.is_invalid(); |
| + *existing = entry; |
|
sergei
2016/02/17 12:55:00
Wouldn't it be better to firstly resize and only t
Wladimir Palant
2016/02/18 16:07:06
I've restructured the code to assign after resizin
sergei
2016/02/22 12:45:51
Acknowledged.
|
| + if (added) |
| + { |
| + mEntryCount++; |
| + if (mEntryCount >= mBucketCount * LOAD_FACTOR) |
| + { |
| + resize(mBucketCount << 1); |
| + existing = find_entry(entry.first); |
| + } |
| + } |
| + return existing; |
| + } |
| + |
| +public: |
| + void insert(const entry_type& entry) |
| + { |
| + assign(find_entry(entry.first), entry); |
| + } |
| + |
| + iterator find(const String& key) const |
| + { |
| + entry_type* entry = find_entry(key); |
| + if (entry->first.is_invalid()) |
| + return end(); |
| + else |
| + return iterator(entry, mBuckets + mBucketCount); |
| + } |
| + |
| + iterator begin() const |
| + { |
| + return iterator(mBuckets, mBuckets + mBucketCount); |
| + } |
| + |
| + iterator end() const |
| + { |
| + return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount); |
| + } |
| +}; |
| + |
| +struct _StringSetEntry |
| +{ |
| + _StringSetEntry() {} |
| + _StringSetEntry(const String& key) |
| + : first(key) |
| + { |
| + } |
| + |
| + DependentString first; |
| +}; |
| + |
| +class StringSet : public _HashContainer<_StringSetEntry> |
| +{ |
| +}; |
| + |
| +template<typename T> |
| +struct _StringMapEntry |
| +{ |
| + _StringMapEntry() {} |
| + _StringMapEntry(const String& key) |
| + : first(key) |
| + { |
| + } |
| + _StringMapEntry(const String& key, T value) |
| + : first(key), second(value) |
| + { |
| + } |
| + |
| + DependentString first; |
| + T second; |
| +}; |
| + |
| +template<typename T> |
| +class StringMap : public _HashContainer<_StringMapEntry<T>> |
| +{ |
| +public: |
| + typedef _HashContainer<_StringMapEntry<T>> super; |
| + typedef typename super::size_type size_type; |
| + typedef typename super::entry_type entry_type; |
| + |
| + explicit StringMap(size_type expectedEntries = 0) |
| + : super(expectedEntries) |
| + { |
| + } |
| + |
| + StringMap(std::initializer_list<entry_type> list) |
| + : super(list.size()) |
| + { |
| + for (auto it = list.begin(); it != list.end(); ++it) |
| + super::insert(*it); |
| + } |
| + |
| + ~StringMap() |
| + { |
| + } |
| + |
| + T& operator[](const String& key) |
|
sergei
2016/02/17 12:55:04
What do you think about having in addition a const
Wladimir Palant
2016/02/18 16:07:11
This is how STL does it, if you don't want to crea
sergei
2016/02/22 12:45:53
Acknowledged.
|
| + { |
| + entry_type* entry = super::find_entry(key); |
| + if (entry->first.is_invalid()) |
| + entry = super::assign(entry, key); |
| + return entry->second; |
| + } |
| +}; |