| Left: | ||
| Right: | 
| LEFT | RIGHT | 
|---|---|
| 1 #ifndef ADBLOCK_PLUS_STRING_MAP_H | 1 #pragma once | 
| 2 #define ADBLOCK_PLUS_STRING_MAP_H | |
| 3 | 2 | 
| 4 #include <cstddef> | 3 #include <cstddef> | 
| 5 #include <cmath> | 4 #include <cmath> | 
| 6 #include <initializer_list> | 5 #include <initializer_list> | 
| 6 #include <memory> | |
| 7 | 7 | 
| 8 #include "String.h" | 8 #include "String.h" | 
| 9 #include "debug.h" | 9 #include "debug.h" | 
| 10 | 10 | 
| 11 template<class Entry> | 11 template<typename T> | 
| 
 
René Jeschke
2016/02/02 11:11:22
Nit: The usage of `class` for declaring template p
 
Wladimir Palant
2016/02/02 17:55:18
Done.
 
 | |
| 12 struct _HashContainerIterator | 12 class StringMap; | 
| 13 | |
| 14 namespace StringMap_internal | |
| 13 { | 15 { | 
| 14 typedef Entry entry_type; | 16 template<typename Entry> | 
| 15 typedef _HashContainerIterator<Entry> iterator; | 17 struct HashContainerIterator | 
| 16 | 18 { | 
| 17 const entry_type* mPos; | 19 typedef Entry entry_type; | 
| 18 const entry_type* mEnd; | 20 typedef HashContainerIterator<Entry> iterator; | 
| 19 | 21 | 
| 20 explicit _HashContainerIterator(const entry_type* start, const entry_type* end ) | 22 const entry_type* mPos; | 
| 21 : mPos(start), mEnd(end) | 23 const entry_type* mEnd; | 
| 22 { | 24 | 
| 23 if (mPos != mEnd && mPos->first.is_invalid()) | 25 explicit HashContainerIterator(const entry_type* start, const entry_type* en d) | 
| 24 ++(*this); | 26 : mPos(start), mEnd(end) | 
| 25 } | 27 { | 
| 26 | 28 if (mPos != mEnd && mPos->first.is_invalid()) | 
| 27 _HashContainerIterator() | 29 ++(*this); | 
| 28 { | 30 } | 
| 29 } | 31 | 
| 30 | 32 const entry_type& operator*() const | 
| 31 const entry_type& operator*() const | 33 { | 
| 32 { | 34 return *mPos; | 
| 33 return *mPos; | 35 } | 
| 34 } | 36 | 
| 35 | 37 const entry_type* operator->() const | 
| 36 const entry_type* operator->() const | 38 { | 
| 37 { | 39 return mPos; | 
| 38 return mPos; | 40 } | 
| 39 } | 41 | 
| 40 | 42 iterator& operator++() | 
| 41 iterator& operator++() | 43 { | 
| 42 { | 44 do { | 
| 43 do { | 45 ++mPos; | 
| 44 ++mPos; | 46 } while(mPos != mEnd && mPos->first.is_invalid()); | 
| 45 } while(mPos != mEnd && mPos->first.is_invalid()); | 47 return *this; | 
| 46 return *this; | 48 } | 
| 47 } | 49 | 
| 48 | 50 bool operator==(const iterator& it) const | 
| 49 bool operator==(const iterator& it) const | 51 { | 
| 50 { | 52 return mPos == it.mPos; | 
| 51 return mPos == it.mPos; | 53 } | 
| 52 } | 54 | 
| 53 | 55 bool operator!=(const iterator& it) const | 
| 54 bool operator!=(const iterator& it) const | 56 { | 
| 55 { | 57 return mPos != it.mPos; | 
| 56 return mPos != it.mPos; | 58 } | 
| 57 } | 59 }; | 
| 60 | |
| 61 template<typename Entry> | |
| 62 struct HashContainerReference | |
| 63 { | |
| 64 typedef Entry entry_type; | |
| 65 | |
| 66 entry_type* mEntry; | |
| 67 | |
| 68 explicit HashContainerReference(entry_type* entry) | |
| 69 : mEntry(entry) | |
| 70 { | |
| 71 } | |
| 72 | |
| 73 const entry_type* operator->() const | |
| 74 { | |
| 75 return mEntry; | |
| 76 } | |
| 77 | |
| 78 operator bool() const | |
| 79 { | |
| 80 return !mEntry->first.is_invalid(); | |
| 81 } | |
| 82 }; | |
| 83 | |
| 84 template<typename Entry> | |
| 85 class HashContainer | |
| 86 { | |
| 87 public: | |
| 88 typedef Entry entry_type; | |
| 89 typedef size_t size_type; | |
| 90 typedef HashContainerIterator<Entry> const_iterator; | |
| 91 typedef HashContainerReference<const Entry> const_reference; | |
| 92 | |
| 93 private: | |
| 94 explicit HashContainer(const HashContainer& other); | |
| 95 void operator=(const HashContainer& other); | |
| 96 | |
| 97 protected: | |
| 98 static constexpr size_type MIN_BUCKETS = 1; | |
| 99 static constexpr double LOAD_FACTOR = 0.8; | |
| 100 std::unique_ptr<entry_type[]> mBuckets; | |
| 101 size_type mBucketCount; | |
| 102 size_type mEntryCount; | |
| 103 | |
| 104 #if defined(DEBUG) | |
| 105 size_type mInsertCounter; | |
| 106 #endif | |
| 107 | |
| 108 explicit HashContainer(size_type expectedEntries = 0) | |
| 109 : mEntryCount(0) | |
| 110 { | |
| 111 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | |
| 112 mBucketCount = MIN_BUCKETS; | |
| 113 while (mBucketCount < expectedEntries) | |
| 114 mBucketCount <<= 1; | |
| 115 | |
| 116 mBuckets.reset(new entry_type[mBucketCount]); | |
| 117 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here | |
| 118 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); | |
| 119 } | |
| 120 | |
| 121 static size_type hash(const String& str) | |
| 122 { | |
| 123 // FNV-1a hash function | |
| 124 size_type result = 2166136261; | |
| 125 for (String::size_type i = 0; i < str.length(); i++) | |
| 126 result = (result ^ str[i]) * 16777619; | |
| 127 return result; | |
| 128 } | |
| 129 | |
| 130 entry_type* find_bucket(const String& key) const | |
| 131 { | |
| 132 size_type h = hash(key); | |
| 133 | |
| 134 // This does quadratic probing, effectively the following formula is used: | |
| 135 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | |
| 136 for (size_type i = 0; ; ++i) | |
| 137 { | |
| 138 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | |
| 139 // h % mBucketCount but significantly faster. | |
| 140 entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; | |
| 141 if (entry->first.is_invalid() || entry->first.equals(key)) | |
| 142 return entry; | |
| 143 h += i; | |
| 144 } | |
| 145 } | |
| 146 | |
| 147 void resize(size_type bucketCount) | |
| 148 { | |
| 149 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); | |
| 150 size_type oldCount = mBucketCount; | |
| 151 | |
| 152 mEntryCount = 0; | |
| 153 mBucketCount = bucketCount; | |
| 154 mBuckets.reset(new entry_type[mBucketCount]); | |
| 155 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here | |
| 156 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); | |
| 157 | |
| 158 // Copy old entries into the new buffer | |
| 159 for (size_type i = 0; i < oldCount; i++) | |
| 160 { | |
| 161 entry_type& entry = oldBuckets[i]; | |
| 162 if (!entry.first.is_invalid() && !entry.first.is_deleted()) | |
| 163 { | |
| 164 *find_bucket(entry.first) = entry; | |
| 165 mEntryCount++; | |
| 166 } | |
| 167 } | |
| 168 } | |
| 169 | |
| 170 entry_type* assign(entry_type* existing, const entry_type& entry) | |
| 171 { | |
| 172 if (existing->first.is_invalid()) | |
| 173 { | |
| 174 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) | |
| 175 { | |
| 176 resize(mBucketCount << 1); | |
| 177 existing = find_bucket(entry.first); | |
| 178 } | |
| 179 mEntryCount++; | |
| 180 #if defined(DEBUG) | |
| 181 mInsertCounter++; | |
| 182 #endif | |
| 183 } | |
| 184 *existing = entry; | |
| 185 return existing; | |
| 186 } | |
| 187 | |
| 188 public: | |
| 189 void insert(const entry_type& entry) | |
| 190 { | |
| 191 assign(find_bucket(entry.first), entry); | |
| 192 } | |
| 193 | |
| 194 bool erase(const String& key) | |
| 195 { | |
| 196 entry_type* entry = find_bucket(key); | |
| 197 if (entry->first.is_invalid()) | |
| 198 return false; | |
| 199 | |
| 200 entry->first.erase(); | |
| 201 return true; | |
| 202 } | |
| 203 | |
| 204 const_reference find(const String& key) const | |
| 205 { | |
| 206 return const_reference(find_bucket(key)); | |
| 207 } | |
| 208 | |
| 209 const_iterator begin() const | |
| 210 { | |
| 211 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); | |
| 212 } | |
| 213 | |
| 214 const_iterator end() const | |
| 215 { | |
| 216 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); | |
| 217 } | |
| 218 }; | |
| 219 | |
| 220 struct StringSetEntry | |
| 221 { | |
| 222 StringSetEntry() {} | |
| 223 StringSetEntry(const String& key) | |
| 224 : first(key) | |
| 225 { | |
| 226 } | |
| 227 | |
| 228 DependentString first; | |
| 229 }; | |
| 230 | |
| 231 template<typename T> | |
| 232 struct StringMapEntry | |
| 233 { | |
| 234 StringMapEntry() {} | |
| 235 StringMapEntry(const String& key) | |
| 236 : first(key), second() | |
| 237 { | |
| 238 } | |
| 239 StringMapEntry(const String& key, T value) | |
| 240 : first(key), second(value) | |
| 241 { | |
| 242 } | |
| 243 | |
| 244 DependentString first; | |
| 245 T second; | |
| 246 }; | |
| 247 | |
| 248 template<typename T> | |
| 249 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>> | |
| 250 { | |
| 251 typedef HashContainerReference<StringMapEntry<T>> super; | |
| 252 typedef typename super::entry_type entry_type; | |
| 253 typedef StringMap<T> map_type; | |
| 254 | |
| 255 map_type* mMap; | |
| 256 | |
| 257 #if defined(DEBUG) | |
| 258 typename map_type::size_type mInsertCounter; | |
| 259 typename map_type::size_type mHash; | |
| 260 #endif | |
| 261 | |
| 262 StringMapEntryReference(map_type* map, const String& key, entry_type* entry) | |
| 263 : super(entry), mMap(map) | |
| 264 { | |
| 265 #if defined(DEBUG) | |
| 266 mInsertCounter = mMap->mInsertCounter; | |
| 267 mHash = mMap->hash(key); | |
| 268 #endif | |
| 269 } | |
| 270 | |
| 271 void assign(const String& key, const T& value) | |
| 272 { | |
| 273 #if defined(DEBUG) | |
| 274 assert(mInsertCounter == mMap->mInsertCounter, | |
| 275 u"There should be no insert operations performed between map.find() an d assign()"_str); | |
| 276 assert(mHash == mMap->hash(key), | |
| 277 u"The keys used in map.find() and assign() should be identical"_str); | |
| 278 #endif | |
| 279 | |
| 280 mMap->assign(this->mEntry, entry_type(key, value)); | |
| 281 } | |
| 282 }; | |
| 283 } | |
| 284 | |
| 285 class StringSet | |
| 286 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> | |
| 287 { | |
| 58 }; | 288 }; | 
| 59 | 289 | 
| 60 template<class Entry> | 290 template<typename T> | 
| 61 class _HashContainer | 291 class StringMap | 
| 292 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>> | |
| 62 { | 293 { | 
| 63 public: | 294 public: | 
| 64 typedef Entry entry_type; | 295 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super; | 
| 65 typedef size_t size_type; | |
| 66 typedef _HashContainerIterator<Entry> iterator; | |
| 67 | |
| 68 private: | |
| 69 _HashContainer(const _HashContainer& other) {} | |
| 70 void operator=(const _HashContainer& other) {} | |
| 71 | |
| 72 protected: | |
| 73 static constexpr size_type MIN_BUCKETS = 1; | |
| 74 static constexpr double LOAD_FACTOR = 0.8; | |
| 75 entry_type* mBuckets; | |
| 76 size_type mBucketCount; | |
| 77 size_type mEntryCount; | |
| 78 | |
| 79 explicit _HashContainer(size_type expectedEntries = 0) | |
| 80 : mEntryCount(0) | |
| 81 { | |
| 82 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | |
| 83 mBucketCount = MIN_BUCKETS; | |
| 84 while (mBucketCount < expectedEntries) | |
| 85 mBucketCount <<= 1; | |
| 86 | |
| 87 mBuckets = new entry_type[mBucketCount]; | |
| 88 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here | |
| 89 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer"); | |
| 90 } | |
| 91 | |
| 92 ~_HashContainer() | |
| 93 { | |
| 94 delete[] mBuckets; | |
| 95 } | |
| 96 | |
| 97 static size_type hash(const String& str) | |
| 98 { | |
| 99 // FNV-1a hash function | |
| 100 size_type result = 2166136261; | |
| 101 for (String::size_type i = 0; i < str.length(); i++) | |
| 102 result = (result ^ str[i]) * 16777619; | |
| 103 return result; | |
| 104 } | |
| 105 | |
| 106 entry_type* find_entry(const String& key) const | |
| 107 { | |
| 108 size_type h = hash(key); | |
| 109 | |
| 110 // This does quadratic probing, effectively the following formula is used: | |
| 111 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | |
| 112 for (size_type i = 0; ; ++i) | |
| 113 { | |
| 114 entry_type* entry = mBuckets + h % mBucketCount; | |
| 115 if (entry->first.is_invalid() || entry->first.equals(key)) | |
| 116 return entry; | |
| 117 h += i; | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 void resize(size_type bucketCount) | |
| 122 { | |
| 123 entry_type* oldBuckets = mBuckets; | |
| 124 size_type oldCount = mBucketCount; | |
| 125 | |
| 126 mEntryCount = 0; | |
| 127 mBucketCount = bucketCount; | |
| 128 mBuckets = new entry_type[mBucketCount]; | |
| 129 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here | |
| 130 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer"); | |
| 131 | |
| 132 for (size_type i = 0; i < oldCount; i++) | |
| 133 { | |
| 134 entry_type& entry = oldBuckets[i]; | |
| 135 if (!entry.first.is_invalid()) | |
| 136 { | |
| 137 *find_entry(entry.first) = entry; | |
| 138 mEntryCount++; | |
| 139 } | |
| 140 } | |
| 141 | |
| 142 delete[] oldBuckets; | |
| 143 } | |
| 144 | |
| 145 entry_type* assign(entry_type* existing, const entry_type& entry) | |
| 146 { | |
| 147 bool added = existing->first.is_invalid(); | |
| 148 *existing = entry; | |
| 149 if (added) | |
| 150 { | |
| 151 mEntryCount++; | |
| 152 if (mEntryCount >= mBucketCount * LOAD_FACTOR) | |
| 153 { | |
| 154 resize(mBucketCount << 1); | |
| 155 existing = find_entry(entry.first); | |
| 156 } | |
| 157 } | |
| 158 return existing; | |
| 159 } | |
| 160 | |
| 161 public: | |
| 162 void insert(const entry_type& entry) | |
| 163 { | |
| 164 assign(find_entry(entry.first), entry); | |
| 165 } | |
| 166 | |
| 167 iterator find(const String& key) const | |
| 168 { | |
| 169 entry_type* entry = find_entry(key); | |
| 170 if (entry->first.is_invalid()) | |
| 171 return end(); | |
| 172 else | |
| 173 return iterator(entry, mBuckets + mBucketCount); | |
| 174 } | |
| 175 | |
| 176 iterator begin() const | |
| 177 { | |
| 178 return iterator(mBuckets, mBuckets + mBucketCount); | |
| 179 } | |
| 180 | |
| 181 iterator end() const | |
| 182 { | |
| 183 return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount); | |
| 184 } | |
| 185 }; | |
| 186 | |
| 187 struct _StringSetEntry | |
| 188 { | |
| 189 _StringSetEntry() {} | |
| 190 _StringSetEntry(const String& key) | |
| 191 : first(key) | |
| 192 { | |
| 193 } | |
| 194 | |
| 195 String first; | |
| 196 }; | |
| 197 | |
| 198 class StringSet : public _HashContainer<_StringSetEntry> | |
| 199 { | |
| 200 }; | |
| 201 | |
| 202 template<class T> | |
| 203 struct _StringMapEntry | |
| 204 { | |
| 205 _StringMapEntry() {} | |
| 206 _StringMapEntry(const String& key) | |
| 207 : first(key) | |
| 208 { | |
| 209 } | |
| 210 _StringMapEntry(const String& key, T value) | |
| 211 : first(key), second(value) | |
| 212 { | |
| 213 } | |
| 214 | |
| 215 String first; | |
| 216 T second; | |
| 217 }; | |
| 218 | |
| 219 template<class T> | |
| 220 class StringMap : public _HashContainer<_StringMapEntry<T>> | |
| 221 { | |
| 222 public: | |
| 223 typedef _HashContainer<_StringMapEntry<T>> super; | |
| 224 typedef typename super::size_type size_type; | 296 typedef typename super::size_type size_type; | 
| 225 typedef typename super::entry_type entry_type; | 297 typedef typename super::entry_type entry_type; | 
| 298 typedef typename super::const_reference const_reference; | |
| 299 typedef StringMap_internal::StringMapEntryReference<T> reference; | |
| 300 friend class StringMap_internal::StringMapEntryReference<T>; | |
| 226 | 301 | 
| 227 explicit StringMap(size_type expectedEntries = 0) | 302 explicit StringMap(size_type expectedEntries = 0) | 
| 228 : super(expectedEntries) | 303 : super(expectedEntries) | 
| 229 { | 304 { | 
| 230 } | 305 } | 
| 231 | 306 | 
| 232 StringMap(std::initializer_list<entry_type> list) | 307 StringMap(std::initializer_list<entry_type> list) | 
| 233 : super(list.size()) | 308 : super(list.size()) | 
| 234 { | 309 { | 
| 235 for (auto it = list.begin(); it != list.end(); ++it) | 310 for (auto it = list.begin(); it != list.end(); ++it) | 
| 236 super::insert(*it); | 311 super::insert(*it); | 
| 237 } | 312 } | 
| 238 | 313 | 
| 239 ~StringMap() | 314 ~StringMap() | 
| 240 { | 315 { | 
| 241 } | 316 } | 
| 242 | 317 | 
| 243 T& operator[](const String& key) | 318 T& operator[](const String& key) | 
| 244 { | 319 { | 
| 245 entry_type* entry = super::find_entry(key); | 320 entry_type* entry = super::find_bucket(key); | 
| 246 if (entry->first.is_invalid()) | 321 if (entry->first.is_invalid()) | 
| 247 entry = super::assign(entry, key); | 322 entry = super::assign(entry, key); | 
| 248 return entry->second; | 323 return entry->second; | 
| 249 } | 324 } | 
| 325 | |
| 326 const_reference find(const String& key) const | |
| 327 { | |
| 328 return super::find(key); | |
| 329 } | |
| 330 | |
| 331 reference find(const String& key) | |
| 332 { | |
| 333 return reference(this, key, super::find_bucket(key)); | |
| 334 } | |
| 250 }; | 335 }; | 
| 251 | |
| 252 #endif | |
| LEFT | RIGHT |