Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/StringMap.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Optimized hash lookup performance a bit Created Feb. 8, 2016, 7:11 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/String.h ('k') | compiled/StringScanner.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #pragma once 1 #pragma once
2 2
3 #include <cstddef> 3 #include <cstddef>
4 #include <cmath> 4 #include <cmath>
5 #include <initializer_list> 5 #include <initializer_list>
6 #include <memory>
6 7
7 #include "String.h" 8 #include "String.h"
8 #include "debug.h" 9 #include "debug.h"
9 10
10 template<typename Entry> 11 template<typename T>
11 struct _HashContainerIterator 12 class StringMap;
sergei 2016/02/17 12:54:57 What do you think about moving all internal struct
Wladimir Palant 2016/02/18 16:07:07 Done.
13
14 namespace StringMap_internal
12 { 15 {
13 typedef Entry entry_type; 16 template<typename Entry>
14 typedef _HashContainerIterator<Entry> iterator; 17 struct HashContainerIterator
15 18 {
16 const entry_type* mPos; 19 typedef Entry entry_type;
17 const entry_type* mEnd; 20 typedef HashContainerIterator<Entry> iterator;
18 21
19 explicit _HashContainerIterator(const entry_type* start, const entry_type* end ) 22 const entry_type* mPos;
20 : mPos(start), mEnd(end) 23 const entry_type* mEnd;
21 { 24
22 if (mPos != mEnd && mPos->first.is_invalid()) 25 explicit HashContainerIterator(const entry_type* start, const entry_type* en d)
23 ++(*this); 26 : mPos(start), mEnd(end)
24 } 27 {
25 28 if (mPos != mEnd && mPos->first.is_invalid())
26 _HashContainerIterator() 29 ++(*this);
27 { 30 }
sergei 2016/02/17 12:54:55 It seems this destructor code is not needed.
Wladimir Palant 2016/02/18 16:07:03 Done.
28 } 31
29 32 const entry_type& operator*() const
30 const entry_type& operator*() const 33 {
31 { 34 return *mPos;
32 return *mPos; 35 }
33 } 36
34 37 const entry_type* operator->() const
35 const entry_type* operator->() const 38 {
36 { 39 return mPos;
37 return mPos; 40 }
38 } 41
39 42 iterator& operator++()
40 iterator& operator++() 43 {
41 { 44 do {
42 do { 45 ++mPos;
43 ++mPos; 46 } while(mPos != mEnd && mPos->first.is_invalid());
44 } while(mPos != mEnd && mPos->first.is_invalid()); 47 return *this;
45 return *this; 48 }
46 } 49
47 50 bool operator==(const iterator& it) const
48 bool operator==(const iterator& it) const 51 {
49 { 52 return mPos == it.mPos;
50 return mPos == it.mPos; 53 }
51 } 54
52 55 bool operator!=(const iterator& it) const
53 bool operator!=(const iterator& it) const 56 {
54 { 57 return mPos != it.mPos;
55 return mPos != it.mPos; 58 }
56 } 59 };
60
61 template<typename Entry>
62 struct HashContainerReference
63 {
64 typedef Entry entry_type;
65
66 entry_type* mEntry;
67
68 explicit HashContainerReference(entry_type* entry)
69 : mEntry(entry)
70 {
71 }
72
73 const entry_type* operator->() const
74 {
75 return mEntry;
76 }
77
78 operator bool() const
79 {
80 return !mEntry->first.is_invalid();
81 }
82 };
83
84 template<typename Entry>
85 class HashContainer
86 {
87 public:
88 typedef Entry entry_type;
89 typedef size_t size_type;
90 typedef HashContainerIterator<Entry> const_iterator;
91 typedef HashContainerReference<const Entry> const_reference;
92
93 private:
94 explicit HashContainer(const HashContainer& other);
95 void operator=(const HashContainer& other);
96
97 protected:
98 static constexpr size_type MIN_BUCKETS = 1;
99 static constexpr double LOAD_FACTOR = 0.8;
100 std::unique_ptr<entry_type[]> mBuckets;
101 size_type mBucketCount;
102 size_type mEntryCount;
103
104 #if defined(DEBUG)
105 size_type mInsertCounter;
106 #endif
107
108 explicit HashContainer(size_type expectedEntries = 0)
109 : mEntryCount(0)
110 {
111 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
112 mBucketCount = MIN_BUCKETS;
113 while (mBucketCount < expectedEntries)
114 mBucketCount <<= 1;
115
116 mBuckets.reset(new entry_type[mBucketCount]);
117 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
118 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
119 }
120
121 static size_type hash(const String& str)
122 {
123 // FNV-1a hash function
124 size_type result = 2166136261;
125 for (String::size_type i = 0; i < str.length(); i++)
126 result = (result ^ str[i]) * 16777619;
127 return result;
128 }
129
130 entry_type* find_bucket(const String& key) const
131 {
132 size_type h = hash(key);
133
134 // This does quadratic probing, effectively the following formula is used:
135 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
136 for (size_type i = 0; ; ++i)
137 {
138 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
139 // h % mBucketCount but significantly faster.
140 entry_type* entry = &mBuckets[h & (mBucketCount - 1)];
141 if (entry->first.is_invalid() || entry->first.equals(key))
142 return entry;
143 h += i;
144 }
145 }
146
147 void resize(size_type bucketCount)
148 {
149 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets));
150 size_type oldCount = mBucketCount;
151
152 mEntryCount = 0;
153 mBucketCount = bucketCount;
154 mBuckets.reset(new entry_type[mBucketCount]);
155 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
156 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
157
158 // Copy old entries into the new buffer
159 for (size_type i = 0; i < oldCount; i++)
160 {
161 entry_type& entry = oldBuckets[i];
162 if (!entry.first.is_invalid() && !entry.first.is_deleted())
163 {
164 *find_bucket(entry.first) = entry;
165 mEntryCount++;
166 }
167 }
168 }
169
170 entry_type* assign(entry_type* existing, const entry_type& entry)
171 {
172 if (existing->first.is_invalid())
173 {
174 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR)
175 {
176 resize(mBucketCount << 1);
177 existing = find_bucket(entry.first);
178 }
179 mEntryCount++;
180 #if defined(DEBUG)
181 mInsertCounter++;
182 #endif
183 }
184 *existing = entry;
185 return existing;
186 }
187
188 public:
189 void insert(const entry_type& entry)
190 {
191 assign(find_bucket(entry.first), entry);
192 }
193
194 bool erase(const String& key)
195 {
196 entry_type* entry = find_bucket(key);
197 if (entry->first.is_invalid())
198 return false;
199
200 entry->first.erase();
201 return true;
202 }
203
204 const_reference find(const String& key) const
205 {
206 return const_reference(find_bucket(key));
207 }
208
209 const_iterator begin() const
210 {
211 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]);
212 }
213
214 const_iterator end() const
215 {
216 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]);
217 }
218 };
219
220 struct StringSetEntry
221 {
222 StringSetEntry() {}
223 StringSetEntry(const String& key)
224 : first(key)
225 {
226 }
227
228 DependentString first;
229 };
230
231 template<typename T>
232 struct StringMapEntry
233 {
234 StringMapEntry() {}
235 StringMapEntry(const String& key)
236 : first(key), second()
237 {
238 }
239 StringMapEntry(const String& key, T value)
240 : first(key), second(value)
241 {
242 }
243
244 DependentString first;
245 T second;
246 };
247
248 template<typename T>
249 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>>
250 {
251 typedef HashContainerReference<StringMapEntry<T>> super;
252 typedef typename super::entry_type entry_type;
253 typedef StringMap<T> map_type;
254
255 map_type* mMap;
256
257 #if defined(DEBUG)
258 typename map_type::size_type mInsertCounter;
259 typename map_type::size_type mHash;
260 #endif
261
262 StringMapEntryReference(map_type* map, const String& key, entry_type* entry)
263 : super(entry), mMap(map)
264 {
265 #if defined(DEBUG)
266 mInsertCounter = mMap->mInsertCounter;
267 mHash = mMap->hash(key);
268 #endif
269 }
270
271 void assign(const String& key, const T& value)
272 {
273 #if defined(DEBUG)
274 assert(mInsertCounter == mMap->mInsertCounter,
275 u"There should be no insert operations performed between map.find() an d assign()"_str);
276 assert(mHash == mMap->hash(key),
277 u"The keys used in map.find() and assign() should be identical"_str);
278 #endif
279
280 mMap->assign(this->mEntry, entry_type(key, value));
281 }
282 };
283 }
284
285 class StringSet
286 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry>
287 {
57 }; 288 };
58 289
59 template<typename Entry> 290 template<typename T>
60 class _HashContainer 291 class StringMap
292 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>>
61 { 293 {
62 public: 294 public:
63 typedef Entry entry_type; 295 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super;
64 typedef size_t size_type;
65 typedef _HashContainerIterator<Entry> iterator;
sergei 2016/02/17 12:55:01 What do you think about renaming it to `const_iter
Wladimir Palant 2016/02/18 16:07:09 Done.
66
67 private:
68 _HashContainer(const _HashContainer& other) {}
69 void operator=(const _HashContainer& other) {}
sergei 2016/02/17 12:54:59 I guess, it's "noncopyable", it would be better t
Wladimir Palant 2016/02/18 16:07:04 Done.
70
71 protected:
72 static constexpr size_type MIN_BUCKETS = 1;
73 static constexpr double LOAD_FACTOR = 0.8;
74 entry_type* mBuckets;
sergei 2016/02/17 12:55:03 It can be std::unique_ptr<entry_type[]>, however I
Wladimir Palant 2016/02/18 16:07:05 Done.
75 size_type mBucketCount;
76 size_type mEntryCount;
77
78 explicit _HashContainer(size_type expectedEntries = 0)
79 : mEntryCount(0)
80 {
81 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
82 mBucketCount = MIN_BUCKETS;
83 while (mBucketCount < expectedEntries)
84 mBucketCount <<= 1;
85
86 mBuckets = new entry_type[mBucketCount];
87 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here
88 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer");
89 }
90
91 ~_HashContainer()
92 {
93 delete[] mBuckets;
94 }
95
96 static size_type hash(const String& str)
97 {
98 // FNV-1a hash function
99 size_type result = 2166136261;
100 for (String::size_type i = 0; i < str.length(); i++)
101 result = (result ^ str[i]) * 16777619;
102 return result;
103 }
104
105 entry_type* find_entry(const String& key) const
106 {
107 size_type h = hash(key);
108
109 // This does quadratic probing, effectively the following formula is used:
110 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
111 for (size_type i = 0; ; ++i)
112 {
113 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
114 // h % mBucketCount but significantly faster.
115 entry_type* entry = mBuckets + (h & mBucketCount - 1);
116 if (entry->first.is_invalid() || entry->first.equals(key))
117 return entry;
118 h += i;
119 }
120 }
121
122 void resize(size_type bucketCount)
123 {
124 entry_type* oldBuckets = mBuckets;
sergei 2016/02/17 12:55:02 here we can also use std::unique_ptr<[]>
Wladimir Palant 2016/02/18 16:07:12 Done.
125 size_type oldCount = mBucketCount;
126
127 mEntryCount = 0;
sergei 2016/02/17 12:54:58 after executing of this method the member `mEntryC
Wladimir Palant 2016/02/18 16:07:08 It comes out identical right now, this is going to
sergei 2016/02/22 12:45:54 Acknowledged.
128 mBucketCount = bucketCount;
129 mBuckets = new entry_type[mBucketCount];
130 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here
131 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer");
132
133 for (size_type i = 0; i < oldCount; i++)
134 {
135 entry_type& entry = oldBuckets[i];
136 if (!entry.first.is_invalid())
137 {
138 *find_entry(entry.first) = entry;
sergei 2016/02/17 12:54:56 It would be nice to explain it somewhere in commen
Wladimir Palant 2016/02/18 16:07:10 Not sure what these comments should explain but I
139 mEntryCount++;
140 }
141 }
142
143 delete[] oldBuckets;
144 }
145
146 entry_type* assign(entry_type* existing, const entry_type& entry)
147 {
148 bool added = existing->first.is_invalid();
149 *existing = entry;
sergei 2016/02/17 12:55:00 Wouldn't it be better to firstly resize and only t
Wladimir Palant 2016/02/18 16:07:06 I've restructured the code to assign after resizin
sergei 2016/02/22 12:45:51 Acknowledged.
150 if (added)
151 {
152 mEntryCount++;
153 if (mEntryCount >= mBucketCount * LOAD_FACTOR)
154 {
155 resize(mBucketCount << 1);
156 existing = find_entry(entry.first);
157 }
158 }
159 return existing;
160 }
161
162 public:
163 void insert(const entry_type& entry)
164 {
165 assign(find_entry(entry.first), entry);
166 }
167
168 iterator find(const String& key) const
169 {
170 entry_type* entry = find_entry(key);
171 if (entry->first.is_invalid())
172 return end();
173 else
174 return iterator(entry, mBuckets + mBucketCount);
175 }
176
177 iterator begin() const
178 {
179 return iterator(mBuckets, mBuckets + mBucketCount);
180 }
181
182 iterator end() const
183 {
184 return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount);
185 }
186 };
187
188 struct _StringSetEntry
189 {
190 _StringSetEntry() {}
191 _StringSetEntry(const String& key)
192 : first(key)
193 {
194 }
195
196 DependentString first;
197 };
198
199 class StringSet : public _HashContainer<_StringSetEntry>
200 {
201 };
202
203 template<typename T>
204 struct _StringMapEntry
205 {
206 _StringMapEntry() {}
207 _StringMapEntry(const String& key)
208 : first(key)
209 {
210 }
211 _StringMapEntry(const String& key, T value)
212 : first(key), second(value)
213 {
214 }
215
216 DependentString first;
217 T second;
218 };
219
220 template<typename T>
221 class StringMap : public _HashContainer<_StringMapEntry<T>>
222 {
223 public:
224 typedef _HashContainer<_StringMapEntry<T>> super;
225 typedef typename super::size_type size_type; 296 typedef typename super::size_type size_type;
226 typedef typename super::entry_type entry_type; 297 typedef typename super::entry_type entry_type;
298 typedef typename super::const_reference const_reference;
299 typedef StringMap_internal::StringMapEntryReference<T> reference;
300 friend class StringMap_internal::StringMapEntryReference<T>;
227 301
228 explicit StringMap(size_type expectedEntries = 0) 302 explicit StringMap(size_type expectedEntries = 0)
229 : super(expectedEntries) 303 : super(expectedEntries)
230 { 304 {
231 } 305 }
232 306
233 StringMap(std::initializer_list<entry_type> list) 307 StringMap(std::initializer_list<entry_type> list)
234 : super(list.size()) 308 : super(list.size())
235 { 309 {
236 for (auto it = list.begin(); it != list.end(); ++it) 310 for (auto it = list.begin(); it != list.end(); ++it)
237 super::insert(*it); 311 super::insert(*it);
238 } 312 }
239 313
240 ~StringMap() 314 ~StringMap()
241 { 315 {
242 } 316 }
243 317
244 T& operator[](const String& key) 318 T& operator[](const String& key)
sergei 2016/02/17 12:55:04 What do you think about having in addition a const
Wladimir Palant 2016/02/18 16:07:11 This is how STL does it, if you don't want to crea
sergei 2016/02/22 12:45:53 Acknowledged.
245 { 319 {
246 entry_type* entry = super::find_entry(key); 320 entry_type* entry = super::find_bucket(key);
247 if (entry->first.is_invalid()) 321 if (entry->first.is_invalid())
248 entry = super::assign(entry, key); 322 entry = super::assign(entry, key);
249 return entry->second; 323 return entry->second;
250 } 324 }
325
326 const_reference find(const String& key) const
327 {
328 return super::find(key);
329 }
330
331 reference find(const String& key)
332 {
333 return reference(this, key, super::find_bucket(key));
334 }
251 }; 335 };
LEFTRIGHT

Powered by Google App Engine
This is Rietveld