Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/StringMap.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Patch Set: Optimized hash lookup performance a bit Created Feb. 8, 2016, 7:11 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #pragma once
2
3 #include <cstddef>
4 #include <cmath>
5 #include <initializer_list>
6
7 #include "String.h"
8 #include "debug.h"
9
10 template<typename Entry>
11 struct _HashContainerIterator
sergei 2016/02/17 12:54:57 What do you think about moving all internal struct
Wladimir Palant 2016/02/18 16:07:07 Done.
12 {
13 typedef Entry entry_type;
14 typedef _HashContainerIterator<Entry> iterator;
15
16 const entry_type* mPos;
17 const entry_type* mEnd;
18
19 explicit _HashContainerIterator(const entry_type* start, const entry_type* end )
20 : mPos(start), mEnd(end)
21 {
22 if (mPos != mEnd && mPos->first.is_invalid())
23 ++(*this);
24 }
25
26 _HashContainerIterator()
27 {
sergei 2016/02/17 12:54:55 It seems this destructor code is not needed.
Wladimir Palant 2016/02/18 16:07:03 Done.
28 }
29
30 const entry_type& operator*() const
31 {
32 return *mPos;
33 }
34
35 const entry_type* operator->() const
36 {
37 return mPos;
38 }
39
40 iterator& operator++()
41 {
42 do {
43 ++mPos;
44 } while(mPos != mEnd && mPos->first.is_invalid());
45 return *this;
46 }
47
48 bool operator==(const iterator& it) const
49 {
50 return mPos == it.mPos;
51 }
52
53 bool operator!=(const iterator& it) const
54 {
55 return mPos != it.mPos;
56 }
57 };
58
59 template<typename Entry>
60 class _HashContainer
61 {
62 public:
63 typedef Entry entry_type;
64 typedef size_t size_type;
65 typedef _HashContainerIterator<Entry> iterator;
sergei 2016/02/17 12:55:01 What do you think about renaming it to `const_iter
Wladimir Palant 2016/02/18 16:07:09 Done.
66
67 private:
68 _HashContainer(const _HashContainer& other) {}
69 void operator=(const _HashContainer& other) {}
sergei 2016/02/17 12:54:59 I guess, it's "noncopyable", it would be better t
Wladimir Palant 2016/02/18 16:07:04 Done.
70
71 protected:
72 static constexpr size_type MIN_BUCKETS = 1;
73 static constexpr double LOAD_FACTOR = 0.8;
74 entry_type* mBuckets;
sergei 2016/02/17 12:55:03 It can be std::unique_ptr<entry_type[]>, however I
Wladimir Palant 2016/02/18 16:07:05 Done.
75 size_type mBucketCount;
76 size_type mEntryCount;
77
78 explicit _HashContainer(size_type expectedEntries = 0)
79 : mEntryCount(0)
80 {
81 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
82 mBucketCount = MIN_BUCKETS;
83 while (mBucketCount < expectedEntries)
84 mBucketCount <<= 1;
85
86 mBuckets = new entry_type[mBucketCount];
87 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here
88 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer");
89 }
90
91 ~_HashContainer()
92 {
93 delete[] mBuckets;
94 }
95
96 static size_type hash(const String& str)
97 {
98 // FNV-1a hash function
99 size_type result = 2166136261;
100 for (String::size_type i = 0; i < str.length(); i++)
101 result = (result ^ str[i]) * 16777619;
102 return result;
103 }
104
105 entry_type* find_entry(const String& key) const
106 {
107 size_type h = hash(key);
108
109 // This does quadratic probing, effectively the following formula is used:
110 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
111 for (size_type i = 0; ; ++i)
112 {
113 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
114 // h % mBucketCount but significantly faster.
115 entry_type* entry = mBuckets + (h & mBucketCount - 1);
116 if (entry->first.is_invalid() || entry->first.equals(key))
117 return entry;
118 h += i;
119 }
120 }
121
122 void resize(size_type bucketCount)
123 {
124 entry_type* oldBuckets = mBuckets;
sergei 2016/02/17 12:55:02 here we can also use std::unique_ptr<[]>
Wladimir Palant 2016/02/18 16:07:12 Done.
125 size_type oldCount = mBucketCount;
126
127 mEntryCount = 0;
sergei 2016/02/17 12:54:58 after executing of this method the member `mEntryC
Wladimir Palant 2016/02/18 16:07:08 It comes out identical right now, this is going to
sergei 2016/02/22 12:45:54 Acknowledged.
128 mBucketCount = bucketCount;
129 mBuckets = new entry_type[mBucketCount];
130 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here
131 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer");
132
133 for (size_type i = 0; i < oldCount; i++)
134 {
135 entry_type& entry = oldBuckets[i];
136 if (!entry.first.is_invalid())
137 {
138 *find_entry(entry.first) = entry;
sergei 2016/02/17 12:54:56 It would be nice to explain it somewhere in commen
Wladimir Palant 2016/02/18 16:07:10 Not sure what these comments should explain but I
139 mEntryCount++;
140 }
141 }
142
143 delete[] oldBuckets;
144 }
145
146 entry_type* assign(entry_type* existing, const entry_type& entry)
147 {
148 bool added = existing->first.is_invalid();
149 *existing = entry;
sergei 2016/02/17 12:55:00 Wouldn't it be better to firstly resize and only t
Wladimir Palant 2016/02/18 16:07:06 I've restructured the code to assign after resizin
sergei 2016/02/22 12:45:51 Acknowledged.
150 if (added)
151 {
152 mEntryCount++;
153 if (mEntryCount >= mBucketCount * LOAD_FACTOR)
154 {
155 resize(mBucketCount << 1);
156 existing = find_entry(entry.first);
157 }
158 }
159 return existing;
160 }
161
162 public:
163 void insert(const entry_type& entry)
164 {
165 assign(find_entry(entry.first), entry);
166 }
167
168 iterator find(const String& key) const
169 {
170 entry_type* entry = find_entry(key);
171 if (entry->first.is_invalid())
172 return end();
173 else
174 return iterator(entry, mBuckets + mBucketCount);
175 }
176
177 iterator begin() const
178 {
179 return iterator(mBuckets, mBuckets + mBucketCount);
180 }
181
182 iterator end() const
183 {
184 return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount);
185 }
186 };
187
188 struct _StringSetEntry
189 {
190 _StringSetEntry() {}
191 _StringSetEntry(const String& key)
192 : first(key)
193 {
194 }
195
196 DependentString first;
197 };
198
199 class StringSet : public _HashContainer<_StringSetEntry>
200 {
201 };
202
203 template<typename T>
204 struct _StringMapEntry
205 {
206 _StringMapEntry() {}
207 _StringMapEntry(const String& key)
208 : first(key)
209 {
210 }
211 _StringMapEntry(const String& key, T value)
212 : first(key), second(value)
213 {
214 }
215
216 DependentString first;
217 T second;
218 };
219
220 template<typename T>
221 class StringMap : public _HashContainer<_StringMapEntry<T>>
222 {
223 public:
224 typedef _HashContainer<_StringMapEntry<T>> super;
225 typedef typename super::size_type size_type;
226 typedef typename super::entry_type entry_type;
227
228 explicit StringMap(size_type expectedEntries = 0)
229 : super(expectedEntries)
230 {
231 }
232
233 StringMap(std::initializer_list<entry_type> list)
234 : super(list.size())
235 {
236 for (auto it = list.begin(); it != list.end(); ++it)
237 super::insert(*it);
238 }
239
240 ~StringMap()
241 {
242 }
243
244 T& operator[](const String& key)
sergei 2016/02/17 12:55:04 What do you think about having in addition a const
Wladimir Palant 2016/02/18 16:07:11 This is how STL does it, if you don't want to crea
sergei 2016/02/22 12:45:53 Acknowledged.
245 {
246 entry_type* entry = super::find_entry(key);
247 if (entry->first.is_invalid())
248 entry = super::assign(entry, key);
249 return entry->second;
250 }
251 };
OLDNEW

Powered by Google App Engine
This is Rietveld