Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/StringMap.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Improved performance Created Jan. 28, 2016, 2:31 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/String.h ('k') | compiled/StringScanner.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #ifndef ADBLOCK_PLUS_STRING_MAP_H 1 #pragma once
2 #define ADBLOCK_PLUS_STRING_MAP_H
3 2
4 #include <cstddef> 3 #include <cstddef>
5 #include <cmath> 4 #include <cmath>
6 #include <initializer_list> 5 #include <initializer_list>
6 #include <memory>
7 7
8 #include "String.h" 8 #include "String.h"
9 #include "debug.h" 9 #include "debug.h"
10 10
11 template<class Entry> 11 template<typename T>
12 struct _HashContainerIterator 12 class StringMap;
13
14 namespace StringMap_internal
13 { 15 {
14 typedef Entry entry_type; 16 template<typename Entry>
15 typedef _HashContainerIterator<Entry> iterator; 17 struct HashContainerIterator
16 18 {
17 const entry_type* mPos; 19 typedef Entry entry_type;
18 const entry_type* mEnd; 20 typedef HashContainerIterator<Entry> iterator;
19 21
20 explicit _HashContainerIterator(const entry_type* start, const entry_type* end ) 22 const entry_type* mPos;
21 : mPos(start), mEnd(end) 23 const entry_type* mEnd;
22 { 24
23 if (mPos != mEnd && mPos->first.is_invalid()) 25 explicit HashContainerIterator(const entry_type* start, const entry_type* en d)
24 ++(*this); 26 : mPos(start), mEnd(end)
25 } 27 {
26 28 if (mPos != mEnd && mPos->first.is_invalid())
27 _HashContainerIterator() 29 ++(*this);
28 { 30 }
29 } 31
30 32 const entry_type& operator*() const
31 const entry_type& operator*() const 33 {
32 { 34 return *mPos;
33 return *mPos; 35 }
34 } 36
35 37 const entry_type* operator->() const
36 const entry_type* operator->() const 38 {
37 { 39 return mPos;
38 return mPos; 40 }
39 } 41
40 42 iterator& operator++()
41 iterator& operator++() 43 {
42 { 44 do {
43 do { 45 ++mPos;
44 ++mPos; 46 } while(mPos != mEnd && mPos->first.is_invalid());
45 } while(mPos != mEnd && mPos->first.is_invalid()); 47 return *this;
46 return *this; 48 }
47 } 49
48 50 bool operator==(const iterator& it) const
49 bool operator==(const iterator& it) const 51 {
50 { 52 return mPos == it.mPos;
51 return mPos == it.mPos; 53 }
52 } 54
53 55 bool operator!=(const iterator& it) const
54 bool operator!=(const iterator& it) const 56 {
55 { 57 return mPos != it.mPos;
56 return mPos != it.mPos; 58 }
57 } 59 };
60
61 template<typename Entry>
62 struct HashContainerReference
63 {
64 typedef Entry entry_type;
65
66 entry_type* mEntry;
67
68 explicit HashContainerReference(entry_type* entry)
69 : mEntry(entry)
70 {
71 }
72
73 const entry_type* operator->() const
74 {
75 return mEntry;
76 }
77
78 operator bool() const
79 {
80 return !mEntry->first.is_invalid();
81 }
82 };
83
84 template<typename Entry>
85 class HashContainer
86 {
87 public:
88 typedef Entry entry_type;
89 typedef size_t size_type;
90 typedef HashContainerIterator<Entry> const_iterator;
91 typedef HashContainerReference<const Entry> const_reference;
92
93 private:
94 explicit HashContainer(const HashContainer& other);
95 void operator=(const HashContainer& other);
96
97 protected:
98 static constexpr size_type MIN_BUCKETS = 1;
99 static constexpr double LOAD_FACTOR = 0.8;
100 std::unique_ptr<entry_type[]> mBuckets;
101 size_type mBucketCount;
102 size_type mEntryCount;
103
104 #if defined(DEBUG)
105 size_type mInsertCounter;
106 #endif
107
108 explicit HashContainer(size_type expectedEntries = 0)
109 : mEntryCount(0)
110 {
111 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
112 mBucketCount = MIN_BUCKETS;
113 while (mBucketCount < expectedEntries)
114 mBucketCount <<= 1;
115
116 mBuckets.reset(new entry_type[mBucketCount]);
117 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
118 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
119 }
120
121 static size_type hash(const String& str)
122 {
123 // FNV-1a hash function
124 size_type result = 2166136261;
125 for (String::size_type i = 0; i < str.length(); i++)
126 result = (result ^ str[i]) * 16777619;
127 return result;
128 }
129
130 entry_type* find_bucket(const String& key) const
131 {
132 size_type h = hash(key);
133
134 // This does quadratic probing, effectively the following formula is used:
135 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
136 for (size_type i = 0; ; ++i)
137 {
138 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
139 // h % mBucketCount but significantly faster.
140 entry_type* entry = &mBuckets[h & (mBucketCount - 1)];
141 if (entry->first.is_invalid() || entry->first.equals(key))
142 return entry;
143 h += i;
144 }
145 }
146
147 void resize(size_type bucketCount)
148 {
149 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets));
150 size_type oldCount = mBucketCount;
151
152 mEntryCount = 0;
153 mBucketCount = bucketCount;
154 mBuckets.reset(new entry_type[mBucketCount]);
155 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
156 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
157
158 // Copy old entries into the new buffer
159 for (size_type i = 0; i < oldCount; i++)
160 {
161 entry_type& entry = oldBuckets[i];
162 if (!entry.first.is_invalid() && !entry.first.is_deleted())
163 {
164 *find_bucket(entry.first) = entry;
165 mEntryCount++;
166 }
167 }
168 }
169
170 entry_type* assign(entry_type* existing, const entry_type& entry)
171 {
172 if (existing->first.is_invalid())
173 {
174 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR)
175 {
176 resize(mBucketCount << 1);
177 existing = find_bucket(entry.first);
178 }
179 mEntryCount++;
180 #if defined(DEBUG)
181 mInsertCounter++;
182 #endif
183 }
184 *existing = entry;
185 return existing;
186 }
187
188 public:
189 void insert(const entry_type& entry)
190 {
191 assign(find_bucket(entry.first), entry);
192 }
193
194 bool erase(const String& key)
195 {
196 entry_type* entry = find_bucket(key);
197 if (entry->first.is_invalid())
198 return false;
199
200 entry->first.erase();
201 return true;
202 }
203
204 const_reference find(const String& key) const
205 {
206 return const_reference(find_bucket(key));
207 }
208
209 const_iterator begin() const
210 {
211 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]);
212 }
213
214 const_iterator end() const
215 {
216 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]);
217 }
218 };
219
220 struct StringSetEntry
221 {
222 StringSetEntry() {}
223 StringSetEntry(const String& key)
224 : first(key)
225 {
226 }
227
228 DependentString first;
229 };
230
231 template<typename T>
232 struct StringMapEntry
233 {
234 StringMapEntry() {}
235 StringMapEntry(const String& key)
236 : first(key), second()
237 {
238 }
239 StringMapEntry(const String& key, T value)
240 : first(key), second(value)
241 {
242 }
243
244 DependentString first;
245 T second;
246 };
247
248 template<typename T>
249 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>>
250 {
251 typedef HashContainerReference<StringMapEntry<T>> super;
252 typedef typename super::entry_type entry_type;
253 typedef StringMap<T> map_type;
254
255 map_type* mMap;
256
257 #if defined(DEBUG)
258 typename map_type::size_type mInsertCounter;
259 typename map_type::size_type mHash;
260 #endif
261
262 StringMapEntryReference(map_type* map, const String& key, entry_type* entry)
263 : super(entry), mMap(map)
264 {
265 #if defined(DEBUG)
266 mInsertCounter = mMap->mInsertCounter;
267 mHash = mMap->hash(key);
268 #endif
269 }
270
271 void assign(const String& key, const T& value)
272 {
273 #if defined(DEBUG)
274 assert(mInsertCounter == mMap->mInsertCounter,
275 u"There should be no insert operations performed between map.find() an d assign()"_str);
276 assert(mHash == mMap->hash(key),
277 u"The keys used in map.find() and assign() should be identical"_str);
278 #endif
279
280 mMap->assign(this->mEntry, entry_type(key, value));
281 }
282 };
283 }
284
285 class StringSet
286 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry>
287 {
58 }; 288 };
59 289
60 template<class Entry> 290 template<typename T>
61 class _HashContainer 291 class StringMap
292 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>>
62 { 293 {
63 public: 294 public:
64 typedef Entry entry_type; 295 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super;
65 typedef size_t size_type;
66 typedef _HashContainerIterator<Entry> iterator;
67
68 private:
69 _HashContainer(const _HashContainer& other) {}
70 void operator=(const _HashContainer& other) {}
71
72 protected:
73 static constexpr size_type MIN_BUCKETS = 1;
74 static constexpr double LOAD_FACTOR = 0.8;
75 entry_type* mBuckets;
76 size_type mBucketCount;
77 size_type mEntryCount;
78
79 explicit _HashContainer(size_type expectedEntries = 0)
80 : mEntryCount(0)
81 {
82 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
83 mBucketCount = MIN_BUCKETS;
84 while (mBucketCount < expectedEntries)
85 mBucketCount <<= 1;
86
87 mBuckets = new entry_type[mBucketCount];
88 annotate_address(mBuckets, "Hash table buffer");
89 }
90
91 ~_HashContainer()
92 {
93 delete[] mBuckets;
94 }
95
96 static size_type hash(const String& str)
97 {
98 // FNV-1a hash function
99 size_type result = 2166136261;
100 for (String::size_type i = 0; i < str.length(); i++)
101 result = (result ^ str[i]) * 16777619;
102 return result;
103 }
104
105 entry_type* find_entry(const String& key) const
106 {
107 size_type h = hash(key);
108
109 // This does quadratic probing, effectively the following formula is used:
110 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
111 for (size_type i = 0; ; ++i)
112 {
113 entry_type* entry = mBuckets + h % mBucketCount;
114 if (entry->first.is_invalid() || entry->first.equals(key))
115 return entry;
116 h += i;
117 }
118 }
119
120 void resize(size_type bucketCount)
121 {
122 entry_type* oldBuckets = mBuckets;
123 size_type oldCount = mBucketCount;
124
125 mEntryCount = 0;
126 mBucketCount = bucketCount;
127 mBuckets = new entry_type[mBucketCount];
128 annotate_address(mBuckets, "Hash table buffer");
129
130 for (size_type i = 0; i < oldCount; i++)
131 {
132 entry_type& entry = oldBuckets[i];
133 if (!entry.first.is_invalid())
134 {
135 *find_entry(entry.first) = entry;
136 mEntryCount++;
137 }
138 }
139
140 delete[] oldBuckets;
141 }
142
143 entry_type* assign(entry_type* existing, const entry_type& entry)
144 {
145 bool added = existing->first.is_invalid();
146 *existing = entry;
147 if (added)
148 {
149 mEntryCount++;
150 if (mEntryCount >= mBucketCount * LOAD_FACTOR)
151 {
152 resize(mBucketCount << 1);
153 existing = find_entry(entry.first);
154 }
155 }
156 return existing;
157 }
158
159 public:
160 void insert(const entry_type& entry)
161 {
162 assign(find_entry(entry.first), entry);
163 }
164
165 iterator find(const String& key) const
166 {
167 entry_type* entry = find_entry(key);
168 if (entry->first.is_invalid())
169 return end();
170 else
171 return iterator(entry, mBuckets + mBucketCount);
172 }
173
174 iterator begin() const
175 {
176 return iterator(mBuckets, mBuckets + mBucketCount);
177 }
178
179 iterator end() const
180 {
181 return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount);
182 }
183 };
184
185 struct _StringSetEntry
186 {
187 _StringSetEntry() {}
188 _StringSetEntry(const String& key)
189 : first(key)
190 {
191 }
192
193 String first;
194 };
195
196 class StringSet : public _HashContainer<_StringSetEntry>
197 {
198 };
199
200 template<class T>
201 struct _StringMapEntry
202 {
203 _StringMapEntry() {}
204 _StringMapEntry(const String& key)
205 : first(key)
206 {
207 }
208 _StringMapEntry(const String& key, T value)
209 : first(key), second(value)
210 {
211 }
212
213 String first;
214 T second;
215 };
216
217 template<class T>
218 class StringMap : public _HashContainer<_StringMapEntry<T>>
219 {
220 public:
221 typedef _HashContainer<_StringMapEntry<T>> super;
222 typedef typename super::size_type size_type; 296 typedef typename super::size_type size_type;
223 typedef typename super::entry_type entry_type; 297 typedef typename super::entry_type entry_type;
298 typedef typename super::const_reference const_reference;
299 typedef StringMap_internal::StringMapEntryReference<T> reference;
300 friend class StringMap_internal::StringMapEntryReference<T>;
224 301
225 explicit StringMap(size_type expectedEntries = 0) 302 explicit StringMap(size_type expectedEntries = 0)
226 : super(expectedEntries) 303 : super(expectedEntries)
227 { 304 {
228 } 305 }
229 306
230 StringMap(std::initializer_list<entry_type> list) 307 StringMap(std::initializer_list<entry_type> list)
231 : super(list.size()) 308 : super(list.size())
232 { 309 {
233 for (auto it = list.begin(); it != list.end(); ++it) 310 for (auto it = list.begin(); it != list.end(); ++it)
234 super::insert(*it); 311 super::insert(*it);
235 } 312 }
236 313
237 ~StringMap() 314 ~StringMap()
238 { 315 {
239 } 316 }
240 317
241 T& operator[](const String& key) 318 T& operator[](const String& key)
242 { 319 {
243 entry_type* entry = super::find_entry(key); 320 entry_type* entry = super::find_bucket(key);
244 if (entry->first.is_invalid()) 321 if (entry->first.is_invalid())
245 entry = super::assign(entry, key); 322 entry = super::assign(entry, key);
246 return entry->second; 323 return entry->second;
247 } 324 }
325
326 const_reference find(const String& key) const
327 {
328 return super::find(key);
329 }
330
331 reference find(const String& key)
332 {
333 return reference(this, key, super::find_bucket(key));
334 }
248 }; 335 };
249
250 #endif
LEFTRIGHT

Powered by Google App Engine
This is Rietveld