Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/StringMap.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Split up String class into two, cleaned up RegExpFilter methods Created Feb. 4, 2016, 7:20 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/String.h ('k') | compiled/StringScanner.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #ifndef ADBLOCK_PLUS_STRING_MAP_H 1 #pragma once
2 #define ADBLOCK_PLUS_STRING_MAP_H
3 2
4 #include <cstddef> 3 #include <cstddef>
5 #include <cmath> 4 #include <cmath>
6 #include <initializer_list> 5 #include <initializer_list>
6 #include <memory>
7 7
8 #include "String.h" 8 #include "String.h"
9 #include "debug.h" 9 #include "debug.h"
10 10
11 template<typename Entry> 11 template<typename T>
12 struct _HashContainerIterator 12 class StringMap;
13
14 namespace StringMap_internal
13 { 15 {
14 typedef Entry entry_type; 16 template<typename Entry>
15 typedef _HashContainerIterator<Entry> iterator; 17 struct HashContainerIterator
16 18 {
17 const entry_type* mPos; 19 typedef Entry entry_type;
18 const entry_type* mEnd; 20 typedef HashContainerIterator<Entry> iterator;
19 21
20 explicit _HashContainerIterator(const entry_type* start, const entry_type* end ) 22 const entry_type* mPos;
21 : mPos(start), mEnd(end) 23 const entry_type* mEnd;
22 { 24
23 if (mPos != mEnd && mPos->first.is_invalid()) 25 explicit HashContainerIterator(const entry_type* start, const entry_type* en d)
24 ++(*this); 26 : mPos(start), mEnd(end)
25 } 27 {
26 28 if (mPos != mEnd && mPos->first.is_invalid())
27 _HashContainerIterator() 29 ++(*this);
28 { 30 }
29 } 31
30 32 const entry_type& operator*() const
31 const entry_type& operator*() const 33 {
32 { 34 return *mPos;
33 return *mPos; 35 }
34 } 36
35 37 const entry_type* operator->() const
36 const entry_type* operator->() const 38 {
37 { 39 return mPos;
38 return mPos; 40 }
39 } 41
40 42 iterator& operator++()
41 iterator& operator++() 43 {
42 { 44 do {
43 do { 45 ++mPos;
44 ++mPos; 46 } while(mPos != mEnd && mPos->first.is_invalid());
45 } while(mPos != mEnd && mPos->first.is_invalid()); 47 return *this;
46 return *this; 48 }
47 } 49
48 50 bool operator==(const iterator& it) const
49 bool operator==(const iterator& it) const 51 {
50 { 52 return mPos == it.mPos;
51 return mPos == it.mPos; 53 }
52 } 54
53 55 bool operator!=(const iterator& it) const
54 bool operator!=(const iterator& it) const 56 {
55 { 57 return mPos != it.mPos;
56 return mPos != it.mPos; 58 }
57 } 59 };
60
61 template<typename Entry>
62 struct HashContainerReference
63 {
64 typedef Entry entry_type;
65
66 entry_type* mEntry;
67
68 explicit HashContainerReference(entry_type* entry)
69 : mEntry(entry)
70 {
71 }
72
73 const entry_type* operator->() const
74 {
75 return mEntry;
76 }
77
78 operator bool() const
79 {
80 return !mEntry->first.is_invalid();
81 }
82 };
83
84 template<typename Entry>
85 class HashContainer
86 {
87 public:
88 typedef Entry entry_type;
89 typedef size_t size_type;
90 typedef HashContainerIterator<Entry> const_iterator;
91 typedef HashContainerReference<const Entry> const_reference;
92
93 private:
94 explicit HashContainer(const HashContainer& other);
95 void operator=(const HashContainer& other);
96
97 protected:
98 static constexpr size_type MIN_BUCKETS = 1;
99 static constexpr double LOAD_FACTOR = 0.8;
100 std::unique_ptr<entry_type[]> mBuckets;
101 size_type mBucketCount;
102 size_type mEntryCount;
103
104 #if defined(DEBUG)
105 size_type mInsertCounter;
106 #endif
107
108 explicit HashContainer(size_type expectedEntries = 0)
109 : mEntryCount(0)
110 {
111 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
112 mBucketCount = MIN_BUCKETS;
113 while (mBucketCount < expectedEntries)
114 mBucketCount <<= 1;
115
116 mBuckets.reset(new entry_type[mBucketCount]);
117 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
118 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
119 }
120
121 static size_type hash(const String& str)
122 {
123 // FNV-1a hash function
124 size_type result = 2166136261;
125 for (String::size_type i = 0; i < str.length(); i++)
126 result = (result ^ str[i]) * 16777619;
127 return result;
128 }
129
130 entry_type* find_bucket(const String& key) const
131 {
132 size_type h = hash(key);
133
134 // This does quadratic probing, effectively the following formula is used:
135 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
136 for (size_type i = 0; ; ++i)
137 {
138 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
139 // h % mBucketCount but significantly faster.
140 entry_type* entry = &mBuckets[h & (mBucketCount - 1)];
141 if (entry->first.is_invalid() || entry->first.equals(key))
142 return entry;
143 h += i;
144 }
145 }
146
147 void resize(size_type bucketCount)
148 {
149 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets));
150 size_type oldCount = mBucketCount;
151
152 mEntryCount = 0;
153 mBucketCount = bucketCount;
154 mBuckets.reset(new entry_type[mBucketCount]);
155 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
156 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
157
158 // Copy old entries into the new buffer
159 for (size_type i = 0; i < oldCount; i++)
160 {
161 entry_type& entry = oldBuckets[i];
162 if (!entry.first.is_invalid() && !entry.first.is_deleted())
163 {
164 *find_bucket(entry.first) = entry;
165 mEntryCount++;
166 }
167 }
168 }
169
170 entry_type* assign(entry_type* existing, const entry_type& entry)
171 {
172 if (existing->first.is_invalid())
173 {
174 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR)
175 {
176 resize(mBucketCount << 1);
177 existing = find_bucket(entry.first);
178 }
179 mEntryCount++;
180 #if defined(DEBUG)
181 mInsertCounter++;
182 #endif
183 }
184 *existing = entry;
185 return existing;
186 }
187
188 public:
189 void insert(const entry_type& entry)
190 {
191 assign(find_bucket(entry.first), entry);
192 }
193
194 bool erase(const String& key)
195 {
196 entry_type* entry = find_bucket(key);
197 if (entry->first.is_invalid())
198 return false;
199
200 entry->first.erase();
201 return true;
202 }
203
204 const_reference find(const String& key) const
205 {
206 return const_reference(find_bucket(key));
207 }
208
209 const_iterator begin() const
210 {
211 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]);
212 }
213
214 const_iterator end() const
215 {
216 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]);
217 }
218 };
219
220 struct StringSetEntry
221 {
222 StringSetEntry() {}
223 StringSetEntry(const String& key)
224 : first(key)
225 {
226 }
227
228 DependentString first;
229 };
230
231 template<typename T>
232 struct StringMapEntry
233 {
234 StringMapEntry() {}
235 StringMapEntry(const String& key)
236 : first(key), second()
237 {
238 }
239 StringMapEntry(const String& key, T value)
240 : first(key), second(value)
241 {
242 }
243
244 DependentString first;
245 T second;
246 };
247
248 template<typename T>
249 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>>
250 {
251 typedef HashContainerReference<StringMapEntry<T>> super;
252 typedef typename super::entry_type entry_type;
253 typedef StringMap<T> map_type;
254
255 map_type* mMap;
256
257 #if defined(DEBUG)
258 typename map_type::size_type mInsertCounter;
259 typename map_type::size_type mHash;
260 #endif
261
262 StringMapEntryReference(map_type* map, const String& key, entry_type* entry)
263 : super(entry), mMap(map)
264 {
265 #if defined(DEBUG)
266 mInsertCounter = mMap->mInsertCounter;
267 mHash = mMap->hash(key);
268 #endif
269 }
270
271 void assign(const String& key, const T& value)
272 {
273 #if defined(DEBUG)
274 assert(mInsertCounter == mMap->mInsertCounter,
275 u"There should be no insert operations performed between map.find() an d assign()"_str);
276 assert(mHash == mMap->hash(key),
277 u"The keys used in map.find() and assign() should be identical"_str);
278 #endif
279
280 mMap->assign(this->mEntry, entry_type(key, value));
281 }
282 };
283 }
284
285 class StringSet
286 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry>
287 {
58 }; 288 };
59 289
60 template<typename Entry> 290 template<typename T>
61 class _HashContainer 291 class StringMap
292 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>>
62 { 293 {
63 public: 294 public:
64 typedef Entry entry_type; 295 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super;
65 typedef size_t size_type;
66 typedef _HashContainerIterator<Entry> iterator;
67
68 private:
69 _HashContainer(const _HashContainer& other) {}
70 void operator=(const _HashContainer& other) {}
71
72 protected:
73 static constexpr size_type MIN_BUCKETS = 1;
74 static constexpr double LOAD_FACTOR = 0.8;
75 entry_type* mBuckets;
76 size_type mBucketCount;
77 size_type mEntryCount;
78
79 explicit _HashContainer(size_type expectedEntries = 0)
80 : mEntryCount(0)
81 {
82 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
83 mBucketCount = MIN_BUCKETS;
84 while (mBucketCount < expectedEntries)
85 mBucketCount <<= 1;
86
87 mBuckets = new entry_type[mBucketCount];
88 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here
89 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer");
90 }
91
92 ~_HashContainer()
93 {
94 delete[] mBuckets;
95 }
96
97 static size_type hash(const String& str)
98 {
99 // FNV-1a hash function
100 size_type result = 2166136261;
101 for (String::size_type i = 0; i < str.length(); i++)
102 result = (result ^ str[i]) * 16777619;
103 return result;
104 }
105
106 entry_type* find_entry(const String& key) const
107 {
108 size_type h = hash(key);
109
110 // This does quadratic probing, effectively the following formula is used:
111 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
112 for (size_type i = 0; ; ++i)
113 {
114 entry_type* entry = mBuckets + h % mBucketCount;
115 if (entry->first.is_invalid() || entry->first.equals(key))
116 return entry;
117 h += i;
118 }
119 }
120
121 void resize(size_type bucketCount)
122 {
123 entry_type* oldBuckets = mBuckets;
124 size_type oldCount = mBucketCount;
125
126 mEntryCount = 0;
127 mBucketCount = bucketCount;
128 mBuckets = new entry_type[mBucketCount];
129 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here
130 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer");
131
132 for (size_type i = 0; i < oldCount; i++)
133 {
134 entry_type& entry = oldBuckets[i];
135 if (!entry.first.is_invalid())
136 {
137 *find_entry(entry.first) = entry;
138 mEntryCount++;
139 }
140 }
141
142 delete[] oldBuckets;
143 }
144
145 entry_type* assign(entry_type* existing, const entry_type& entry)
146 {
147 bool added = existing->first.is_invalid();
148 *existing = entry;
149 if (added)
150 {
151 mEntryCount++;
152 if (mEntryCount >= mBucketCount * LOAD_FACTOR)
153 {
154 resize(mBucketCount << 1);
155 existing = find_entry(entry.first);
156 }
157 }
158 return existing;
159 }
160
161 public:
162 void insert(const entry_type& entry)
163 {
164 assign(find_entry(entry.first), entry);
165 }
166
167 iterator find(const String& key) const
168 {
169 entry_type* entry = find_entry(key);
170 if (entry->first.is_invalid())
171 return end();
172 else
173 return iterator(entry, mBuckets + mBucketCount);
174 }
175
176 iterator begin() const
177 {
178 return iterator(mBuckets, mBuckets + mBucketCount);
179 }
180
181 iterator end() const
182 {
183 return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount);
184 }
185 };
186
187 struct _StringSetEntry
188 {
189 _StringSetEntry() {}
190 _StringSetEntry(const String& key)
191 : first(key)
192 {
193 }
194
195 DependentString first;
196 };
197
198 class StringSet : public _HashContainer<_StringSetEntry>
199 {
200 };
201
202 template<typename T>
203 struct _StringMapEntry
204 {
205 _StringMapEntry() {}
206 _StringMapEntry(const String& key)
207 : first(key)
208 {
209 }
210 _StringMapEntry(const String& key, T value)
211 : first(key), second(value)
212 {
213 }
214
215 DependentString first;
216 T second;
217 };
218
219 template<typename T>
220 class StringMap : public _HashContainer<_StringMapEntry<T>>
221 {
222 public:
223 typedef _HashContainer<_StringMapEntry<T>> super;
224 typedef typename super::size_type size_type; 296 typedef typename super::size_type size_type;
225 typedef typename super::entry_type entry_type; 297 typedef typename super::entry_type entry_type;
298 typedef typename super::const_reference const_reference;
299 typedef StringMap_internal::StringMapEntryReference<T> reference;
300 friend class StringMap_internal::StringMapEntryReference<T>;
226 301
227 explicit StringMap(size_type expectedEntries = 0) 302 explicit StringMap(size_type expectedEntries = 0)
228 : super(expectedEntries) 303 : super(expectedEntries)
229 { 304 {
230 } 305 }
231 306
232 StringMap(std::initializer_list<entry_type> list) 307 StringMap(std::initializer_list<entry_type> list)
233 : super(list.size()) 308 : super(list.size())
234 { 309 {
235 for (auto it = list.begin(); it != list.end(); ++it) 310 for (auto it = list.begin(); it != list.end(); ++it)
236 super::insert(*it); 311 super::insert(*it);
237 } 312 }
238 313
239 ~StringMap() 314 ~StringMap()
240 { 315 {
241 } 316 }
242 317
243 T& operator[](const String& key) 318 T& operator[](const String& key)
244 { 319 {
245 entry_type* entry = super::find_entry(key); 320 entry_type* entry = super::find_bucket(key);
246 if (entry->first.is_invalid()) 321 if (entry->first.is_invalid())
247 entry = super::assign(entry, key); 322 entry = super::assign(entry, key);
248 return entry->second; 323 return entry->second;
249 } 324 }
325
326 const_reference find(const String& key) const
327 {
328 return super::find(key);
329 }
330
331 reference find(const String& key)
332 {
333 return reference(this, key, super::find_bucket(key));
334 }
250 }; 335 };
251
252 #endif
LEFTRIGHT

Powered by Google App Engine
This is Rietveld