Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/Map.h

Issue 29572731: Issue 5141 - Generalize Map class to allow non-strings as keys (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore
Patch Set: Use constructor inheritance Created Oct. 10, 2017, 6:30 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 #pragma once 18 #pragma once
19 19
20 #include <cstddef>
21 #include <cmath> 20 #include <cmath>
22 #include <initializer_list> 21 #include <initializer_list>
23 #include <memory> 22 #include <memory>
24 23
25 #include "String.h"
26 #include "debug.h" 24 #include "debug.h"
27 25
28 template<typename T> 26 template<typename Entry, typename Value>
29 class StringMap; 27 class Map;
30 28
31 namespace StringMap_internal 29 namespace Map_internal
32 { 30 {
33 template<typename Entry> 31 template<typename Entry>
34 struct HashContainerIterator 32 struct HashContainerIterator
35 { 33 {
36 typedef Entry entry_type; 34 typedef Entry entry_type;
37 typedef HashContainerIterator<Entry> iterator; 35 typedef HashContainerIterator<Entry> iterator;
38 36
39 const entry_type* mPos; 37 const entry_type* mPos;
40 const entry_type* mEnd; 38 const entry_type* mEnd;
41 39
42 explicit HashContainerIterator(const entry_type* start, const entry_type* en d) 40 explicit HashContainerIterator(const entry_type* start, const entry_type* en d)
43 : mPos(start), mEnd(end) 41 : mPos(start), mEnd(end)
44 { 42 {
45 if (mPos != mEnd && mPos->first.is_invalid()) 43 if (mPos != mEnd && mPos->is_invalid())
46 ++(*this); 44 ++(*this);
47 } 45 }
48 46
49 const entry_type& operator*() const 47 const entry_type& operator*() const
50 { 48 {
51 return *mPos; 49 return *mPos;
52 } 50 }
53 51
54 const entry_type* operator->() const 52 const entry_type* operator->() const
55 { 53 {
56 return mPos; 54 return mPos;
57 } 55 }
58 56
59 iterator& operator++() 57 iterator& operator++()
60 { 58 {
61 do { 59 do {
62 ++mPos; 60 ++mPos;
63 } while(mPos != mEnd && mPos->first.is_invalid()); 61 } while(mPos != mEnd && mPos->is_invalid());
64 return *this; 62 return *this;
65 } 63 }
66 64
67 bool operator==(const iterator& it) const 65 bool operator==(const iterator& it) const
68 { 66 {
69 return mPos == it.mPos; 67 return mPos == it.mPos;
70 } 68 }
71 69
72 bool operator!=(const iterator& it) const 70 bool operator!=(const iterator& it) const
73 { 71 {
(...skipping 13 matching lines...) Expand all
87 { 85 {
88 } 86 }
89 87
90 const entry_type* operator->() const 88 const entry_type* operator->() const
91 { 89 {
92 return mEntry; 90 return mEntry;
93 } 91 }
94 92
95 operator bool() const 93 operator bool() const
96 { 94 {
97 return !mEntry->first.is_invalid(); 95 return !mEntry->is_invalid();
98 } 96 }
99 }; 97 };
100 98
101 template<typename Entry> 99 template<typename Entry>
102 class HashContainer 100 class HashContainer
103 { 101 {
104 public: 102 public:
105 typedef Entry entry_type; 103 typedef Entry entry_type;
106 typedef size_t size_type; 104 typedef typename Entry::key_type key_type;
105 typedef typename entry_type::size_type size_type;
107 typedef HashContainerIterator<Entry> const_iterator; 106 typedef HashContainerIterator<Entry> const_iterator;
108 typedef HashContainerReference<const Entry> const_reference; 107 typedef HashContainerReference<const Entry> const_reference;
109 108
110 private: 109 private:
111 explicit HashContainer(const HashContainer& other); 110 explicit HashContainer(const HashContainer& other);
112 void operator=(const HashContainer& other); 111 void operator=(const HashContainer& other);
113 112
114 protected: 113 protected:
115 static constexpr size_type MIN_BUCKETS = 1; 114 static constexpr size_type MIN_BUCKETS = 1;
116 static constexpr double LOAD_FACTOR = 0.8; 115 static constexpr double LOAD_FACTOR = 0.8;
117 std::unique_ptr<entry_type[]> mBuckets; 116 std::unique_ptr<entry_type[]> mBuckets;
118 size_type mBucketCount; 117 size_type mBucketCount;
119 size_type mEntryCount; 118 size_type mEntryCount;
120 119
121 #if defined(DEBUG) 120 #if defined(DEBUG)
122 size_type mInsertCounter; 121 size_type mInsertCounter;
123 #endif 122 #endif
124 123
125 explicit HashContainer(size_type expectedEntries = 0) 124 entry_type* find_bucket(const key_type& key) const
126 : mEntryCount(0)
127 { 125 {
128 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); 126 size_type h = entry_type::hash(key);
129 mBucketCount = MIN_BUCKETS;
130 while (mBucketCount < expectedEntries)
131 mBucketCount <<= 1;
132
133 mBuckets.reset(new entry_type[mBucketCount]);
134 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
135 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
136 }
137
138 static size_type hash(const String& str)
139 {
140 // FNV-1a hash function
141 size_type result = 2166136261;
142 for (String::size_type i = 0; i < str.length(); i++)
143 result = (result ^ str[i]) * 16777619;
144 return result;
145 }
146
147 entry_type* find_bucket(const String& key) const
148 {
149 size_type h = hash(key);
150 127
151 // This does quadratic probing, effectively the following formula is used: 128 // This does quadratic probing, effectively the following formula is used:
152 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount 129 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
153 for (size_type i = 0; ; ++i) 130 for (size_type i = 0; ; ++i)
154 { 131 {
155 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to 132 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
156 // h % mBucketCount but significantly faster. 133 // h % mBucketCount but significantly faster.
157 entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; 134 entry_type* entry = &mBuckets[h & (mBucketCount - 1)];
158 if (entry->first.is_invalid() || entry->first.equals(key)) 135 if (entry->is_invalid() || entry->equals(key))
159 return entry; 136 return entry;
160 h += i; 137 h += i;
161 } 138 }
162 } 139 }
163 140
164 void resize(size_type bucketCount) 141 void resize(size_type bucketCount)
165 { 142 {
166 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); 143 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets));
167 size_type oldCount = mBucketCount; 144 size_type oldCount = mBucketCount;
168 145
169 mEntryCount = 0; 146 mEntryCount = 0;
170 mBucketCount = bucketCount; 147 mBucketCount = bucketCount;
171 mBuckets.reset(new entry_type[mBucketCount]); 148 mBuckets.reset(new entry_type[mBucketCount]);
172 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here 149 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
173 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); 150 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
174 151
175 // Copy old entries into the new buffer 152 // Copy old entries into the new buffer
176 for (size_type i = 0; i < oldCount; i++) 153 for (size_type i = 0; i < oldCount; i++)
177 { 154 {
178 entry_type& entry = oldBuckets[i]; 155 entry_type& entry = oldBuckets[i];
179 if (!entry.first.is_invalid() && !entry.first.is_deleted()) 156 if (!entry.is_invalid() && !entry.is_deleted())
180 { 157 {
181 *find_bucket(entry.first) = entry; 158 *find_bucket(entry.first) = entry;
182 mEntryCount++; 159 mEntryCount++;
183 } 160 }
184 } 161 }
185 } 162 }
186 163
187 entry_type* assign(entry_type* existing, const entry_type& entry) 164 entry_type* assign(entry_type* existing, const entry_type& entry)
188 { 165 {
189 if (existing->first.is_invalid()) 166 if (existing->is_invalid())
190 { 167 {
191 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) 168 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR)
192 { 169 {
193 resize(mBucketCount << 1); 170 resize(mBucketCount << 1);
194 existing = find_bucket(entry.first); 171 existing = find_bucket(entry.first);
195 } 172 }
196 mEntryCount++; 173 mEntryCount++;
197 #if defined(DEBUG) 174 #if defined(DEBUG)
198 mInsertCounter++; 175 mInsertCounter++;
199 #endif 176 #endif
200 } 177 }
201 *existing = entry; 178 *existing = entry;
202 return existing; 179 return existing;
203 } 180 }
204 181
205 public: 182 public:
183 explicit HashContainer(size_type expectedEntries = 0)
184 : mEntryCount(0)
185 {
186 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
187 mBucketCount = MIN_BUCKETS;
188 while (mBucketCount < expectedEntries)
189 mBucketCount <<= 1;
190
191 mBuckets.reset(new entry_type[mBucketCount]);
192 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
193 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
194 }
195
206 void insert(const entry_type& entry) 196 void insert(const entry_type& entry)
207 { 197 {
208 assign(find_bucket(entry.first), entry); 198 assign(find_bucket(entry.first), entry);
209 } 199 }
210 200
211 bool erase(const String& key) 201 bool erase(const key_type& key)
212 { 202 {
213 entry_type* entry = find_bucket(key); 203 entry_type* entry = find_bucket(key);
214 if (entry->first.is_invalid()) 204 if (entry->is_invalid())
215 return false; 205 return false;
216 206
217 entry->first.erase(); 207 entry->erase();
218 return true; 208 return true;
219 } 209 }
220 210
221 const_reference find(const String& key) const 211 const_reference find(const key_type& key) const
222 { 212 {
223 return const_reference(find_bucket(key)); 213 return const_reference(find_bucket(key));
224 } 214 }
225 215
226 const_iterator begin() const 216 const_iterator begin() const
227 { 217 {
228 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); 218 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]);
229 } 219 }
230 220
231 const_iterator end() const 221 const_iterator end() const
232 { 222 {
233 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); 223 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]);
234 } 224 }
235 225
236 size_type size() const 226 size_type size() const
237 { 227 {
238 return mEntryCount; 228 return mEntryCount;
239 } 229 }
240 }; 230 };
241 231
242 struct StringSetEntry 232 template<typename Entry, typename Value>
233 struct MapReference : public HashContainerReference<Entry>
sergei 2017/10/11 10:03:22 I would remove Value template parameter because it
Wladimir Palant 2017/10/11 18:28:31 Done.
243 { 234 {
244 StringSetEntry() {} 235 typedef HashContainerReference<Entry> super;
245 StringSetEntry(const String& key)
246 : first(key)
247 {
248 }
249
250 DependentString first;
251 };
252
253 template<typename T>
254 struct StringMapEntry
255 {
256 StringMapEntry() {}
257 StringMapEntry(const String& key)
258 : first(key), second()
259 {
260 }
261 StringMapEntry(const String& key, T value)
262 : first(key), second(value)
263 {
264 }
265
266 DependentString first;
267 T second;
268 };
269
270 template<typename T>
271 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>>
272 {
273 typedef HashContainerReference<StringMapEntry<T>> super;
274 typedef typename super::entry_type entry_type; 236 typedef typename super::entry_type entry_type;
sergei 2017/10/11 10:03:21 entry_type is already defined in the public base c
Wladimir Palant 2017/10/11 18:28:31 Yes. However, using it in this class will produce
sergei 2017/10/17 12:58:06 Acknowledged. Interesting whether there is a diffe
275 typedef StringMap<T> map_type; 237 typedef typename entry_type::key_type key_type;
238 typedef Value value_type;
239 typedef Map<Entry, Value> map_type;
276 240
277 map_type* mMap; 241 map_type* mMap;
278 242
279 #if defined(DEBUG) 243 #if defined(DEBUG)
280 typename map_type::size_type mInsertCounter; 244 typename map_type::size_type mInsertCounter;
281 typename map_type::size_type mHash; 245 typename map_type::size_type mHash;
282 #endif 246 #endif
283 247
284 StringMapEntryReference(map_type* map, const String& key, entry_type* entry) 248 MapReference(map_type* map, const key_type& key, entry_type* entry)
285 : super(entry), mMap(map) 249 : super(entry), mMap(map)
286 { 250 {
287 #if defined(DEBUG) 251 #if defined(DEBUG)
288 mInsertCounter = mMap->mInsertCounter; 252 mInsertCounter = mMap->mInsertCounter;
289 mHash = mMap->hash(key); 253 mHash = entry_type::hash(key);
290 #endif 254 #endif
291 } 255 }
292 256
293 void assign(const String& key, const T& value) 257 void assign(const key_type& key, const value_type& value)
294 { 258 {
295 #if defined(DEBUG) 259 #if defined(DEBUG)
296 assert2(mInsertCounter == mMap->mInsertCounter, 260 assert2(mInsertCounter == mMap->mInsertCounter,
297 u"There should be no insert operations performed between map.find() an d assign()"_str); 261 u"There should be no insert operations performed between map.find() an d assign()"_str);
298 assert2(mHash == mMap->hash(key), 262 assert2(mHash == entry_type::hash(key),
299 u"The keys used in map.find() and assign() should be identical"_str); 263 u"The keys used in map.find() and assign() should be identical"_str);
300 #endif 264 #endif
301 265
302 mMap->assign(this->mEntry, entry_type(key, value)); 266 mMap->assign(this->mEntry, entry_type(key, value));
303 } 267 }
304 }; 268 };
305 } 269 }
306 270
307 class StringSet 271 template<typename Entry>
308 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> 272 class Set : public Map_internal::HashContainer<Entry>
309 { 273 {
274 public:
275 typedef Map_internal::HashContainer<Entry> super;
276 typedef typename super::size_type size_type;
277 typedef typename super::entry_type entry_type;
278 typedef typename super::key_type key_type;
sergei 2017/10/11 10:03:22 Why is it required to define again size_type, entr
Wladimir Palant 2017/10/11 18:28:31 Done here. For Map class they are actually require
279
280 using super::super;
310 }; 281 };
311 282
312 template<typename T> 283 template<typename Entry, typename Value>
313 class StringMap 284 class Map : public Map_internal::HashContainer<Entry>
314 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>>
315 { 285 {
316 public: 286 public:
317 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super; 287 typedef Map_internal::HashContainer<Entry> super;
318 typedef typename super::size_type size_type; 288 typedef typename super::size_type size_type;
319 typedef typename super::entry_type entry_type; 289 typedef typename super::entry_type entry_type;
290 typedef typename super::key_type key_type;
291 typedef Value value_type;
320 typedef typename super::const_reference const_reference; 292 typedef typename super::const_reference const_reference;
sergei 2017/10/11 10:03:22 const_reference is already defined in the publicly
Wladimir Palant 2017/10/11 18:28:31 It's used in this class, we'll get compile error u
321 typedef StringMap_internal::StringMapEntryReference<T> reference; 293 typedef Map_internal::MapReference<entry_type, value_type> reference;
322 friend struct StringMap_internal::StringMapEntryReference<T>; 294 friend struct Map_internal::MapReference<entry_type, value_type>;
323 295
324 explicit StringMap(size_type expectedEntries = 0) 296 using super::super;
325 : super(expectedEntries)
326 {
327 }
328 297
329 StringMap(std::initializer_list<entry_type> list) 298 Map(std::initializer_list<entry_type> list)
330 : super(list.size()) 299 : super(list.size())
331 { 300 {
332 for (const auto& item : list) 301 for (const auto& item : list)
333 super::insert(item); 302 super::insert(item);
334 } 303 }
335 304
336 ~StringMap() 305 value_type& operator[](const key_type& key)
337 {
338 }
339
340 T& operator[](const String& key)
341 { 306 {
342 entry_type* entry = super::find_bucket(key); 307 entry_type* entry = super::find_bucket(key);
343 if (entry->first.is_invalid()) 308 if (entry->is_invalid())
344 entry = super::assign(entry, key); 309 entry = super::assign(entry, key);
345 return entry->second; 310 return entry->second;
346 } 311 }
347 312
348 const_reference find(const String& key) const 313 const_reference find(const key_type& key) const
sergei 2017/10/11 10:03:22 It seems this method is not needed because it's av
Wladimir Palant 2017/10/11 18:28:31 We declare a non-const variant of this method belo
sergei 2017/10/17 12:58:06 What about `using super::find;`, though it should
349 { 314 {
350 return super::find(key); 315 return super::find(key);
351 } 316 }
352 317
353 reference find(const String& key) 318 reference find(const key_type& key)
354 { 319 {
355 return reference(this, key, super::find_bucket(key)); 320 return reference(this, key, super::find_bucket(key));
356 } 321 }
357 }; 322 };
OLDNEW

Powered by Google App Engine
This is Rietveld