Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/StringMap.h

Issue 29572731: Issue 5141 - Generalize Map class to allow non-strings as keys (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore
Patch Set: Introduced key_type_cref Created Dec. 4, 2017, 1:43 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« compiled/IntMap.h ('K') | « compiled/Map.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 #pragma once 18 #pragma once
19 19
20 #include <cstddef> 20 #include <cstddef>
21 #include <cmath>
22 #include <initializer_list>
23 #include <memory>
24 21
22 #include "Map.h"
25 #include "String.h" 23 #include "String.h"
26 #include "debug.h"
27
28 template<typename T>
29 class StringMap;
30 24
31 namespace StringMap_internal 25 namespace StringMap_internal
32 { 26 {
33 template<typename Entry> 27 struct StringSetEntry
34 struct HashContainerIterator
35 { 28 {
36 typedef Entry entry_type; 29 typedef DependentString key_type;
37 typedef HashContainerIterator<Entry> iterator; 30 typedef const String& key_type_cref;
31 typedef size_t size_type;
38 32
39 const entry_type* mPos; 33 key_type first;
40 const entry_type* mEnd;
41 34
42 explicit HashContainerIterator(const entry_type* start, const entry_type* en d) 35 StringSetEntry(key_type_cref key = DependentString())
sergei 2017/12/04 14:26:09 here and below the default value for `key` could b
Wladimir Palant 2017/12/04 18:28:48 It's non-trivial to initialize that static member,
43 : mPos(start), mEnd(end)
44 { 36 {
45 if (mPos != mEnd && mPos->first.is_invalid()) 37 if (!key.is_invalid())
46 ++(*this); 38 first.reset(key);
47 } 39 }
48 40
49 const entry_type& operator*() const 41 bool equals(key_type_cref other) const
50 { 42 {
51 return *mPos; 43 return first.equals(other);
52 } 44 }
53 45
54 const entry_type* operator->() const 46 bool is_invalid() const
55 { 47 {
56 return mPos; 48 return first.is_invalid();
57 } 49 }
58 50
59 iterator& operator++() 51 bool is_deleted() const
60 { 52 {
61 do { 53 return first.is_deleted();
62 ++mPos;
63 } while(mPos != mEnd && mPos->first.is_invalid());
64 return *this;
65 } 54 }
66 55
67 bool operator==(const iterator& it) const 56 void erase()
68 { 57 {
69 return mPos == it.mPos; 58 first.erase();
70 } 59 }
71 60
72 bool operator!=(const iterator& it) const 61 static size_type hash(key_type_cref key)
73 { 62 {
74 return mPos != it.mPos; 63 // FNV-1a hash function
64 size_type result = 2166136261;
65 for (String::size_type i = 0; i < key.length(); i++)
66 result = (result ^ key[i]) * 16777619;
67 return result;
75 } 68 }
76 }; 69 };
77 70
78 template<typename Entry> 71 template<typename Value>
79 struct HashContainerReference 72 struct StringMapEntry : StringSetEntry
80 { 73 {
81 typedef Entry entry_type; 74 typedef StringSetEntry super;
75 typedef Value value_type;
82 76
83 entry_type* mEntry; 77 value_type second;
84 78
85 explicit HashContainerReference(entry_type* entry) 79 StringMapEntry(key_type_cref key = DependentString(),
86 : mEntry(entry) 80 value_type value = value_type())
81 : super(key), second(value)
87 { 82 {
88 } 83 }
89 84
90 const entry_type* operator->() const 85 void erase()
91 { 86 {
92 return mEntry; 87 super::erase();
93 } 88 second = value_type();
94
95 operator bool() const
96 {
97 return !mEntry->first.is_invalid();
98 }
99 };
100
101 template<typename Entry>
102 class HashContainer
103 {
104 public:
105 typedef Entry entry_type;
106 typedef size_t size_type;
107 typedef HashContainerIterator<Entry> const_iterator;
108 typedef HashContainerReference<const Entry> const_reference;
109
110 private:
111 explicit HashContainer(const HashContainer& other);
112 void operator=(const HashContainer& other);
113
114 protected:
115 static constexpr size_type MIN_BUCKETS = 1;
116 static constexpr double LOAD_FACTOR = 0.8;
117 std::unique_ptr<entry_type[]> mBuckets;
118 size_type mBucketCount;
119 size_type mEntryCount;
120
121 #if defined(DEBUG)
122 size_type mInsertCounter;
123 #endif
124
125 explicit HashContainer(size_type expectedEntries = 0)
126 : mEntryCount(0)
127 {
128 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
129 mBucketCount = MIN_BUCKETS;
130 while (mBucketCount < expectedEntries)
131 mBucketCount <<= 1;
132
133 mBuckets.reset(new entry_type[mBucketCount]);
134 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
135 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
136 }
137
138 static size_type hash(const String& str)
139 {
140 // FNV-1a hash function
141 size_type result = 2166136261;
142 for (String::size_type i = 0; i < str.length(); i++)
143 result = (result ^ str[i]) * 16777619;
144 return result;
145 }
146
147 entry_type* find_bucket(const String& key) const
148 {
149 size_type h = hash(key);
150
151 // This does quadratic probing, effectively the following formula is used:
152 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
153 for (size_type i = 0; ; ++i)
154 {
155 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
156 // h % mBucketCount but significantly faster.
157 entry_type* entry = &mBuckets[h & (mBucketCount - 1)];
158 if (entry->first.is_invalid() || entry->first.equals(key))
159 return entry;
160 h += i;
161 }
162 }
163
164 void resize(size_type bucketCount)
165 {
166 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets));
167 size_type oldCount = mBucketCount;
168
169 mEntryCount = 0;
170 mBucketCount = bucketCount;
171 mBuckets.reset(new entry_type[mBucketCount]);
172 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
173 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
174
175 // Copy old entries into the new buffer
176 for (size_type i = 0; i < oldCount; i++)
177 {
178 entry_type& entry = oldBuckets[i];
179 if (!entry.first.is_invalid() && !entry.first.is_deleted())
180 {
181 *find_bucket(entry.first) = entry;
182 mEntryCount++;
183 }
184 }
185 }
186
187 entry_type* assign(entry_type* existing, const entry_type& entry)
188 {
189 if (existing->first.is_invalid())
190 {
191 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR)
192 {
193 resize(mBucketCount << 1);
194 existing = find_bucket(entry.first);
195 }
196 mEntryCount++;
197 #if defined(DEBUG)
198 mInsertCounter++;
199 #endif
200 }
201 *existing = entry;
202 return existing;
203 }
204
205 public:
206 void insert(const entry_type& entry)
207 {
208 assign(find_bucket(entry.first), entry);
209 }
210
211 bool erase(const String& key)
212 {
213 entry_type* entry = find_bucket(key);
214 if (entry->first.is_invalid())
215 return false;
216
217 entry->first.erase();
218 return true;
219 }
220
221 const_reference find(const String& key) const
222 {
223 return const_reference(find_bucket(key));
224 }
225
226 const_iterator begin() const
227 {
228 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]);
229 }
230
231 const_iterator end() const
232 {
233 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]);
234 }
235
236 size_type size() const
237 {
238 return mEntryCount;
239 }
240 };
241
242 struct StringSetEntry
243 {
244 StringSetEntry() {}
245 StringSetEntry(const String& key)
246 : first(key)
247 {
248 }
249
250 DependentString first;
251 };
252
253 template<typename T>
254 struct StringMapEntry
255 {
256 StringMapEntry() {}
257 StringMapEntry(const String& key)
258 : first(key), second()
259 {
260 }
261 StringMapEntry(const String& key, T value)
262 : first(key), second(value)
263 {
264 }
265
266 DependentString first;
267 T second;
268 };
269
270 template<typename T>
271 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>>
272 {
273 typedef HashContainerReference<StringMapEntry<T>> super;
274 typedef typename super::entry_type entry_type;
275 typedef StringMap<T> map_type;
276
277 map_type* mMap;
278
279 #if defined(DEBUG)
280 typename map_type::size_type mInsertCounter;
281 typename map_type::size_type mHash;
282 #endif
283
284 StringMapEntryReference(map_type* map, const String& key, entry_type* entry)
285 : super(entry), mMap(map)
286 {
287 #if defined(DEBUG)
288 mInsertCounter = mMap->mInsertCounter;
289 mHash = mMap->hash(key);
290 #endif
291 }
292
293 void assign(const String& key, const T& value)
294 {
295 #if defined(DEBUG)
296 assert2(mInsertCounter == mMap->mInsertCounter,
297 u"There should be no insert operations performed between map.find() an d assign()"_str);
298 assert2(mHash == mMap->hash(key),
299 u"The keys used in map.find() and assign() should be identical"_str);
300 #endif
301
302 mMap->assign(this->mEntry, entry_type(key, value));
303 } 89 }
304 }; 90 };
305 } 91 }
306 92
307 class StringSet 93 using StringSet = Set<StringMap_internal::StringSetEntry>;
308 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry>
309 {
310 };
311 94
312 template<typename T> 95 template<typename Value>
313 class StringMap 96 using StringMap = Map<StringMap_internal::StringMapEntry<Value>>;
314 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>>
315 {
316 public:
317 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super;
318 typedef typename super::size_type size_type;
319 typedef typename super::entry_type entry_type;
320 typedef typename super::const_reference const_reference;
321 typedef StringMap_internal::StringMapEntryReference<T> reference;
322 friend struct StringMap_internal::StringMapEntryReference<T>;
323
324 explicit StringMap(size_type expectedEntries = 0)
325 : super(expectedEntries)
326 {
327 }
328
329 StringMap(std::initializer_list<entry_type> list)
330 : super(list.size())
331 {
332 for (const auto& item : list)
333 super::insert(item);
334 }
335
336 ~StringMap()
337 {
338 }
339
340 T& operator[](const String& key)
341 {
342 entry_type* entry = super::find_bucket(key);
343 if (entry->first.is_invalid())
344 entry = super::assign(entry, key);
345 return entry->second;
346 }
347
348 const_reference find(const String& key) const
349 {
350 return super::find(key);
351 }
352
353 reference find(const String& key)
354 {
355 return reference(this, key, super::find_bucket(key));
356 }
357 };
OLDNEW
« compiled/IntMap.h ('K') | « compiled/Map.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld