| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
| 3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
| 4 * | 4 * |
| 5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
| 6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
| 7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
| 8 * | 8 * |
| 9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
| 13 * | 13 * |
| 14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
| 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| 16 */ | 16 */ |
| 17 | 17 |
| 18 #pragma once | 18 #pragma once |
| 19 | 19 |
| 20 #include <cstddef> | |
| 21 #include <cmath> | 20 #include <cmath> |
| 22 #include <initializer_list> | 21 #include <initializer_list> |
| 23 #include <memory> | 22 #include <memory> |
| 24 | 23 |
| 25 #include "String.h" | |
| 26 #include "debug.h" | 24 #include "debug.h" |
| 27 | 25 |
| 28 template<typename T> | 26 template<typename Entry, typename Value> |
| 29 class StringMap; | 27 class Map; |
| 30 | 28 |
| 31 namespace StringMap_internal | 29 namespace Map_internal |
| 32 { | 30 { |
| 33 template<typename Entry> | 31 template<typename Entry> |
| 34 struct HashContainerIterator | 32 struct HashContainerIterator |
| 35 { | 33 { |
| 36 typedef Entry entry_type; | 34 typedef Entry entry_type; |
| 37 typedef HashContainerIterator<Entry> iterator; | 35 typedef HashContainerIterator<Entry> iterator; |
| 38 | 36 |
| 39 const entry_type* mPos; | 37 const entry_type* mPos; |
| 40 const entry_type* mEnd; | 38 const entry_type* mEnd; |
| 41 | 39 |
| 42 explicit HashContainerIterator(const entry_type* start, const entry_type* end) | 40 explicit HashContainerIterator(const entry_type* start, const entry_type* end) |
| 43 : mPos(start), mEnd(end) | 41 : mPos(start), mEnd(end) |
| 44 { | 42 { |
| 45 if (mPos != mEnd && mPos->first.is_invalid()) | 43 if (mPos != mEnd && mPos->is_invalid()) |
| 46 ++(*this); | 44 ++(*this); |
| 47 } | 45 } |
| 48 | 46 |
| 49 const entry_type& operator*() const | 47 const entry_type& operator*() const |
| 50 { | 48 { |
| 51 return *mPos; | 49 return *mPos; |
| 52 } | 50 } |
| 53 | 51 |
| 54 const entry_type* operator->() const | 52 const entry_type* operator->() const |
| 55 { | 53 { |
| 56 return mPos; | 54 return mPos; |
| 57 } | 55 } |
| 58 | 56 |
| 59 iterator& operator++() | 57 iterator& operator++() |
| 60 { | 58 { |
| 61 do { | 59 do { |
| 62 ++mPos; | 60 ++mPos; |
| 63 } while(mPos != mEnd && mPos->first.is_invalid()); | 61 } while(mPos != mEnd && mPos->is_invalid()); |
| 64 return *this; | 62 return *this; |
| 65 } | 63 } |
| 66 | 64 |
| 67 bool operator==(const iterator& it) const | 65 bool operator==(const iterator& it) const |
| 68 { | 66 { |
| 69 return mPos == it.mPos; | 67 return mPos == it.mPos; |
| 70 } | 68 } |
| 71 | 69 |
| 72 bool operator!=(const iterator& it) const | 70 bool operator!=(const iterator& it) const |
| 73 { | 71 { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 87 { | 85 { |
| 88 } | 86 } |
| 89 | 87 |
| 90 const entry_type* operator->() const | 88 const entry_type* operator->() const |
| 91 { | 89 { |
| 92 return mEntry; | 90 return mEntry; |
| 93 } | 91 } |
| 94 | 92 |
| 95 operator bool() const | 93 operator bool() const |
| 96 { | 94 { |
| 97 return !mEntry->first.is_invalid(); | 95 return !mEntry->is_invalid(); |
| 98 } | 96 } |
| 99 }; | 97 }; |
| 100 | 98 |
| 101 template<typename Entry> | 99 template<typename Entry> |
| 102 class HashContainer | 100 class HashContainer |
| 103 { | 101 { |
| 104 public: | 102 public: |
| 105 typedef Entry entry_type; | 103 typedef Entry entry_type; |
| 106 typedef size_t size_type; | 104 typedef typename Entry::key_type key_type; |
| 105 typedef typename entry_type::size_type size_type; | |
| 107 typedef HashContainerIterator<Entry> const_iterator; | 106 typedef HashContainerIterator<Entry> const_iterator; |
| 108 typedef HashContainerReference<const Entry> const_reference; | 107 typedef HashContainerReference<const Entry> const_reference; |
| 109 | 108 |
| 110 private: | 109 private: |
| 111 explicit HashContainer(const HashContainer& other); | 110 explicit HashContainer(const HashContainer& other); |
| 112 void operator=(const HashContainer& other); | 111 void operator=(const HashContainer& other); |
| 113 | 112 |
| 114 protected: | 113 protected: |
| 115 static constexpr size_type MIN_BUCKETS = 1; | 114 static constexpr size_type MIN_BUCKETS = 1; |
| 116 static constexpr double LOAD_FACTOR = 0.8; | 115 static constexpr double LOAD_FACTOR = 0.8; |
| 117 std::unique_ptr<entry_type[]> mBuckets; | 116 std::unique_ptr<entry_type[]> mBuckets; |
| 118 size_type mBucketCount; | 117 size_type mBucketCount; |
| 119 size_type mEntryCount; | 118 size_type mEntryCount; |
| 120 | 119 |
| 121 #if defined(DEBUG) | 120 #if defined(DEBUG) |
| 122 size_type mInsertCounter; | 121 size_type mInsertCounter; |
| 123 #endif | 122 #endif |
| 124 | 123 |
| 125 explicit HashContainer(size_type expectedEntries = 0) | 124 entry_type* find_bucket(const key_type& key) const |
| 126 : mEntryCount(0) | |
| 127 { | 125 { |
| 128 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | 126 size_type h = entry_type::hash(key); |
| 129 mBucketCount = MIN_BUCKETS; | |
| 130 while (mBucketCount < expectedEntries) | |
| 131 mBucketCount <<= 1; | |
| 132 | |
| 133 mBuckets.reset(new entry_type[mBucketCount]); | |
| 134 // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here | |
| 135 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash table buffer"); | |
| 136 } | |
| 137 | |
| 138 static size_type hash(const String& str) | |
| 139 { | |
| 140 // FNV-1a hash function | |
| 141 size_type result = 2166136261; | |
| 142 for (String::size_type i = 0; i < str.length(); i++) | |
| 143 result = (result ^ str[i]) * 16777619; | |
| 144 return result; | |
| 145 } | |
| 146 | |
| 147 entry_type* find_bucket(const String& key) const | |
| 148 { | |
| 149 size_type h = hash(key); | |
| 150 | 127 |
| 151 // This does quadratic probing, effectively the following formula is used: | 128 // This does quadratic probing, effectively the following formula is used: |
| 152 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | 129 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount |
| 153 for (size_type i = 0; ; ++i) | 130 for (size_type i = 0; ; ++i) |
| 154 { | 131 { |
| 155 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | 132 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to |
| 156 // h % mBucketCount but significantly faster. | 133 // h % mBucketCount but significantly faster. |
| 157 entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; | 134 entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; |
| 158 if (entry->first.is_invalid() || entry->first.equals(key)) | 135 if (entry->is_invalid() || entry->equals(key)) |
| 159 return entry; | 136 return entry; |
| 160 h += i; | 137 h += i; |
| 161 } | 138 } |
| 162 } | 139 } |
| 163 | 140 |
| 164 void resize(size_type bucketCount) | 141 void resize(size_type bucketCount) |
| 165 { | 142 { |
| 166 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); | 143 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); |
| 167 size_type oldCount = mBucketCount; | 144 size_type oldCount = mBucketCount; |
| 168 | 145 |
| 169 mEntryCount = 0; | 146 mEntryCount = 0; |
| 170 mBucketCount = bucketCount; | 147 mBucketCount = bucketCount; |
| 171 mBuckets.reset(new entry_type[mBucketCount]); | 148 mBuckets.reset(new entry_type[mBucketCount]); |
| 172 // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here | 149 // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here |
| 173 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash table buffer"); | 150 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash table buffer"); |
| 174 | 151 |
| 175 // Copy old entries into the new buffer | 152 // Copy old entries into the new buffer |
| 176 for (size_type i = 0; i < oldCount; i++) | 153 for (size_type i = 0; i < oldCount; i++) |
| 177 { | 154 { |
| 178 entry_type& entry = oldBuckets[i]; | 155 entry_type& entry = oldBuckets[i]; |
| 179 if (!entry.first.is_invalid() && !entry.first.is_deleted()) | 156 if (!entry.is_invalid() && !entry.is_deleted()) |
| 180 { | 157 { |
| 181 *find_bucket(entry.first) = entry; | 158 *find_bucket(entry.first) = entry; |
| 182 mEntryCount++; | 159 mEntryCount++; |
| 183 } | 160 } |
| 184 } | 161 } |
| 185 } | 162 } |
| 186 | 163 |
| 187 entry_type* assign(entry_type* existing, const entry_type& entry) | 164 entry_type* assign(entry_type* existing, const entry_type& entry) |
| 188 { | 165 { |
| 189 if (existing->first.is_invalid()) | 166 if (existing->is_invalid()) |
| 190 { | 167 { |
| 191 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) | 168 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) |
| 192 { | 169 { |
| 193 resize(mBucketCount << 1); | 170 resize(mBucketCount << 1); |
| 194 existing = find_bucket(entry.first); | 171 existing = find_bucket(entry.first); |
| 195 } | 172 } |
| 196 mEntryCount++; | 173 mEntryCount++; |
| 197 #if defined(DEBUG) | 174 #if defined(DEBUG) |
| 198 mInsertCounter++; | 175 mInsertCounter++; |
| 199 #endif | 176 #endif |
| 200 } | 177 } |
| 201 *existing = entry; | 178 *existing = entry; |
| 202 return existing; | 179 return existing; |
| 203 } | 180 } |
| 204 | 181 |
| 205 public: | 182 public: |
| 183 explicit HashContainer(size_type expectedEntries = 0) | |
| 184 : mEntryCount(0) | |
| 185 { | |
| 186 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | |
| 187 mBucketCount = MIN_BUCKETS; | |
| 188 while (mBucketCount < expectedEntries) | |
| 189 mBucketCount <<= 1; | |
| 190 | |
| 191 mBuckets.reset(new entry_type[mBucketCount]); | |
| | 192 // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here | |
| | 193 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash table buffer"); | |
| 194 } | |
| 195 | |
| 206 void insert(const entry_type& entry) | 196 void insert(const entry_type& entry) |
| 207 { | 197 { |
| 208 assign(find_bucket(entry.first), entry); | 198 assign(find_bucket(entry.first), entry); |
| 209 } | 199 } |
| 210 | 200 |
| 211 bool erase(const String& key) | 201 bool erase(const key_type& key) |
| 212 { | 202 { |
| 213 entry_type* entry = find_bucket(key); | 203 entry_type* entry = find_bucket(key); |
| 214 if (entry->first.is_invalid()) | 204 if (entry->is_invalid()) |
| 215 return false; | 205 return false; |
| 216 | 206 |
| 217 entry->first.erase(); | 207 entry->erase(); |
| 218 return true; | 208 return true; |
| 219 } | 209 } |
| 220 | 210 |
| 221 const_reference find(const String& key) const | 211 const_reference find(const key_type& key) const |
| 222 { | 212 { |
| 223 return const_reference(find_bucket(key)); | 213 return const_reference(find_bucket(key)); |
| 224 } | 214 } |
| 225 | 215 |
| 226 const_iterator begin() const | 216 const_iterator begin() const |
| 227 { | 217 { |
| 228 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); | 218 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); |
| 229 } | 219 } |
| 230 | 220 |
| 231 const_iterator end() const | 221 const_iterator end() const |
| 232 { | 222 { |
| 233 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); | 223 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); |
| 234 } | 224 } |
| 235 | 225 |
| 236 size_type size() const | 226 size_type size() const |
| 237 { | 227 { |
| 238 return mEntryCount; | 228 return mEntryCount; |
| 239 } | 229 } |
| 240 }; | 230 }; |
| 241 | 231 |
| 242 struct StringSetEntry | 232 template<typename Entry, typename Value> |
| 233 struct MapReference : public HashContainerReference<Entry> | |
|
sergei
2017/10/11 10:03:22
I would remove Value template parameter because it
Wladimir Palant
2017/10/11 18:28:31
Done.
| |
| 243 { | 234 { |
| 244 StringSetEntry() {} | 235 typedef HashContainerReference<Entry> super; |
| 245 StringSetEntry(const String& key) | |
| 246 : first(key) | |
| 247 { | |
| 248 } | |
| 249 | |
| 250 DependentString first; | |
| 251 }; | |
| 252 | |
| 253 template<typename T> | |
| 254 struct StringMapEntry | |
| 255 { | |
| 256 StringMapEntry() {} | |
| 257 StringMapEntry(const String& key) | |
| 258 : first(key), second() | |
| 259 { | |
| 260 } | |
| 261 StringMapEntry(const String& key, T value) | |
| 262 : first(key), second(value) | |
| 263 { | |
| 264 } | |
| 265 | |
| 266 DependentString first; | |
| 267 T second; | |
| 268 }; | |
| 269 | |
| 270 template<typename T> | |
| 271 struct StringMapEntryReference : public HashContainerReference<StringMapEntry<T>> | |
| 272 { | |
| 273 typedef HashContainerReference<StringMapEntry<T>> super; | |
| 274 typedef typename super::entry_type entry_type; | 236 typedef typename super::entry_type entry_type; |
|
sergei
2017/10/11 10:03:21
entry_type is already defined in the public base c
Wladimir Palant
2017/10/11 18:28:31
Yes. However, using it in this class will produce
sergei
2017/10/17 12:58:06
Acknowledged. Interesting whether there is a diffe
| |
| 275 typedef StringMap<T> map_type; | 237 typedef typename entry_type::key_type key_type; |
| 238 typedef Value value_type; | |
| 239 typedef Map<Entry, Value> map_type; | |
| 276 | 240 |
| 277 map_type* mMap; | 241 map_type* mMap; |
| 278 | 242 |
| 279 #if defined(DEBUG) | 243 #if defined(DEBUG) |
| 280 typename map_type::size_type mInsertCounter; | 244 typename map_type::size_type mInsertCounter; |
| 281 typename map_type::size_type mHash; | 245 typename map_type::size_type mHash; |
| 282 #endif | 246 #endif |
| 283 | 247 |
| 284 StringMapEntryReference(map_type* map, const String& key, entry_type* entry) | 248 MapReference(map_type* map, const key_type& key, entry_type* entry) |
| 285 : super(entry), mMap(map) | 249 : super(entry), mMap(map) |
| 286 { | 250 { |
| 287 #if defined(DEBUG) | 251 #if defined(DEBUG) |
| 288 mInsertCounter = mMap->mInsertCounter; | 252 mInsertCounter = mMap->mInsertCounter; |
| 289 mHash = mMap->hash(key); | 253 mHash = entry_type::hash(key); |
| 290 #endif | 254 #endif |
| 291 } | 255 } |
| 292 | 256 |
| 293 void assign(const String& key, const T& value) | 257 void assign(const key_type& key, const value_type& value) |
| 294 { | 258 { |
| 295 #if defined(DEBUG) | 259 #if defined(DEBUG) |
| 296 assert2(mInsertCounter == mMap->mInsertCounter, | 260 assert2(mInsertCounter == mMap->mInsertCounter, |
| 297 u"There should be no insert operations performed between map.find() and assign()"_str); | 261 u"There should be no insert operations performed between map.find() and assign()"_str); |
| 298 assert2(mHash == mMap->hash(key), | 262 assert2(mHash == entry_type::hash(key), |
| 299 u"The keys used in map.find() and assign() should be identical"_str); | 263 u"The keys used in map.find() and assign() should be identical"_str); |
| 300 #endif | 264 #endif |
| 301 | 265 |
| 302 mMap->assign(this->mEntry, entry_type(key, value)); | 266 mMap->assign(this->mEntry, entry_type(key, value)); |
| 303 } | 267 } |
| 304 }; | 268 }; |
| 305 } | 269 } |
| 306 | 270 |
| 307 class StringSet | 271 template<typename Entry> |
| 308 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> | 272 class Set : public Map_internal::HashContainer<Entry> |
| 309 { | 273 { |
| 274 public: | |
| 275 typedef Map_internal::HashContainer<Entry> super; | |
| 276 typedef typename super::size_type size_type; | |
| 277 typedef typename super::entry_type entry_type; | |
| 278 typedef typename super::key_type key_type; | |
|
sergei
2017/10/11 10:03:22
Why is it required to define again size_type, entr
Wladimir Palant
2017/10/11 18:28:31
Done here. For Map class they are actually require
| |
| 279 | |
| 280 using super::super; | |
| 310 }; | 281 }; |
| 311 | 282 |
| 312 template<typename T> | 283 template<typename Entry, typename Value> |
| 313 class StringMap | 284 class Map : public Map_internal::HashContainer<Entry> |
| 314 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T>> | |
| 315 { | 285 { |
| 316 public: | 286 public: |
| 317 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T>> super; | 287 typedef Map_internal::HashContainer<Entry> super; |
| 318 typedef typename super::size_type size_type; | 288 typedef typename super::size_type size_type; |
| 319 typedef typename super::entry_type entry_type; | 289 typedef typename super::entry_type entry_type; |
| 290 typedef typename super::key_type key_type; | |
| 291 typedef Value value_type; | |
| 320 typedef typename super::const_reference const_reference; | 292 typedef typename super::const_reference const_reference; |
|
sergei
2017/10/11 10:03:22
const_reference is already defined in the publicly
Wladimir Palant
2017/10/11 18:28:31
It's used in this class, we'll get compile error u
| |
| 321 typedef StringMap_internal::StringMapEntryReference<T> reference; | 293 typedef Map_internal::MapReference<entry_type, value_type> reference; |
| 322 friend struct StringMap_internal::StringMapEntryReference<T>; | 294 friend struct Map_internal::MapReference<entry_type, value_type>; |
| 323 | 295 |
| 324 explicit StringMap(size_type expectedEntries = 0) | 296 using super::super; |
| 325 : super(expectedEntries) | |
| 326 { | |
| 327 } | |
| 328 | 297 |
| 329 StringMap(std::initializer_list<entry_type> list) | 298 Map(std::initializer_list<entry_type> list) |
| 330 : super(list.size()) | 299 : super(list.size()) |
| 331 { | 300 { |
| 332 for (const auto& item : list) | 301 for (const auto& item : list) |
| 333 super::insert(item); | 302 super::insert(item); |
| 334 } | 303 } |
| 335 | 304 |
| 336 ~StringMap() | 305 value_type& operator[](const key_type& key) |
| 337 { | |
| 338 } | |
| 339 | |
| 340 T& operator[](const String& key) | |
| 341 { | 306 { |
| 342 entry_type* entry = super::find_bucket(key); | 307 entry_type* entry = super::find_bucket(key); |
| 343 if (entry->first.is_invalid()) | 308 if (entry->is_invalid()) |
| 344 entry = super::assign(entry, key); | 309 entry = super::assign(entry, key); |
| 345 return entry->second; | 310 return entry->second; |
| 346 } | 311 } |
| 347 | 312 |
| 348 const_reference find(const String& key) const | 313 const_reference find(const key_type& key) const |
|
sergei
2017/10/11 10:03:22
It seems this method is not needed because it's av
Wladimir Palant
2017/10/11 18:28:31
We declare a non-const variant of this method belo
sergei
2017/10/17 12:58:06
What about `using super::find;`, though it should
| |
| 349 { | 314 { |
| 350 return super::find(key); | 315 return super::find(key); |
| 351 } | 316 } |
| 352 | 317 |
| 353 reference find(const String& key) | 318 reference find(const key_type& key) |
| 354 { | 319 { |
| 355 return reference(this, key, super::find_bucket(key)); | 320 return reference(this, key, super::find_bucket(key)); |
| 356 } | 321 } |
| 357 }; | 322 }; |
| OLD | NEW |