| LEFT | RIGHT | 
|---|
| 1 #ifndef ADBLOCK_PLUS_STRING_MAP_H | 1 #pragma once | 
| 2 #define ADBLOCK_PLUS_STRING_MAP_H |  | 
| 3 | 2 | 
| 4 #include <cstddef> | 3 #include <cstddef> | 
| 5 #include <cmath> | 4 #include <cmath> | 
| 6 #include <initializer_list> | 5 #include <initializer_list> | 
|  | 6 #include <memory> | 
| 7 | 7 | 
| 8 #include "String.h" | 8 #include "String.h" | 
| 9 #include "debug.h" | 9 #include "debug.h" | 
| 10 | 10 | 
| 11 template<class Entry> | 11 template<typename T> | 
| 12 struct _HashContainerIterator | 12 class StringMap; | 
|  | 13 | 
|  | 14 namespace StringMap_internal | 
| 13 { | 15 { | 
| 14   typedef Entry entry_type; | 16   template<typename Entry> | 
| 15   typedef _HashContainerIterator<Entry> iterator; | 17   struct HashContainerIterator | 
| 16 | 18   { | 
| 17   const entry_type* mPos; | 19     typedef Entry entry_type; | 
| 18   const entry_type* mEnd; | 20     typedef HashContainerIterator<Entry> iterator; | 
| 19 | 21 | 
| 20   explicit _HashContainerIterator(const entry_type* start, const entry_type* end
     ) | 22     const entry_type* mPos; | 
| 21       : mPos(start), mEnd(end) | 23     const entry_type* mEnd; | 
| 22   { | 24 | 
| 23     if (mPos != mEnd && mPos->first.is_invalid()) | 25     explicit HashContainerIterator(const entry_type* start, const entry_type* en
     d) | 
| 24       ++(*this); | 26         : mPos(start), mEnd(end) | 
| 25   } | 27     { | 
| 26 | 28       if (mPos != mEnd && mPos->first.is_invalid()) | 
| 27   _HashContainerIterator() | 29         ++(*this); | 
| 28   { | 30     } | 
| 29   } | 31 | 
| 30 | 32     const entry_type& operator*() const | 
| 31   const entry_type& operator*() const | 33     { | 
| 32   { | 34       return *mPos; | 
| 33     return *mPos; | 35     } | 
| 34   } | 36 | 
| 35 | 37     const entry_type* operator->() const | 
| 36   const entry_type* operator->() const | 38     { | 
| 37   { | 39       return mPos; | 
| 38     return mPos; | 40     } | 
| 39   } | 41 | 
| 40 | 42     iterator& operator++() | 
| 41   iterator& operator++() | 43     { | 
| 42   { | 44       do { | 
| 43     do { | 45         ++mPos; | 
| 44       ++mPos; | 46       } while(mPos != mEnd && mPos->first.is_invalid()); | 
| 45     } while(mPos != mEnd && mPos->first.is_invalid()); | 47       return *this; | 
| 46     return *this; | 48     } | 
| 47   } | 49 | 
| 48 | 50     bool operator==(const iterator& it) const | 
| 49   bool operator==(const iterator& it) const | 51     { | 
| 50   { | 52       return mPos == it.mPos; | 
| 51     return mPos == it.mPos; | 53     } | 
| 52   } | 54 | 
| 53 | 55     bool operator!=(const iterator& it) const | 
| 54   bool operator!=(const iterator& it) const | 56     { | 
| 55   { | 57       return mPos != it.mPos; | 
| 56     return mPos != it.mPos; | 58     } | 
| 57   } | 59   }; | 
|  | 60 | 
|  | 61   template<typename Entry> | 
|  | 62   struct HashContainerReference | 
|  | 63   { | 
|  | 64     typedef Entry entry_type; | 
|  | 65 | 
|  | 66     entry_type* mEntry; | 
|  | 67 | 
|  | 68     explicit HashContainerReference(entry_type* entry) | 
|  | 69         : mEntry(entry) | 
|  | 70     { | 
|  | 71     } | 
|  | 72 | 
|  | 73     const entry_type* operator->() const | 
|  | 74     { | 
|  | 75       return mEntry; | 
|  | 76     } | 
|  | 77 | 
|  | 78     operator bool() const | 
|  | 79     { | 
|  | 80       return !mEntry->first.is_invalid(); | 
|  | 81     } | 
|  | 82   }; | 
|  | 83 | 
|  | 84   template<typename Entry> | 
|  | 85   class HashContainer | 
|  | 86   { | 
|  | 87   public: | 
|  | 88     typedef Entry entry_type; | 
|  | 89     typedef size_t size_type; | 
|  | 90     typedef HashContainerIterator<Entry> const_iterator; | 
|  | 91     typedef HashContainerReference<const Entry> const_reference; | 
|  | 92 | 
|  | 93   private: | 
|  | 94     explicit HashContainer(const HashContainer& other); | 
|  | 95     void operator=(const HashContainer& other); | 
|  | 96 | 
|  | 97   protected: | 
|  | 98     static constexpr size_type MIN_BUCKETS = 1; | 
|  | 99     static constexpr double LOAD_FACTOR = 0.8; | 
|  | 100     std::unique_ptr<entry_type[]> mBuckets; | 
|  | 101     size_type mBucketCount; | 
|  | 102     size_type mEntryCount; | 
|  | 103 | 
|  | 104 #if defined(DEBUG) | 
|  | 105     size_type mInsertCounter; | 
|  | 106 #endif | 
|  | 107 | 
|  | 108     explicit HashContainer(size_type expectedEntries = 0) | 
|  | 109         : mEntryCount(0) | 
|  | 110     { | 
|  | 111       expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | 
|  | 112       mBucketCount = MIN_BUCKETS; | 
|  | 113       while (mBucketCount < expectedEntries) | 
|  | 114         mBucketCount <<= 1; | 
|  | 115 | 
|  | 116       mBuckets.reset(new entry_type[mBucketCount]); | 
|  | 117       // Working around https://github.com/waywardmonkeys/emscripten-trace-colle
     ctor/issues/2 here | 
|  | 118       annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t
     able buffer"); | 
|  | 119     } | 
|  | 120 | 
|  | 121     static size_type hash(const String& str) | 
|  | 122     { | 
|  | 123       // FNV-1a hash function | 
|  | 124       size_type result = 2166136261; | 
|  | 125       for (String::size_type i = 0; i < str.length(); i++) | 
|  | 126         result = (result ^ str[i]) * 16777619; | 
|  | 127       return result; | 
|  | 128     } | 
|  | 129 | 
|  | 130     entry_type* find_bucket(const String& key) const | 
|  | 131     { | 
|  | 132       size_type h = hash(key); | 
|  | 133 | 
|  | 134       // This does quadratic probing, effectively the following formula is used: | 
|  | 135       // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | 
|  | 136       for (size_type i = 0; ; ++i) | 
|  | 137       { | 
|  | 138         // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | 
|  | 139         // h % mBucketCount but significantly faster. | 
|  | 140         entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; | 
|  | 141         if (entry->first.is_invalid() || entry->first.equals(key)) | 
|  | 142           return entry; | 
|  | 143         h += i; | 
|  | 144       } | 
|  | 145     } | 
|  | 146 | 
|  | 147     void resize(size_type bucketCount) | 
|  | 148     { | 
|  | 149       std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); | 
|  | 150       size_type oldCount = mBucketCount; | 
|  | 151 | 
|  | 152       mEntryCount = 0; | 
|  | 153       mBucketCount = bucketCount; | 
|  | 154       mBuckets.reset(new entry_type[mBucketCount]); | 
|  | 155       // Working around https://github.com/waywardmonkeys/emscripten-trace-colle
     ctor/issues/2 here | 
|  | 156       annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t
     able buffer"); | 
|  | 157 | 
|  | 158       // Copy old entries into the new buffer | 
|  | 159       for (size_type i = 0; i < oldCount; i++) | 
|  | 160       { | 
|  | 161         entry_type& entry = oldBuckets[i]; | 
|  | 162         if (!entry.first.is_invalid() && !entry.first.is_deleted()) | 
|  | 163         { | 
|  | 164           *find_bucket(entry.first) = entry; | 
|  | 165           mEntryCount++; | 
|  | 166         } | 
|  | 167       } | 
|  | 168     } | 
|  | 169 | 
|  | 170     entry_type* assign(entry_type* existing, const entry_type& entry) | 
|  | 171     { | 
|  | 172       if (existing->first.is_invalid()) | 
|  | 173       { | 
|  | 174         if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) | 
|  | 175         { | 
|  | 176           resize(mBucketCount << 1); | 
|  | 177           existing = find_bucket(entry.first); | 
|  | 178         } | 
|  | 179         mEntryCount++; | 
|  | 180 #if defined(DEBUG) | 
|  | 181         mInsertCounter++; | 
|  | 182 #endif | 
|  | 183       } | 
|  | 184       *existing = entry; | 
|  | 185       return existing; | 
|  | 186     } | 
|  | 187 | 
|  | 188   public: | 
|  | 189     void insert(const entry_type& entry) | 
|  | 190     { | 
|  | 191       assign(find_bucket(entry.first), entry); | 
|  | 192     } | 
|  | 193 | 
|  | 194     bool erase(const String& key) | 
|  | 195     { | 
|  | 196       entry_type* entry = find_bucket(key); | 
|  | 197       if (entry->first.is_invalid()) | 
|  | 198         return false; | 
|  | 199 | 
|  | 200       entry->first.erase(); | 
|  | 201       return true; | 
|  | 202     } | 
|  | 203 | 
|  | 204     const_reference find(const String& key) const | 
|  | 205     { | 
|  | 206       return const_reference(find_bucket(key)); | 
|  | 207     } | 
|  | 208 | 
|  | 209     const_iterator begin() const | 
|  | 210     { | 
|  | 211       return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); | 
|  | 212     } | 
|  | 213 | 
|  | 214     const_iterator end() const | 
|  | 215     { | 
|  | 216       return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); | 
|  | 217     } | 
|  | 218   }; | 
|  | 219 | 
|  | 220   struct StringSetEntry | 
|  | 221   { | 
|  | 222     StringSetEntry() {} | 
|  | 223     StringSetEntry(const String& key) | 
|  | 224         : first(key) | 
|  | 225     { | 
|  | 226     } | 
|  | 227 | 
|  | 228     DependentString first; | 
|  | 229   }; | 
|  | 230 | 
|  | 231   template<typename T> | 
|  | 232   struct StringMapEntry | 
|  | 233   { | 
|  | 234     StringMapEntry() {} | 
|  | 235     StringMapEntry(const String& key) | 
|  | 236         : first(key), second() | 
|  | 237     { | 
|  | 238     } | 
|  | 239     StringMapEntry(const String& key, T value) | 
|  | 240         : first(key), second(value) | 
|  | 241     { | 
|  | 242     } | 
|  | 243 | 
|  | 244     DependentString first; | 
|  | 245     T second; | 
|  | 246   }; | 
|  | 247 | 
|  | 248   template<typename T> | 
|  | 249   struct StringMapEntryReference : public HashContainerReference<StringMapEntry<
     T>> | 
|  | 250   { | 
|  | 251     typedef HashContainerReference<StringMapEntry<T>> super; | 
|  | 252     typedef typename super::entry_type entry_type; | 
|  | 253     typedef StringMap<T> map_type; | 
|  | 254 | 
|  | 255     map_type* mMap; | 
|  | 256 | 
|  | 257 #if defined(DEBUG) | 
|  | 258     typename map_type::size_type mInsertCounter; | 
|  | 259     typename map_type::size_type mHash; | 
|  | 260 #endif | 
|  | 261 | 
|  | 262     StringMapEntryReference(map_type* map, const String& key, entry_type* entry) | 
|  | 263         : super(entry), mMap(map) | 
|  | 264     { | 
|  | 265 #if defined(DEBUG) | 
|  | 266       mInsertCounter = mMap->mInsertCounter; | 
|  | 267       mHash = mMap->hash(key); | 
|  | 268 #endif | 
|  | 269     } | 
|  | 270 | 
|  | 271     void assign(const String& key, const T& value) | 
|  | 272     { | 
|  | 273 #if defined(DEBUG) | 
|  | 274       assert(mInsertCounter == mMap->mInsertCounter, | 
|  | 275           u"There should be no insert operations performed between map.find() an
     d assign()"_str); | 
|  | 276       assert(mHash == mMap->hash(key), | 
|  | 277           u"The keys used in map.find() and assign() should be identical"_str); | 
|  | 278 #endif | 
|  | 279 | 
|  | 280       mMap->assign(this->mEntry, entry_type(key, value)); | 
|  | 281     } | 
|  | 282   }; | 
|  | 283 } | 
|  | 284 | 
|  | 285 class StringSet | 
|  | 286   : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> | 
|  | 287 { | 
| 58 }; | 288 }; | 
| 59 | 289 | 
| 60 template<class Entry> | 290 template<typename T> | 
| 61 class _HashContainer | 291 class StringMap | 
|  | 292   : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<
     T>> | 
| 62 { | 293 { | 
| 63 public: | 294 public: | 
| 64   typedef Entry entry_type; | 295   typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T
     >> super; | 
| 65   typedef size_t size_type; |  | 
| 66   typedef _HashContainerIterator<Entry> iterator; |  | 
| 67 |  | 
| 68 private: |  | 
| 69   _HashContainer(const _HashContainer& other) {} |  | 
| 70   void operator=(const _HashContainer& other) {} |  | 
| 71 |  | 
| 72 protected: |  | 
| 73   static constexpr size_type MIN_BUCKETS = 1; |  | 
| 74   static constexpr double LOAD_FACTOR = 0.8; |  | 
| 75   entry_type* mBuckets; |  | 
| 76   size_type mBucketCount; |  | 
| 77   size_type mEntryCount; |  | 
| 78 |  | 
| 79   explicit _HashContainer(size_type expectedEntries = 0) |  | 
| 80       : mEntryCount(0) |  | 
| 81   { |  | 
| 82     expectedEntries = ceil(expectedEntries / LOAD_FACTOR); |  | 
| 83     mBucketCount = MIN_BUCKETS; |  | 
| 84     while (mBucketCount < expectedEntries) |  | 
| 85       mBucketCount <<= 1; |  | 
| 86 |  | 
| 87     mBuckets = new entry_type[mBucketCount]; |  | 
| 88     // Working around https://github.com/waywardmonkeys/emscripten-trace-collect
     or/issues/2 here |  | 
| 89     annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf
     fer"); |  | 
| 90   } |  | 
| 91 |  | 
| 92   ~_HashContainer() |  | 
| 93   { |  | 
| 94     delete[] mBuckets; |  | 
| 95   } |  | 
| 96 |  | 
| 97   static size_type hash(const String& str) |  | 
| 98   { |  | 
| 99     // FNV-1a hash function |  | 
| 100     size_type result = 2166136261; |  | 
| 101     for (String::size_type i = 0; i < str.length(); i++) |  | 
| 102       result = (result ^ str[i]) * 16777619; |  | 
| 103     return result; |  | 
| 104   } |  | 
| 105 |  | 
| 106   entry_type* find_entry(const String& key) const |  | 
| 107   { |  | 
| 108     size_type h = hash(key); |  | 
| 109 |  | 
| 110     // This does quadratic probing, effectively the following formula is used: |  | 
| 111     // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount |  | 
| 112     for (size_type i = 0; ; ++i) |  | 
| 113     { |  | 
| 114       entry_type* entry = mBuckets + h % mBucketCount; |  | 
| 115       if (entry->first.is_invalid() || entry->first.equals(key)) |  | 
| 116         return entry; |  | 
| 117       h += i; |  | 
| 118     } |  | 
| 119   } |  | 
| 120 |  | 
| 121   void resize(size_type bucketCount) |  | 
| 122   { |  | 
| 123     entry_type* oldBuckets = mBuckets; |  | 
| 124     size_type oldCount = mBucketCount; |  | 
| 125 |  | 
| 126     mEntryCount = 0; |  | 
| 127     mBucketCount = bucketCount; |  | 
| 128     mBuckets = new entry_type[mBucketCount]; |  | 
| 129     // Working around https://github.com/waywardmonkeys/emscripten-trace-collect
     or/issues/2 here |  | 
| 130     annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf
     fer"); |  | 
| 131 |  | 
| 132     for (size_type i = 0; i < oldCount; i++) |  | 
| 133     { |  | 
| 134       entry_type& entry = oldBuckets[i]; |  | 
| 135       if (!entry.first.is_invalid()) |  | 
| 136       { |  | 
| 137         *find_entry(entry.first) = entry; |  | 
| 138         mEntryCount++; |  | 
| 139       } |  | 
| 140     } |  | 
| 141 |  | 
| 142     delete[] oldBuckets; |  | 
| 143   } |  | 
| 144 |  | 
| 145   entry_type* assign(entry_type* existing, const entry_type& entry) |  | 
| 146   { |  | 
| 147     bool added = existing->first.is_invalid(); |  | 
| 148     *existing = entry; |  | 
| 149     if (added) |  | 
| 150     { |  | 
| 151       mEntryCount++; |  | 
| 152       if (mEntryCount >= mBucketCount * LOAD_FACTOR) |  | 
| 153       { |  | 
| 154         resize(mBucketCount << 1); |  | 
| 155         existing = find_entry(entry.first); |  | 
| 156       } |  | 
| 157     } |  | 
| 158     return existing; |  | 
| 159   } |  | 
| 160 |  | 
| 161 public: |  | 
| 162   void insert(const entry_type& entry) |  | 
| 163   { |  | 
| 164     assign(find_entry(entry.first), entry); |  | 
| 165   } |  | 
| 166 |  | 
| 167   iterator find(const String& key) const |  | 
| 168   { |  | 
| 169     entry_type* entry = find_entry(key); |  | 
| 170     if (entry->first.is_invalid()) |  | 
| 171       return end(); |  | 
| 172     else |  | 
| 173       return iterator(entry, mBuckets + mBucketCount); |  | 
| 174   } |  | 
| 175 |  | 
| 176   iterator begin() const |  | 
| 177   { |  | 
| 178     return iterator(mBuckets, mBuckets + mBucketCount); |  | 
| 179   } |  | 
| 180 |  | 
| 181   iterator end() const |  | 
| 182   { |  | 
| 183     return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount); |  | 
| 184   } |  | 
| 185 }; |  | 
| 186 |  | 
| 187 struct _StringSetEntry |  | 
| 188 { |  | 
| 189   _StringSetEntry() {} |  | 
| 190   _StringSetEntry(const String& key) |  | 
| 191       : first(key) |  | 
| 192   { |  | 
| 193   } |  | 
| 194 |  | 
| 195   String first; |  | 
| 196 }; |  | 
| 197 |  | 
| 198 class StringSet : public _HashContainer<_StringSetEntry> |  | 
| 199 { |  | 
| 200 }; |  | 
| 201 |  | 
| 202 template<class T> |  | 
| 203 struct _StringMapEntry |  | 
| 204 { |  | 
| 205   _StringMapEntry() {} |  | 
| 206   _StringMapEntry(const String& key) |  | 
| 207       : first(key) |  | 
| 208   { |  | 
| 209   } |  | 
| 210   _StringMapEntry(const String& key, T value) |  | 
| 211       : first(key), second(value) |  | 
| 212   { |  | 
| 213   } |  | 
| 214 |  | 
| 215   String first; |  | 
| 216   T second; |  | 
| 217 }; |  | 
| 218 |  | 
| 219 template<class T> |  | 
| 220 class StringMap : public _HashContainer<_StringMapEntry<T>> |  | 
| 221 { |  | 
| 222 public: |  | 
| 223   typedef _HashContainer<_StringMapEntry<T>> super; |  | 
| 224   typedef typename super::size_type size_type; | 296   typedef typename super::size_type size_type; | 
| 225   typedef typename super::entry_type entry_type; | 297   typedef typename super::entry_type entry_type; | 
|  | 298   typedef typename super::const_reference const_reference; | 
|  | 299   typedef StringMap_internal::StringMapEntryReference<T> reference; | 
|  | 300   friend class StringMap_internal::StringMapEntryReference<T>; | 
| 226 | 301 | 
| 227   explicit StringMap(size_type expectedEntries = 0) | 302   explicit StringMap(size_type expectedEntries = 0) | 
| 228       : super(expectedEntries) | 303       : super(expectedEntries) | 
| 229   { | 304   { | 
| 230   } | 305   } | 
| 231 | 306 | 
| 232   StringMap(std::initializer_list<entry_type> list) | 307   StringMap(std::initializer_list<entry_type> list) | 
| 233       : super(list.size()) | 308       : super(list.size()) | 
| 234   { | 309   { | 
| 235     for (auto it = list.begin(); it != list.end(); ++it) | 310     for (auto it = list.begin(); it != list.end(); ++it) | 
| 236       super::insert(*it); | 311       super::insert(*it); | 
| 237   } | 312   } | 
| 238 | 313 | 
| 239   ~StringMap() | 314   ~StringMap() | 
| 240   { | 315   { | 
| 241   } | 316   } | 
| 242 | 317 | 
| 243   T& operator[](const String& key) | 318   T& operator[](const String& key) | 
| 244   { | 319   { | 
| 245     entry_type* entry = super::find_entry(key); | 320     entry_type* entry = super::find_bucket(key); | 
| 246     if (entry->first.is_invalid()) | 321     if (entry->first.is_invalid()) | 
| 247       entry = super::assign(entry, key); | 322       entry = super::assign(entry, key); | 
| 248     return entry->second; | 323     return entry->second; | 
| 249   } | 324   } | 
|  | 325 | 
|  | 326   const_reference find(const String& key) const | 
|  | 327   { | 
|  | 328     return super::find(key); | 
|  | 329   } | 
|  | 330 | 
|  | 331   reference find(const String& key) | 
|  | 332   { | 
|  | 333     return reference(this, key, super::find_bucket(key)); | 
|  | 334   } | 
| 250 }; | 335 }; | 
| 251 |  | 
| 252 #endif |  | 
| LEFT | RIGHT | 
|---|