| LEFT | RIGHT | 
|    1 #ifndef ADBLOCK_PLUS_STRING_MAP_H |    1 #pragma once | 
|    2 #define ADBLOCK_PLUS_STRING_MAP_H |  | 
|    3  |    2  | 
|    4 #include <cstddef> |    3 #include <cstddef> | 
|    5 #include <cmath> |    4 #include <cmath> | 
|    6 #include <initializer_list> |    5 #include <initializer_list> | 
 |    6 #include <memory> | 
|    7  |    7  | 
|    8 #include "String.h" |    8 #include "String.h" | 
|    9 #include "debug.h" |    9 #include "debug.h" | 
|   10  |   10  | 
|   11 template<typename Entry> |   11 template<typename T> | 
|   12 struct _HashContainerIterator |   12 class StringMap; | 
 |   13  | 
 |   14 namespace StringMap_internal | 
|   13 { |   15 { | 
|   14   typedef Entry entry_type; |   16   template<typename Entry> | 
|   15   typedef _HashContainerIterator<Entry> iterator; |   17   struct HashContainerIterator | 
|   16  |   18   { | 
|   17   const entry_type* mPos; |   19     typedef Entry entry_type; | 
|   18   const entry_type* mEnd; |   20     typedef HashContainerIterator<Entry> iterator; | 
|   19  |   21  | 
|   20   explicit _HashContainerIterator(const entry_type* start, const entry_type* end
     ) |   22     const entry_type* mPos; | 
|   21       : mPos(start), mEnd(end) |   23     const entry_type* mEnd; | 
|   22   { |   24  | 
|   23     if (mPos != mEnd && mPos->first.is_invalid()) |   25     explicit HashContainerIterator(const entry_type* start, const entry_type* en
     d) | 
|   24       ++(*this); |   26         : mPos(start), mEnd(end) | 
|   25   } |   27     { | 
|   26  |   28       if (mPos != mEnd && mPos->first.is_invalid()) | 
|   27   _HashContainerIterator() |   29         ++(*this); | 
|   28   { |   30     } | 
|   29   } |   31  | 
|   30  |   32     const entry_type& operator*() const | 
|   31   const entry_type& operator*() const |   33     { | 
|   32   { |   34       return *mPos; | 
|   33     return *mPos; |   35     } | 
|   34   } |   36  | 
|   35  |   37     const entry_type* operator->() const | 
|   36   const entry_type* operator->() const |   38     { | 
|   37   { |   39       return mPos; | 
|   38     return mPos; |   40     } | 
|   39   } |   41  | 
|   40  |   42     iterator& operator++() | 
|   41   iterator& operator++() |   43     { | 
|   42   { |   44       do { | 
|   43     do { |   45         ++mPos; | 
|   44       ++mPos; |   46       } while(mPos != mEnd && mPos->first.is_invalid()); | 
|   45     } while(mPos != mEnd && mPos->first.is_invalid()); |   47       return *this; | 
|   46     return *this; |   48     } | 
|   47   } |   49  | 
|   48  |   50     bool operator==(const iterator& it) const | 
|   49   bool operator==(const iterator& it) const |   51     { | 
|   50   { |   52       return mPos == it.mPos; | 
|   51     return mPos == it.mPos; |   53     } | 
|   52   } |   54  | 
|   53  |   55     bool operator!=(const iterator& it) const | 
|   54   bool operator!=(const iterator& it) const |   56     { | 
|   55   { |   57       return mPos != it.mPos; | 
|   56     return mPos != it.mPos; |   58     } | 
|   57   } |   59   }; | 
 |   60  | 
 |   61   template<typename Entry> | 
 |   62   struct HashContainerReference | 
 |   63   { | 
 |   64     typedef Entry entry_type; | 
 |   65  | 
 |   66     entry_type* mEntry; | 
 |   67  | 
 |   68     explicit HashContainerReference(entry_type* entry) | 
 |   69         : mEntry(entry) | 
 |   70     { | 
 |   71     } | 
 |   72  | 
 |   73     const entry_type* operator->() const | 
 |   74     { | 
 |   75       return mEntry; | 
 |   76     } | 
 |   77  | 
 |   78     operator bool() const | 
 |   79     { | 
 |   80       return !mEntry->first.is_invalid(); | 
 |   81     } | 
 |   82   }; | 
 |   83  | 
 |   84   template<typename Entry> | 
 |   85   class HashContainer | 
 |   86   { | 
 |   87   public: | 
 |   88     typedef Entry entry_type; | 
 |   89     typedef size_t size_type; | 
 |   90     typedef HashContainerIterator<Entry> const_iterator; | 
 |   91     typedef HashContainerReference<const Entry> const_reference; | 
 |   92  | 
 |   93   private: | 
 |   94     explicit HashContainer(const HashContainer& other); | 
 |   95     void operator=(const HashContainer& other); | 
 |   96  | 
 |   97   protected: | 
 |   98     static constexpr size_type MIN_BUCKETS = 1; | 
 |   99     static constexpr double LOAD_FACTOR = 0.8; | 
 |  100     std::unique_ptr<entry_type[]> mBuckets; | 
 |  101     size_type mBucketCount; | 
 |  102     size_type mEntryCount; | 
 |  103  | 
 |  104 #if defined(DEBUG) | 
 |  105     size_type mInsertCounter; | 
 |  106 #endif | 
 |  107  | 
 |  108     explicit HashContainer(size_type expectedEntries = 0) | 
 |  109         : mEntryCount(0) | 
 |  110     { | 
 |  111       expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | 
 |  112       mBucketCount = MIN_BUCKETS; | 
 |  113       while (mBucketCount < expectedEntries) | 
 |  114         mBucketCount <<= 1; | 
 |  115  | 
 |  116       mBuckets.reset(new entry_type[mBucketCount]); | 
 |  117       // Working around https://github.com/waywardmonkeys/emscripten-trace-colle
     ctor/issues/2 here | 
 |  118       annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t
     able buffer"); | 
 |  119     } | 
 |  120  | 
 |  121     static size_type hash(const String& str) | 
 |  122     { | 
 |  123       // FNV-1a hash function | 
 |  124       size_type result = 2166136261; | 
 |  125       for (String::size_type i = 0; i < str.length(); i++) | 
 |  126         result = (result ^ str[i]) * 16777619; | 
 |  127       return result; | 
 |  128     } | 
 |  129  | 
 |  130     entry_type* find_bucket(const String& key) const | 
 |  131     { | 
 |  132       size_type h = hash(key); | 
 |  133  | 
 |  134       // This does quadratic probing, effectively the following formula is used: | 
 |  135       // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | 
 |  136       for (size_type i = 0; ; ++i) | 
 |  137       { | 
 |  138         // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | 
 |  139         // h % mBucketCount but significantly faster. | 
 |  140         entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; | 
 |  141         if (entry->first.is_invalid() || entry->first.equals(key)) | 
 |  142           return entry; | 
 |  143         h += i; | 
 |  144       } | 
 |  145     } | 
 |  146  | 
 |  147     void resize(size_type bucketCount) | 
 |  148     { | 
 |  149       std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); | 
 |  150       size_type oldCount = mBucketCount; | 
 |  151  | 
 |  152       mEntryCount = 0; | 
 |  153       mBucketCount = bucketCount; | 
 |  154       mBuckets.reset(new entry_type[mBucketCount]); | 
 |  155       // Working around https://github.com/waywardmonkeys/emscripten-trace-colle
     ctor/issues/2 here | 
 |  156       annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t
     able buffer"); | 
 |  157  | 
 |  158       // Copy old entries into the new buffer | 
 |  159       for (size_type i = 0; i < oldCount; i++) | 
 |  160       { | 
 |  161         entry_type& entry = oldBuckets[i]; | 
 |  162         if (!entry.first.is_invalid() && !entry.first.is_deleted()) | 
 |  163         { | 
 |  164           *find_bucket(entry.first) = entry; | 
 |  165           mEntryCount++; | 
 |  166         } | 
 |  167       } | 
 |  168     } | 
 |  169  | 
 |  170     entry_type* assign(entry_type* existing, const entry_type& entry) | 
 |  171     { | 
 |  172       if (existing->first.is_invalid()) | 
 |  173       { | 
 |  174         if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) | 
 |  175         { | 
 |  176           resize(mBucketCount << 1); | 
 |  177           existing = find_bucket(entry.first); | 
 |  178         } | 
 |  179         mEntryCount++; | 
 |  180 #if defined(DEBUG) | 
 |  181         mInsertCounter++; | 
 |  182 #endif | 
 |  183       } | 
 |  184       *existing = entry; | 
 |  185       return existing; | 
 |  186     } | 
 |  187  | 
 |  188   public: | 
 |  189     void insert(const entry_type& entry) | 
 |  190     { | 
 |  191       assign(find_bucket(entry.first), entry); | 
 |  192     } | 
 |  193  | 
 |  194     bool erase(const String& key) | 
 |  195     { | 
 |  196       entry_type* entry = find_bucket(key); | 
 |  197       if (entry->first.is_invalid()) | 
 |  198         return false; | 
 |  199  | 
 |  200       entry->first.erase(); | 
 |  201       return true; | 
 |  202     } | 
 |  203  | 
 |  204     const_reference find(const String& key) const | 
 |  205     { | 
 |  206       return const_reference(find_bucket(key)); | 
 |  207     } | 
 |  208  | 
 |  209     const_iterator begin() const | 
 |  210     { | 
 |  211       return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); | 
 |  212     } | 
 |  213  | 
 |  214     const_iterator end() const | 
 |  215     { | 
 |  216       return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); | 
 |  217     } | 
 |  218   }; | 
 |  219  | 
 |  220   struct StringSetEntry | 
 |  221   { | 
 |  222     StringSetEntry() {} | 
 |  223     StringSetEntry(const String& key) | 
 |  224         : first(key) | 
 |  225     { | 
 |  226     } | 
 |  227  | 
 |  228     DependentString first; | 
 |  229   }; | 
 |  230  | 
 |  231   template<typename T> | 
 |  232   struct StringMapEntry | 
 |  233   { | 
 |  234     StringMapEntry() {} | 
 |  235     StringMapEntry(const String& key) | 
 |  236         : first(key), second() | 
 |  237     { | 
 |  238     } | 
 |  239     StringMapEntry(const String& key, T value) | 
 |  240         : first(key), second(value) | 
 |  241     { | 
 |  242     } | 
 |  243  | 
 |  244     DependentString first; | 
 |  245     T second; | 
 |  246   }; | 
 |  247  | 
 |  248   template<typename T> | 
 |  249   struct StringMapEntryReference : public HashContainerReference<StringMapEntry<
     T>> | 
 |  250   { | 
 |  251     typedef HashContainerReference<StringMapEntry<T>> super; | 
 |  252     typedef typename super::entry_type entry_type; | 
 |  253     typedef StringMap<T> map_type; | 
 |  254  | 
 |  255     map_type* mMap; | 
 |  256  | 
 |  257 #if defined(DEBUG) | 
 |  258     typename map_type::size_type mInsertCounter; | 
 |  259     typename map_type::size_type mHash; | 
 |  260 #endif | 
 |  261  | 
 |  262     StringMapEntryReference(map_type* map, const String& key, entry_type* entry) | 
 |  263         : super(entry), mMap(map) | 
 |  264     { | 
 |  265 #if defined(DEBUG) | 
 |  266       mInsertCounter = mMap->mInsertCounter; | 
 |  267       mHash = mMap->hash(key); | 
 |  268 #endif | 
 |  269     } | 
 |  270  | 
 |  271     void assign(const String& key, const T& value) | 
 |  272     { | 
 |  273 #if defined(DEBUG) | 
 |  274       assert(mInsertCounter == mMap->mInsertCounter, | 
 |  275           u"There should be no insert operations performed between map.find() an
     d assign()"_str); | 
 |  276       assert(mHash == mMap->hash(key), | 
 |  277           u"The keys used in map.find() and assign() should be identical"_str); | 
 |  278 #endif | 
 |  279  | 
 |  280       mMap->assign(this->mEntry, entry_type(key, value)); | 
 |  281     } | 
 |  282   }; | 
 |  283 } | 
 |  284  | 
 |  285 class StringSet | 
 |  286   : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> | 
 |  287 { | 
|   58 }; |  288 }; | 
|   59  |  289  | 
|   60 template<typename Entry> |  290 template<typename T> | 
|   61 class _HashContainer |  291 class StringMap | 
 |  292   : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<
     T>> | 
|   62 { |  293 { | 
|   63 public: |  294 public: | 
|   64   typedef Entry entry_type; |  295   typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T
     >> super; | 
|   65   typedef size_t size_type; |  | 
|   66   typedef _HashContainerIterator<Entry> iterator; |  | 
|   67  |  | 
|   68 private: |  | 
|   69   _HashContainer(const _HashContainer& other) {} |  | 
|   70   void operator=(const _HashContainer& other) {} |  | 
|   71  |  | 
|   72 protected: |  | 
|   73   static constexpr size_type MIN_BUCKETS = 1; |  | 
|   74   static constexpr double LOAD_FACTOR = 0.8; |  | 
|   75   entry_type* mBuckets; |  | 
|   76   size_type mBucketCount; |  | 
|   77   size_type mEntryCount; |  | 
|   78  |  | 
|   79   explicit _HashContainer(size_type expectedEntries = 0) |  | 
|   80       : mEntryCount(0) |  | 
|   81   { |  | 
|   82     expectedEntries = ceil(expectedEntries / LOAD_FACTOR); |  | 
|   83     mBucketCount = MIN_BUCKETS; |  | 
|   84     while (mBucketCount < expectedEntries) |  | 
|   85       mBucketCount <<= 1; |  | 
|   86  |  | 
|   87     mBuckets = new entry_type[mBucketCount]; |  | 
|   88     // Working around https://github.com/waywardmonkeys/emscripten-trace-collect
     or/issues/2 here |  | 
|   89     annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf
     fer"); |  | 
|   90   } |  | 
|   91  |  | 
|   92   ~_HashContainer() |  | 
|   93   { |  | 
|   94     delete[] mBuckets; |  | 
|   95   } |  | 
|   96  |  | 
|   97   static size_type hash(const String& str) |  | 
|   98   { |  | 
|   99     // FNV-1a hash function |  | 
|  100     size_type result = 2166136261; |  | 
|  101     for (String::size_type i = 0; i < str.length(); i++) |  | 
|  102       result = (result ^ str[i]) * 16777619; |  | 
|  103     return result; |  | 
|  104   } |  | 
|  105  |  | 
|  106   entry_type* find_entry(const String& key) const |  | 
|  107   { |  | 
|  108     size_type h = hash(key); |  | 
|  109  |  | 
|  110     // This does quadratic probing, effectively the following formula is used: |  | 
|  111     // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount |  | 
|  112     for (size_type i = 0; ; ++i) |  | 
|  113     { |  | 
|  114       entry_type* entry = mBuckets + h % mBucketCount; |  | 
|  115       if (entry->first.is_invalid() || entry->first.equals(key)) |  | 
|  116         return entry; |  | 
|  117       h += i; |  | 
|  118     } |  | 
|  119   } |  | 
|  120  |  | 
|  121   void resize(size_type bucketCount) |  | 
|  122   { |  | 
|  123     entry_type* oldBuckets = mBuckets; |  | 
|  124     size_type oldCount = mBucketCount; |  | 
|  125  |  | 
|  126     mEntryCount = 0; |  | 
|  127     mBucketCount = bucketCount; |  | 
|  128     mBuckets = new entry_type[mBucketCount]; |  | 
|  129     // Working around https://github.com/waywardmonkeys/emscripten-trace-collect
     or/issues/2 here |  | 
|  130     annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf
     fer"); |  | 
|  131  |  | 
|  132     for (size_type i = 0; i < oldCount; i++) |  | 
|  133     { |  | 
|  134       entry_type& entry = oldBuckets[i]; |  | 
|  135       if (!entry.first.is_invalid()) |  | 
|  136       { |  | 
|  137         *find_entry(entry.first) = entry; |  | 
|  138         mEntryCount++; |  | 
|  139       } |  | 
|  140     } |  | 
|  141  |  | 
|  142     delete[] oldBuckets; |  | 
|  143   } |  | 
|  144  |  | 
|  145   entry_type* assign(entry_type* existing, const entry_type& entry) |  | 
|  146   { |  | 
|  147     bool added = existing->first.is_invalid(); |  | 
|  148     *existing = entry; |  | 
|  149     if (added) |  | 
|  150     { |  | 
|  151       mEntryCount++; |  | 
|  152       if (mEntryCount >= mBucketCount * LOAD_FACTOR) |  | 
|  153       { |  | 
|  154         resize(mBucketCount << 1); |  | 
|  155         existing = find_entry(entry.first); |  | 
|  156       } |  | 
|  157     } |  | 
|  158     return existing; |  | 
|  159   } |  | 
|  160  |  | 
|  161 public: |  | 
|  162   void insert(const entry_type& entry) |  | 
|  163   { |  | 
|  164     assign(find_entry(entry.first), entry); |  | 
|  165   } |  | 
|  166  |  | 
|  167   iterator find(const String& key) const |  | 
|  168   { |  | 
|  169     entry_type* entry = find_entry(key); |  | 
|  170     if (entry->first.is_invalid()) |  | 
|  171       return end(); |  | 
|  172     else |  | 
|  173       return iterator(entry, mBuckets + mBucketCount); |  | 
|  174   } |  | 
|  175  |  | 
|  176   iterator begin() const |  | 
|  177   { |  | 
|  178     return iterator(mBuckets, mBuckets + mBucketCount); |  | 
|  179   } |  | 
|  180  |  | 
|  181   iterator end() const |  | 
|  182   { |  | 
|  183     return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount); |  | 
|  184   } |  | 
|  185 }; |  | 
|  186  |  | 
|  187 struct _StringSetEntry |  | 
|  188 { |  | 
|  189   _StringSetEntry() {} |  | 
|  190   _StringSetEntry(const String& key) |  | 
|  191       : first(key) |  | 
|  192   { |  | 
|  193   } |  | 
|  194  |  | 
|  195   String first; |  | 
|  196 }; |  | 
|  197  |  | 
|  198 class StringSet : public _HashContainer<_StringSetEntry> |  | 
|  199 { |  | 
|  200 }; |  | 
|  201  |  | 
|  202 template<typename T> |  | 
|  203 struct _StringMapEntry |  | 
|  204 { |  | 
|  205   _StringMapEntry() {} |  | 
|  206   _StringMapEntry(const String& key) |  | 
|  207       : first(key) |  | 
|  208   { |  | 
|  209   } |  | 
|  210   _StringMapEntry(const String& key, T value) |  | 
|  211       : first(key), second(value) |  | 
|  212   { |  | 
|  213   } |  | 
|  214  |  | 
|  215   String first; |  | 
|  216   T second; |  | 
|  217 }; |  | 
|  218  |  | 
|  219 template<typename T> |  | 
|  220 class StringMap : public _HashContainer<_StringMapEntry<T>> |  | 
|  221 { |  | 
|  222 public: |  | 
|  223   typedef _HashContainer<_StringMapEntry<T>> super; |  | 
|  224   typedef typename super::size_type size_type; |  296   typedef typename super::size_type size_type; | 
|  225   typedef typename super::entry_type entry_type; |  297   typedef typename super::entry_type entry_type; | 
 |  298   typedef typename super::const_reference const_reference; | 
 |  299   typedef StringMap_internal::StringMapEntryReference<T> reference; | 
 |  300   friend class StringMap_internal::StringMapEntryReference<T>; | 
|  226  |  301  | 
|  227   explicit StringMap(size_type expectedEntries = 0) |  302   explicit StringMap(size_type expectedEntries = 0) | 
|  228       : super(expectedEntries) |  303       : super(expectedEntries) | 
|  229   { |  304   { | 
|  230   } |  305   } | 
|  231  |  306  | 
|  232   StringMap(std::initializer_list<entry_type> list) |  307   StringMap(std::initializer_list<entry_type> list) | 
|  233       : super(list.size()) |  308       : super(list.size()) | 
|  234   { |  309   { | 
|  235     for (auto it = list.begin(); it != list.end(); ++it) |  310     for (auto it = list.begin(); it != list.end(); ++it) | 
|  236       super::insert(*it); |  311       super::insert(*it); | 
|  237   } |  312   } | 
|  238  |  313  | 
|  239   ~StringMap() |  314   ~StringMap() | 
|  240   { |  315   { | 
|  241   } |  316   } | 
|  242  |  317  | 
|  243   T& operator[](const String& key) |  318   T& operator[](const String& key) | 
|  244   { |  319   { | 
|  245     entry_type* entry = super::find_entry(key); |  320     entry_type* entry = super::find_bucket(key); | 
|  246     if (entry->first.is_invalid()) |  321     if (entry->first.is_invalid()) | 
|  247       entry = super::assign(entry, key); |  322       entry = super::assign(entry, key); | 
|  248     return entry->second; |  323     return entry->second; | 
|  249   } |  324   } | 
 |  325  | 
 |  326   const_reference find(const String& key) const | 
 |  327   { | 
 |  328     return super::find(key); | 
 |  329   } | 
 |  330  | 
 |  331   reference find(const String& key) | 
 |  332   { | 
 |  333     return reference(this, key, super::find_bucket(key)); | 
 |  334   } | 
|  250 }; |  335 }; | 
|  251  |  | 
|  252 #endif |  | 
| LEFT | RIGHT |