| LEFT | RIGHT | 
|    1 #ifndef ADBLOCK_PLUS_STRING_H |    1 #pragma once | 
|    2 #define ADBLOCK_PLUS_STRING_H |  | 
|    3  |    2  | 
|    4 #include <cstddef> |    3 #include <cstddef> | 
|    5 #include <cstring> |    4 #include <cstring> | 
|    6 #include <algorithm> |    5 #include <algorithm> | 
|    7  |    6  | 
 |    7 #include <emscripten.h> | 
 |    8  | 
|    8 #include "debug.h" |    9 #include "debug.h" | 
|    9  |   10  | 
 |   11 inline void String_assert_readonly(bool readOnly); | 
 |   12  | 
|   10 class String |   13 class String | 
|   11 { |   14 { | 
 |   15   friend class DependentString; | 
 |   16   friend class OwnedString; | 
 |   17  | 
|   12 public: |   18 public: | 
|   13   typedef char16_t value_type; |   19   typedef char16_t value_type; | 
|   14   typedef size_t size_type; |   20   typedef size_t size_type; | 
|   15  |   21  | 
|   16   // Type flags, stored in the top 2 bits of the mLen member |   22   // Type flags, stored in the top 2 bits of the mLen member | 
|   17   static constexpr size_type OWNBUFFER = 0xC0000000; |   23   static constexpr size_type INVALID = 0xC0000000; | 
|   18   static constexpr size_type INVALID = 0x80000000; |   24   static constexpr size_type DELETED = 0x80000000; | 
|   19   static constexpr size_type DELETED = 0x40000000; |   25   static constexpr size_type READ_ONLY = 0x40000000; | 
|   20   static constexpr size_type DEPENDENT = 0x00000000; |   26   static constexpr size_type READ_WRITE = 0x00000000; | 
 |   27  | 
|   21   static constexpr size_type FLAGS_MASK = 0xC0000000; |   28   static constexpr size_type FLAGS_MASK = 0xC0000000; | 
|   22   static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; |   29   static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; | 
|   23  |   30  | 
|   24   static constexpr size_type npos = -1; |   31   static constexpr size_type npos = -1; | 
|   25  |   32  | 
|   26 private: |   33 protected: | 
|   27   value_type* mBuf; |   34   value_type* mBuf; | 
|   28   size_type mLen; |   35   size_type mLen; | 
|   29  |   36  | 
|   30   value_type* allocate(size_type len) |   37   explicit String(value_type* buf, size_type len, size_type flags) | 
|   31   { |   38       : mBuf(buf), mLen((len & LENGTH_MASK) | flags) | 
|   32     if (len) |   39   { | 
|   33       return new value_type[len]; |   40   } | 
|   34     else |   41  | 
|   35       return nullptr; |   42   ~String() | 
|   36   } |   43   { | 
|   37  |   44   } | 
|   38   void resize(size_type newLength, bool copy) |   45  | 
|   39   { |   46   void reset(value_type* buf, size_type len, size_type flags) | 
|   40     bool owningOldBuffer = owns_buffer(); |   47   { | 
|   41     size_type oldLength = length(); |   48     mBuf = buf; | 
|   42     value_type* oldBuffer = mBuf; |   49     mLen = (len & LENGTH_MASK) | flags; | 
|   43  |  | 
|   44     newLength &= LENGTH_MASK; |  | 
|   45     mBuf = allocate(newLength); |  | 
|   46     annotate_address(mBuf, "String"); |  | 
|   47     mLen = OWNBUFFER | newLength; |  | 
|   48  |  | 
|   49     if (copy && oldLength) |  | 
|   50       memcpy(mBuf, oldBuffer, sizeof(value_type) * std::min(oldLength, newLength
     )); |  | 
|   51     if (owningOldBuffer) |  | 
|   52       delete[] oldBuffer; |  | 
|   53   } |   50   } | 
|   54  |   51  | 
|   55 public: |   52 public: | 
|   56   String() : mBuf(nullptr), mLen(INVALID) {} |  | 
|   57  |  | 
|   58   String(size_type len) |  | 
|   59       : mBuf(allocate(len & LENGTH_MASK)), mLen(OWNBUFFER | (len & LENGTH_MASK)) |  | 
|   60   { |  | 
|   61     annotate_address(mBuf, "String"); |  | 
|   62   } |  | 
|   63  |  | 
|   64   String(value_type* buf, size_type len) |  | 
|   65       : mBuf(buf), mLen(DEPENDENT | (buf ? len & LENGTH_MASK: 0)) |  | 
|   66   { |  | 
|   67   } |  | 
|   68  |  | 
|   69   String(const String& str, size_type pos = 0, size_type len = npos) |  | 
|   70       : mBuf(str.mBuf + std::min(pos, str.length())), |  | 
|   71         mLen(DEPENDENT | std::min(len, str.length() - (mBuf - str.mBuf))) |  | 
|   72   { |  | 
|   73   } |  | 
|   74  |  | 
|   75   String(String&& str) |  | 
|   76   { |  | 
|   77     *this = std::move(str); |  | 
|   78   } |  | 
|   79  |  | 
|   80   String(const char* source, size_type len) |  | 
|   81       : String(len) |  | 
|   82   { |  | 
|   83     for (size_type i = 0; i < len; i++) |  | 
|   84       mBuf[i] = source[i]; |  | 
|   85   } |  | 
|   86  |  | 
|   87   void operator=(const String& str) |  | 
|   88   { |  | 
|   89     mBuf = str.mBuf; |  | 
|   90     mLen = DEPENDENT | str.length(); |  | 
|   91   } |  | 
|   92  |  | 
|   93   void operator=(String& str) |  | 
|   94   { |  | 
|   95     reset(str); |  | 
|   96   } |  | 
|   97  |  | 
|   98   String& operator=(String&& str) |  | 
|   99   { |  | 
|  100     mBuf = str.mBuf; |  | 
|  101     mLen = str.mLen; |  | 
|  102     str.mBuf = nullptr; |  | 
|  103     str.mLen = INVALID; |  | 
|  104     ensure_own_buffer(); |  | 
|  105     return *this; |  | 
|  106   } |  | 
|  107  |  | 
|  108   ~String() |  | 
|  109   { |  | 
|  110     if (owns_buffer()) |  | 
|  111       delete[] mBuf; |  | 
|  112   } |  | 
|  113  |  | 
|  114   void reset(value_type* buf, size_type len) |  | 
|  115   { |  | 
|  116     mBuf = buf; |  | 
|  117     mLen = (DEPENDENT | (buf ? len & LENGTH_MASK: 0)); |  | 
|  118   } |  | 
|  119  |  | 
|  120   void reset(const String& str, size_type pos = 0, size_type len = npos) |  | 
|  121   { |  | 
|  122     pos = std::min(pos, str.length()); |  | 
|  123     len = std::min(len, str.length() - pos); |  | 
|  124     reset(str.mBuf + pos, len); |  | 
|  125   } |  | 
|  126  |  | 
|  127   size_type length() const |   53   size_type length() const | 
|  128   { |   54   { | 
|  129     return mLen & LENGTH_MASK; |   55     return mLen & LENGTH_MASK; | 
|  130   } |   56   } | 
|  131  |   57  | 
|  132   bool empty() const |   58   bool empty() const | 
|  133   { |   59   { | 
|  134     return !(mLen & LENGTH_MASK); |   60     return !(mLen & LENGTH_MASK); | 
|  135   } |   61   } | 
|  136  |   62  | 
|  137   const value_type* data() const |   63   const value_type* data() const | 
|  138   { |   64   { | 
|  139     return mBuf; |   65     return mBuf; | 
|  140   } |   66   } | 
|  141  |   67  | 
|  142   value_type* data() |   68   value_type* data() | 
|  143   { |   69   { | 
 |   70     String_assert_readonly(is_readOnly()); | 
|  144     return mBuf; |   71     return mBuf; | 
|  145   } |   72   } | 
|  146  |   73  | 
|  147   const value_type& operator[](size_type pos) const |   74   const value_type& operator[](size_type pos) const | 
|  148   { |   75   { | 
|  149     return mBuf[pos]; |   76     return mBuf[pos]; | 
|  150   } |   77   } | 
|  151  |   78  | 
|  152   value_type& operator[](size_type pos) |   79   value_type& operator[](size_type pos) | 
|  153   { |   80   { | 
 |   81     String_assert_readonly(is_readOnly()); | 
|  154     return mBuf[pos]; |   82     return mBuf[pos]; | 
|  155   } |   83   } | 
|  156  |   84  | 
 |   85   bool is_readOnly() const | 
 |   86   { | 
 |   87     return (mLen & FLAGS_MASK) != READ_WRITE; | 
 |   88   } | 
 |   89  | 
|  157   bool equals(const String& other) const |   90   bool equals(const String& other) const | 
|  158   { |   91   { | 
|  159     if (mLen != other.mLen) |   92     if (length() != other.length()) | 
|  160       return false; |   93       return false; | 
|  161  |   94  | 
|  162     return memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0; |   95     return std::memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0; | 
|  163   } |   96   } | 
|  164  |   97  | 
|  165   size_type find(value_type c, size_type pos = 0) const |   98   size_type find(value_type c, size_type pos = 0) const | 
|  166   { |   99   { | 
|  167     for (size_type i = pos; i < length(); ++i) |  100     for (size_type i = pos; i < length(); ++i) | 
|  168       if (mBuf[i] == c) |  101       if (mBuf[i] == c) | 
|  169         return i; |  102         return i; | 
|  170     return npos; |  103     return npos; | 
|  171   } |  104   } | 
|  172  |  105  | 
|  173   size_type find(const String& str, size_type pos = 0) const |  106   size_type find(const String& str, size_type pos = 0) const | 
|  174   { |  107   { | 
 |  108     if (pos > LENGTH_MASK || pos + str.length() > length()) | 
 |  109       return npos; | 
 |  110  | 
|  175     if (!str.length()) |  111     if (!str.length()) | 
|  176       return pos; |  112       return pos; | 
|  177  |  113  | 
|  178     if (length() - pos < str.length()) |  114     for (; pos + str.length() <= length(); ++pos) | 
|  179       return npos; |  | 
|  180  |  | 
|  181     for (; pos < length() - str.length(); ++pos) |  | 
|  182     { |  115     { | 
|  183       if (mBuf[pos] == str[0] && |  116       if (mBuf[pos] == str[0] && | 
|  184           memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0) |  117           std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) =
     = 0) | 
|  185       { |  118       { | 
|  186         return pos; |  119         return pos; | 
|  187       } |  120       } | 
|  188     } |  121     } | 
|  189  |  122  | 
|  190     return npos; |  123     return npos; | 
|  191   } |  124   } | 
|  192  |  125  | 
|  193   size_type rfind(value_type c, size_type pos = npos) const |  126   size_type rfind(value_type c, size_type pos = npos) const | 
|  194   { |  127   { | 
|  195     if (length() == 0) |  128     if (length() == 0) | 
|  196       return npos; |  129       return npos; | 
|  197  |  130  | 
|  198     if (pos == npos) |  131     if (pos >= length()) | 
|  199       pos = length() - 1; |  132       pos = length() - 1; | 
|  200  |  133  | 
|  201     for (int i = pos; i >= 0; --i) |  134     for (int i = pos; i >= 0; --i) | 
|  202       if (mBuf[i] == c) |  135       if (mBuf[i] == c) | 
|  203         return i; |  136         return i; | 
|  204     return npos; |  137     return npos; | 
|  205   } |  138   } | 
|  206  |  139  | 
 |  140   bool is_invalid() const | 
 |  141   { | 
 |  142     return (mLen & FLAGS_MASK) == INVALID; | 
 |  143   } | 
 |  144  | 
 |  145   bool is_deleted() const | 
 |  146   { | 
 |  147     return (mLen & FLAGS_MASK) == DELETED; | 
 |  148   } | 
 |  149  | 
 |  150   void toLower() | 
 |  151   { | 
 |  152     size_type len = length(); | 
 |  153     for (size_type i = 0; i < len; ++i) | 
 |  154     { | 
 |  155       value_type currChar = mBuf[i]; | 
 |  156  | 
 |  157       // This should be more efficient with a lookup table but I couldn't measur
     e | 
 |  158       // any performance difference. | 
 |  159       if (currChar >= u'A' && currChar <= u'Z') | 
 |  160         mBuf[i] = currChar + u'a' - u'A'; | 
 |  161       else if (currChar >= 128) | 
 |  162       { | 
 |  163         // It seems that calling JS is the easiest solution for lowercasing | 
 |  164         // Unicode characters. | 
 |  165         mBuf[i] = EM_ASM_INT({ | 
 |  166           return String.fromCharCode($0).toLowerCase().charCodeAt(0); | 
 |  167         }, currChar); | 
 |  168       } | 
 |  169     } | 
 |  170   } | 
 |  171 }; | 
 |  172  | 
 |  173 class DependentString : public String | 
 |  174 { | 
 |  175 public: | 
 |  176   explicit DependentString() | 
 |  177       : String(nullptr, 0, INVALID) | 
 |  178   { | 
 |  179   } | 
 |  180  | 
 |  181   explicit DependentString(value_type* buf, size_type len) | 
 |  182       : String(buf, len, READ_WRITE) | 
 |  183   { | 
 |  184   } | 
 |  185  | 
 |  186   explicit DependentString(const value_type* buf, size_type len) | 
 |  187       : String(const_cast<value_type*>(buf), len, READ_ONLY) | 
 |  188   { | 
 |  189   } | 
 |  190  | 
 |  191   explicit DependentString(String& str, size_type pos = 0, size_type len = npos) | 
 |  192       : String( | 
 |  193           str.mBuf + std::min(pos, str.length()), | 
 |  194           std::min(len, str.length() - std::min(pos, str.length())), | 
 |  195           str.is_readOnly() ? READ_ONLY : READ_WRITE | 
 |  196         ) | 
 |  197   { | 
 |  198   } | 
 |  199  | 
 |  200   explicit DependentString(const String& str, size_type pos = 0, | 
 |  201       size_type len = npos) | 
 |  202       : String( | 
 |  203           str.mBuf + std::min(pos, str.length()), | 
 |  204           std::min(len, str.length() - std::min(pos, str.length())), | 
 |  205           READ_ONLY | 
 |  206         ) | 
 |  207   { | 
 |  208   } | 
 |  209  | 
 |  210   void reset(value_type* buf, size_type len) | 
 |  211   { | 
 |  212     *this = DependentString(buf, len); | 
 |  213   } | 
 |  214  | 
 |  215   void reset(const value_type* buf, size_type len) | 
 |  216   { | 
 |  217     *this = DependentString(buf, len); | 
 |  218   } | 
 |  219  | 
 |  220   void reset(String& str, size_type pos = 0, size_type len = npos) | 
 |  221   { | 
 |  222     *this = DependentString(str, pos, len); | 
 |  223   } | 
 |  224  | 
 |  225   void reset(const String& str, size_type pos = 0, size_type len = npos) | 
 |  226   { | 
 |  227     *this = DependentString(str, pos, len); | 
 |  228   } | 
 |  229  | 
 |  230   void erase() | 
 |  231   { | 
 |  232     *this = DependentString(); | 
 |  233     mLen = DELETED; | 
 |  234   } | 
 |  235 }; | 
 |  236  | 
 |  237 inline DependentString operator "" _str(const String::value_type* str, | 
 |  238     String::size_type len) | 
 |  239 { | 
 |  240   return DependentString(str, len); | 
 |  241 } | 
 |  242  | 
 |  243 inline void String_assert_readonly(bool readOnly) | 
 |  244 { | 
 |  245   assert(!readOnly, u"Writing access to a read-only string"_str); | 
 |  246 } | 
 |  247  | 
 |  248 class OwnedString : public String | 
 |  249 { | 
 |  250 private: | 
 |  251   void grow(size_type additionalSize) | 
 |  252   { | 
 |  253     OwnedString newValue(length() + additionalSize); | 
 |  254     if (length() > 0) | 
 |  255       std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length()); | 
 |  256     *this = std::move(newValue); | 
 |  257   } | 
 |  258  | 
 |  259 public: | 
 |  260   explicit OwnedString(size_type len = 0) | 
 |  261       : String(nullptr, len, READ_WRITE) | 
 |  262   { | 
 |  263     if (len) | 
 |  264     { | 
 |  265       mBuf = new value_type[length()]; | 
 |  266       annotate_address(mBuf, "String"); | 
 |  267     } | 
 |  268     else | 
 |  269       mBuf = nullptr; | 
 |  270   } | 
 |  271  | 
 |  272   explicit OwnedString(const String& str) | 
 |  273       : OwnedString(str.length()) | 
 |  274   { | 
 |  275     if (length()) | 
 |  276       std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length()); | 
 |  277   } | 
 |  278  | 
 |  279   OwnedString(const OwnedString& str) | 
 |  280       : OwnedString(static_cast<const String&>(str)) | 
 |  281   { | 
 |  282   } | 
 |  283  | 
 |  284   explicit OwnedString(const value_type* str, size_type len) | 
 |  285       : OwnedString(DependentString(str, len)) | 
 |  286   { | 
 |  287   } | 
 |  288  | 
 |  289   explicit OwnedString(OwnedString&& str) | 
 |  290       : OwnedString(0) | 
 |  291   { | 
 |  292     mBuf = str.mBuf; | 
 |  293     mLen = str.mLen; | 
 |  294     str.mBuf = nullptr; | 
 |  295     str.mLen = READ_WRITE | 0; | 
 |  296   } | 
 |  297  | 
 |  298   ~OwnedString() | 
 |  299   { | 
 |  300     if (mBuf) | 
 |  301       delete[] mBuf; | 
 |  302   } | 
 |  303  | 
 |  304   OwnedString& operator=(const String& str) | 
 |  305   { | 
 |  306     *this = std::move(OwnedString(str)); | 
 |  307     return *this; | 
 |  308   } | 
 |  309  | 
 |  310   OwnedString& operator=(const OwnedString& str) | 
 |  311   { | 
 |  312     *this = std::move(OwnedString(str)); | 
 |  313     return *this; | 
 |  314   } | 
 |  315  | 
 |  316   OwnedString& operator=(OwnedString&& str) | 
 |  317   { | 
 |  318     std::swap(mBuf, str.mBuf); | 
 |  319     std::swap(mLen, str.mLen); | 
 |  320     return *this; | 
 |  321   } | 
 |  322  | 
|  207   void append(const value_type* source, size_type sourceLen) |  323   void append(const value_type* source, size_type sourceLen) | 
|  208   { |  324   { | 
|  209     if (!sourceLen) |  325     if (!sourceLen) | 
|  210       return; |  326       return; | 
|  211  |  327  | 
 |  328     assert(source, u"Null buffer passed to OwnedString.append()"_str); | 
|  212     size_t oldLength = length(); |  329     size_t oldLength = length(); | 
|  213     resize(oldLength + sourceLen, true); |  330     grow(sourceLen); | 
|  214     memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); |  331     std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); | 
|  215   } |  332   } | 
|  216  |  333  | 
|  217   void append(const String& str) |  334   void append(const String& str) | 
|  218   { |  335   { | 
|  219     append(str.mBuf, str.length()); |  336     append(str.mBuf, str.length()); | 
|  220   } |  337   } | 
|  221  |  338  | 
|  222   void append(value_type c) |  339   void append(value_type c) | 
|  223   { |  340   { | 
|  224     append(&c, 1); |  341     append(&c, 1); | 
|  225   } |  342   } | 
|  226  |  | 
|  227   bool owns_buffer() const |  | 
|  228   { |  | 
|  229     return mBuf && (mLen & FLAGS_MASK) == OWNBUFFER; |  | 
|  230   } |  | 
|  231  |  | 
|  232   void ensure_own_buffer() |  | 
|  233   { |  | 
|  234     size_type len = length(); |  | 
|  235     if (len && !owns_buffer()) |  | 
|  236       resize(len, true); |  | 
|  237   } |  | 
|  238  |  | 
|  239   bool is_dependent() const |  | 
|  240   { |  | 
|  241     return (mLen & FLAGS_MASK) == DEPENDENT; |  | 
|  242   } |  | 
|  243  |  | 
|  244   bool is_invalid() const |  | 
|  245   { |  | 
|  246     return (mLen & FLAGS_MASK) == INVALID; |  | 
|  247   } |  | 
|  248  |  | 
|  249   bool is_deleted() const |  | 
|  250   { |  | 
|  251     return (mLen & FLAGS_MASK) == DELETED; |  | 
|  252   } |  | 
|  253 }; |  343 }; | 
|  254  |  | 
|  255 inline String operator "" _str(const String::value_type* str, |  | 
|  256     String::size_type len) |  | 
|  257 { |  | 
|  258   return String(const_cast<String::value_type*>(str), len); |  | 
|  259 } |  | 
|  260 #endif |  | 
| LEFT | RIGHT |