LEFT | RIGHT |
1 #ifndef ADBLOCK_PLUS_STRING_H | 1 #pragma once |
2 #define ADBLOCK_PLUS_STRING_H | |
3 | 2 |
4 #include <cstddef> | 3 #include <cstddef> |
5 #include <cstring> | 4 #include <cstring> |
6 #include <algorithm> | 5 #include <algorithm> |
7 | 6 |
| 7 #include <emscripten.h> |
| 8 |
8 #include "debug.h" | 9 #include "debug.h" |
9 | 10 |
10 inline void String_assert_readonly(bool readOnly); | 11 inline void String_assert_readonly(bool readOnly); |
11 | 12 |
12 class String | 13 class String |
13 { | 14 { |
| 15 friend class DependentString; |
| 16 friend class OwnedString; |
| 17 |
14 public: | 18 public: |
15 typedef char16_t value_type; | 19 typedef char16_t value_type; |
16 typedef size_t size_type; | 20 typedef size_t size_type; |
17 | 21 |
18 // Type flags, stored in the top 2 bits of the mLen member | 22 // Type flags, stored in the top 2 bits of the mLen member |
19 static constexpr size_type OWNBUFFER = 0xC0000000; | 23 static constexpr size_type INVALID = 0xC0000000; |
20 static constexpr size_type INVALID = 0x80000000; | 24 static constexpr size_type DELETED = 0x80000000; |
21 static constexpr size_type DELETED = 0x40000000; | 25 static constexpr size_type READ_ONLY = 0x40000000; |
22 static constexpr size_type DEPENDENT = 0x00000000; | |
23 | |
24 // Read-only flag (for debug asserts only) | |
25 static constexpr size_type READ_ONLY = 0x20000000; | |
26 static constexpr size_type READ_WRITE = 0x00000000; | 26 static constexpr size_type READ_WRITE = 0x00000000; |
27 | 27 |
28 static constexpr size_type FLAGS_MASK = 0xE0000000; | 28 static constexpr size_type FLAGS_MASK = 0xC0000000; |
29 static constexpr size_type LENGTH_MASK = 0x1FFFFFFF; | 29 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; |
30 | 30 |
31 static constexpr size_type npos = -1; | 31 static constexpr size_type npos = -1; |
32 | 32 |
33 private: | 33 protected: |
34 value_type* mBuf; | 34 value_type* mBuf; |
35 size_type mLen; | 35 size_type mLen; |
36 | 36 |
37 value_type* allocate(size_type len) | 37 explicit String(value_type* buf, size_type len, size_type flags) |
38 { | 38 : mBuf(buf), mLen((len & LENGTH_MASK) | flags) |
39 if (len) | 39 { |
40 return new value_type[len]; | 40 } |
41 else | 41 |
42 return nullptr; | 42 ~String() |
43 } | 43 { |
44 | 44 } |
45 void resize(size_type newLength, bool copy) | 45 |
46 { | 46 void reset(value_type* buf, size_type len, size_type flags) |
47 bool owningOldBuffer = owns_buffer(); | 47 { |
48 size_type oldLength = length(); | 48 mBuf = buf; |
49 value_type* oldBuffer = mBuf; | 49 mLen = (len & LENGTH_MASK) | flags; |
50 | |
51 newLength &= LENGTH_MASK; | |
52 mBuf = allocate(newLength); | |
53 annotate_address(mBuf, "String"); | |
54 mLen = OWNBUFFER | READ_WRITE | newLength; | |
55 | |
56 if (copy && oldLength) | |
57 memcpy(mBuf, oldBuffer, sizeof(value_type) * std::min(oldLength, newLength)); | |
58 if (owningOldBuffer) | |
59 delete[] oldBuffer; | |
60 } | 50 } |
61 | 51 |
62 public: | 52 public: |
63 String() : mBuf(nullptr), mLen(INVALID) {} | |
64 | |
65 String(size_type len) | |
66 : mBuf(allocate(len & LENGTH_MASK)), | |
67 mLen(OWNBUFFER | READ_WRITE | (len & LENGTH_MASK)) | |
68 { | |
69 annotate_address(mBuf, "String"); | |
70 } | |
71 | |
72 String(value_type* buf, size_type len) | |
73 : mBuf(buf), mLen(DEPENDENT | READ_WRITE | (buf ? len & LENGTH_MASK: 0)) | |
74 { | |
75 } | |
76 | |
77 String(const value_type* buf, size_type len) | |
78 : mBuf(const_cast<value_type*>(buf)), | |
79 mLen(DEPENDENT | READ_ONLY | (buf ? len & LENGTH_MASK: 0)) | |
80 { | |
81 } | |
82 | |
83 String(String& str, size_type pos = 0, size_type len = npos) | |
84 : mBuf(str.mBuf + std::min(pos, str.length())), | |
85 mLen(DEPENDENT | READ_WRITE | std::min(len, str.length() - (mBuf - str.mBuf))) | |
86 { | |
87 } | |
88 | |
89 String(const String& str, size_type pos = 0, size_type len = npos) | |
90 : mBuf(str.mBuf + std::min(pos, str.length())), | |
91 mLen(DEPENDENT | READ_ONLY | std::min(len, str.length() - (mBuf - str.mBuf))) | |
92 { | |
93 } | |
94 | |
95 String(String&& str) | |
96 { | |
97 *this = std::move(str); | |
98 } | |
99 | |
100 String(const char* source, size_type len) | |
101 : String(len) | |
102 { | |
103 for (size_type i = 0; i < len; i++) | |
104 mBuf[i] = source[i]; | |
105 } | |
106 | |
107 String& operator=(const String& str) | |
108 { | |
109 reset(str); | |
110 return *this; | |
111 } | |
112 | |
113 String& operator=(String& str) | |
114 { | |
115 reset(str); | |
116 return *this; | |
117 } | |
118 | |
119 String& operator=(String&& str) | |
120 { | |
121 mBuf = str.mBuf; | |
122 mLen = str.mLen; | |
123 str.mBuf = nullptr; | |
124 str.mLen = INVALID; | |
125 return *this; | |
126 } | |
127 | |
128 ~String() | |
129 { | |
130 if (owns_buffer()) | |
131 delete[] mBuf; | |
132 } | |
133 | |
134 void reset(value_type* buf, size_type len) | |
135 { | |
136 mBuf = buf; | |
137 mLen = (DEPENDENT | READ_WRITE | (buf ? len & LENGTH_MASK: 0)); | |
138 } | |
139 | |
140 void reset(const value_type* buf, size_type len) | |
141 { | |
142 mBuf = const_cast<value_type*>(buf); | |
143 mLen = (DEPENDENT | READ_ONLY | (buf ? len & LENGTH_MASK: 0)); | |
144 } | |
145 | |
146 void reset(String& str, size_type pos = 0, size_type len = npos) | |
147 { | |
148 pos = std::min(pos, str.length()); | |
149 len = std::min(len, str.length() - pos); | |
150 reset(str.mBuf + pos, len); | |
151 } | |
152 | |
153 void reset(const String& str, size_type pos = 0, size_type len = npos) | |
154 { | |
155 pos = std::min(pos, str.length()); | |
156 len = std::min(len, str.length() - pos); | |
157 reset(const_cast<const value_type*>(str.mBuf + pos), len); | |
158 } | |
159 | |
160 size_type length() const | 53 size_type length() const |
161 { | 54 { |
162 return mLen & LENGTH_MASK; | 55 return mLen & LENGTH_MASK; |
163 } | 56 } |
164 | 57 |
165 bool empty() const | 58 bool empty() const |
166 { | 59 { |
167 return !(mLen & LENGTH_MASK); | 60 return !(mLen & LENGTH_MASK); |
168 } | 61 } |
169 | 62 |
170 const value_type* data() const | 63 const value_type* data() const |
171 { | 64 { |
172 return mBuf; | 65 return mBuf; |
173 } | 66 } |
174 | 67 |
175 value_type* data() | 68 value_type* data() |
176 { | 69 { |
177 String_assert_readonly(mLen & READ_ONLY); | 70 String_assert_readonly(is_readOnly()); |
178 return mBuf; | 71 return mBuf; |
179 } | 72 } |
180 | 73 |
181 const value_type& operator[](size_type pos) const | 74 const value_type& operator[](size_type pos) const |
182 { | 75 { |
183 return mBuf[pos]; | 76 return mBuf[pos]; |
184 } | 77 } |
185 | 78 |
186 value_type& operator[](size_type pos) | 79 value_type& operator[](size_type pos) |
187 { | 80 { |
188 String_assert_readonly(mLen & READ_ONLY); | 81 String_assert_readonly(is_readOnly()); |
189 return mBuf[pos]; | 82 return mBuf[pos]; |
| 83 } |
| 84 |
| 85 bool is_readOnly() const |
| 86 { |
| 87 return (mLen & FLAGS_MASK) != READ_WRITE; |
190 } | 88 } |
191 | 89 |
192 bool equals(const String& other) const | 90 bool equals(const String& other) const |
193 { | 91 { |
194 if (length() != other.length()) | 92 if (length() != other.length()) |
195 return false; | 93 return false; |
196 | 94 |
197 return memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0; | 95 return std::memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0; |
198 } | 96 } |
199 | 97 |
200 size_type find(value_type c, size_type pos = 0) const | 98 size_type find(value_type c, size_type pos = 0) const |
201 { | 99 { |
202 for (size_type i = pos; i < length(); ++i) | 100 for (size_type i = pos; i < length(); ++i) |
203 if (mBuf[i] == c) | 101 if (mBuf[i] == c) |
204 return i; | 102 return i; |
205 return npos; | 103 return npos; |
206 } | 104 } |
207 | 105 |
208 size_type find(const String& str, size_type pos = 0) const | 106 size_type find(const String& str, size_type pos = 0) const |
209 { | 107 { |
| 108 if (pos > LENGTH_MASK || pos + str.length() > length()) |
| 109 return npos; |
| 110 |
210 if (!str.length()) | 111 if (!str.length()) |
211 return pos; | 112 return pos; |
212 | 113 |
213 if (length() - pos < str.length()) | 114 for (; pos + str.length() <= length(); ++pos) |
214 return npos; | |
215 | |
216 for (; pos < length() - str.length(); ++pos) | |
217 { | 115 { |
218 if (mBuf[pos] == str[0] && | 116 if (mBuf[pos] == str[0] && |
219 memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0) | 117 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0) |
220 { | 118 { |
221 return pos; | 119 return pos; |
222 } | 120 } |
223 } | 121 } |
224 | 122 |
225 return npos; | 123 return npos; |
226 } | 124 } |
227 | 125 |
228 size_type rfind(value_type c, size_type pos = npos) const | 126 size_type rfind(value_type c, size_type pos = npos) const |
229 { | 127 { |
230 if (length() == 0) | 128 if (length() == 0) |
231 return npos; | 129 return npos; |
232 | 130 |
233 if (pos == npos) | 131 if (pos >= length()) |
234 pos = length() - 1; | 132 pos = length() - 1; |
235 | 133 |
236 for (int i = pos; i >= 0; --i) | 134 for (int i = pos; i >= 0; --i) |
237 if (mBuf[i] == c) | 135 if (mBuf[i] == c) |
238 return i; | 136 return i; |
239 return npos; | 137 return npos; |
240 } | 138 } |
241 | 139 |
| 140 bool is_invalid() const |
| 141 { |
| 142 return (mLen & FLAGS_MASK) == INVALID; |
| 143 } |
| 144 |
| 145 bool is_deleted() const |
| 146 { |
| 147 return (mLen & FLAGS_MASK) == DELETED; |
| 148 } |
| 149 |
| 150 void toLower() |
| 151 { |
| 152 size_type len = length(); |
| 153 for (size_type i = 0; i < len; ++i) |
| 154 { |
| 155 value_type currChar = mBuf[i]; |
| 156 |
| 157 // This should be more efficient with a lookup table but I couldn't measure |
| 158 // any performance difference. |
| 159 if (currChar >= u'A' && currChar <= u'Z') |
| 160 mBuf[i] = currChar + u'a' - u'A'; |
| 161 else if (currChar >= 128) |
| 162 { |
| 163 // It seems that calling JS is the easiest solution for lowercasing |
| 164 // Unicode characters. |
| 165 mBuf[i] = EM_ASM_INT({ |
| 166 return String.fromCharCode($0).toLowerCase().charCodeAt(0); |
| 167 }, currChar); |
| 168 } |
| 169 } |
| 170 } |
| 171 }; |
| 172 |
| 173 class DependentString : public String |
| 174 { |
| 175 public: |
| 176 explicit DependentString() |
| 177 : String(nullptr, 0, INVALID) |
| 178 { |
| 179 } |
| 180 |
| 181 explicit DependentString(value_type* buf, size_type len) |
| 182 : String(buf, len, READ_WRITE) |
| 183 { |
| 184 } |
| 185 |
| 186 explicit DependentString(const value_type* buf, size_type len) |
| 187 : String(const_cast<value_type*>(buf), len, READ_ONLY) |
| 188 { |
| 189 } |
| 190 |
| 191 explicit DependentString(String& str, size_type pos = 0, size_type len = npos) |
| 192 : String( |
| 193 str.mBuf + std::min(pos, str.length()), |
| 194 std::min(len, str.length() - std::min(pos, str.length())), |
| 195 str.is_readOnly() ? READ_ONLY : READ_WRITE |
| 196 ) |
| 197 { |
| 198 } |
| 199 |
| 200 explicit DependentString(const String& str, size_type pos = 0, |
| 201 size_type len = npos) |
| 202 : String( |
| 203 str.mBuf + std::min(pos, str.length()), |
| 204 std::min(len, str.length() - std::min(pos, str.length())), |
| 205 READ_ONLY |
| 206 ) |
| 207 { |
| 208 } |
| 209 |
| 210 void reset(value_type* buf, size_type len) |
| 211 { |
| 212 *this = DependentString(buf, len); |
| 213 } |
| 214 |
| 215 void reset(const value_type* buf, size_type len) |
| 216 { |
| 217 *this = DependentString(buf, len); |
| 218 } |
| 219 |
| 220 void reset(String& str, size_type pos = 0, size_type len = npos) |
| 221 { |
| 222 *this = DependentString(str, pos, len); |
| 223 } |
| 224 |
| 225 void reset(const String& str, size_type pos = 0, size_type len = npos) |
| 226 { |
| 227 *this = DependentString(str, pos, len); |
| 228 } |
| 229 |
| 230 void erase() |
| 231 { |
| 232 *this = DependentString(); |
| 233 mLen = DELETED; |
| 234 } |
| 235 }; |
| 236 |
| 237 inline DependentString operator "" _str(const String::value_type* str, |
| 238 String::size_type len) |
| 239 { |
| 240 return DependentString(str, len); |
| 241 } |
| 242 |
| 243 inline void String_assert_readonly(bool readOnly) |
| 244 { |
| 245 assert(!readOnly, u"Writing access to a read-only string"_str); |
| 246 } |
| 247 |
| 248 class OwnedString : public String |
| 249 { |
| 250 private: |
| 251 void grow(size_type additionalSize) |
| 252 { |
| 253 OwnedString newValue(length() + additionalSize); |
| 254 if (length() > 0) |
| 255 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length()); |
| 256 *this = std::move(newValue); |
| 257 } |
| 258 |
| 259 public: |
| 260 explicit OwnedString(size_type len = 0) |
| 261 : String(nullptr, len, READ_WRITE) |
| 262 { |
| 263 if (len) |
| 264 { |
| 265 mBuf = new value_type[length()]; |
| 266 annotate_address(mBuf, "String"); |
| 267 } |
| 268 else |
| 269 mBuf = nullptr; |
| 270 } |
| 271 |
| 272 explicit OwnedString(const String& str) |
| 273 : OwnedString(str.length()) |
| 274 { |
| 275 if (length()) |
| 276 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length()); |
| 277 } |
| 278 |
| 279 OwnedString(const OwnedString& str) |
| 280 : OwnedString(static_cast<const String&>(str)) |
| 281 { |
| 282 } |
| 283 |
| 284 explicit OwnedString(const value_type* str, size_type len) |
| 285 : OwnedString(DependentString(str, len)) |
| 286 { |
| 287 } |
| 288 |
| 289 explicit OwnedString(OwnedString&& str) |
| 290 : OwnedString(0) |
| 291 { |
| 292 mBuf = str.mBuf; |
| 293 mLen = str.mLen; |
| 294 str.mBuf = nullptr; |
| 295 str.mLen = READ_WRITE | 0; |
| 296 } |
| 297 |
| 298 ~OwnedString() |
| 299 { |
| 300 if (mBuf) |
| 301 delete[] mBuf; |
| 302 } |
| 303 |
| 304 OwnedString& operator=(const String& str) |
| 305 { |
| 306 *this = std::move(OwnedString(str)); |
| 307 return *this; |
| 308 } |
| 309 |
| 310 OwnedString& operator=(const OwnedString& str) |
| 311 { |
| 312 *this = std::move(OwnedString(str)); |
| 313 return *this; |
| 314 } |
| 315 |
| 316 OwnedString& operator=(OwnedString&& str) |
| 317 { |
| 318 std::swap(mBuf, str.mBuf); |
| 319 std::swap(mLen, str.mLen); |
| 320 return *this; |
| 321 } |
| 322 |
242 void append(const value_type* source, size_type sourceLen) | 323 void append(const value_type* source, size_type sourceLen) |
243 { | 324 { |
244 if (!sourceLen) | 325 if (!sourceLen) |
245 return; | 326 return; |
246 | 327 |
| 328 assert(source, u"Null buffer passed to OwnedString.append()"_str); |
247 size_t oldLength = length(); | 329 size_t oldLength = length(); |
248 resize(oldLength + sourceLen, true); | 330 grow(sourceLen); |
249 memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); | 331 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); |
250 } | 332 } |
251 | 333 |
252 void append(const String& str) | 334 void append(const String& str) |
253 { | 335 { |
254 append(str.mBuf, str.length()); | 336 append(str.mBuf, str.length()); |
255 } | 337 } |
256 | 338 |
257 void append(value_type c) | 339 void append(value_type c) |
258 { | 340 { |
259 append(&c, 1); | 341 append(&c, 1); |
260 } | 342 } |
261 | |
262 bool owns_buffer() const | |
263 { | |
264 return mBuf && (mLen & FLAGS_MASK) == OWNBUFFER; | |
265 } | |
266 | |
267 String& ensure_own_buffer() | |
268 { | |
269 size_type len = length(); | |
270 if (len && !owns_buffer()) | |
271 resize(len, true); | |
272 return *this; | |
273 } | |
274 | |
275 bool is_dependent() const | |
276 { | |
277 return (mLen & FLAGS_MASK) == DEPENDENT; | |
278 } | |
279 | |
280 bool is_invalid() const | |
281 { | |
282 return (mLen & FLAGS_MASK) == INVALID; | |
283 } | |
284 | |
285 bool is_deleted() const | |
286 { | |
287 return (mLen & FLAGS_MASK) == DELETED; | |
288 } | |
289 }; | 343 }; |
290 | |
291 inline String operator "" _str(const String::value_type* str, | |
292 String::size_type len) | |
293 { | |
294 return String(const_cast<String::value_type*>(str), len); | |
295 } | |
296 | |
297 inline void String_assert_readonly(bool readOnly) | |
298 { | |
299 assert(!readOnly, u"Writing access to a read-only string"_str); | |
300 } | |
301 | |
302 #endif | |
LEFT | RIGHT |