Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 #pragma once | 1 #pragma once |
2 | 2 |
3 #include <cstddef> | 3 #include <cstddef> |
4 #include <cstring> | 4 #include <cstring> |
5 #include <algorithm> | 5 #include <algorithm> |
6 | |
7 #include <emscripten.h> | |
6 | 8 |
7 #include "debug.h" | 9 #include "debug.h" |
8 | 10 |
9 inline void String_assert_readonly(bool readOnly); | 11 inline void String_assert_readonly(bool readOnly); |
10 | 12 |
11 class String | 13 class String |
12 { | 14 { |
13 friend class DependentString; | 15 friend class DependentString; |
14 friend class OwnedString; | 16 friend class OwnedString; |
15 | 17 |
16 public: | 18 public: |
17 typedef char16_t value_type; | 19 typedef char16_t value_type; |
18 typedef size_t size_type; | 20 typedef size_t size_type; |
19 | 21 |
20 // Type flags, stored in the top 2 bits of the mLen member | 22 // Type flags, stored in the top 2 bits of the mLen member |
21 static constexpr size_type INVALID = 0xC0000000; | 23 static constexpr size_type INVALID = 0xC0000000; |
22 static constexpr size_type DELETED = 0x80000000; | 24 static constexpr size_type DELETED = 0x80000000; |
23 static constexpr size_type READ_ONLY = 0x40000000; | 25 static constexpr size_type READ_ONLY = 0x40000000; |
24 static constexpr size_type READ_WRITE = 0x00000000; | 26 static constexpr size_type READ_WRITE = 0x00000000; |
25 | 27 |
26 static constexpr size_type FLAGS_MASK = 0xC0000000; | 28 static constexpr size_type FLAGS_MASK = 0xC0000000; |
27 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; | 29 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; |
28 | 30 |
29 static constexpr size_type npos = -1; | 31 static constexpr size_type npos = -1; |
30 | 32 |
31 protected: | 33 protected: |
32 value_type* mBuf; | 34 value_type* mBuf; |
33 size_type mLen; | 35 size_type mLen; |
34 | 36 |
35 String(value_type* buf, size_type len, size_type flags) | 37 explicit String(value_type* buf, size_type len, size_type flags) |
36 : mBuf(buf), mLen((len & LENGTH_MASK) | flags) | 38 : mBuf(buf), mLen((len & LENGTH_MASK) | flags) |
39 { | |
40 } | |
41 | |
42 ~String() | |
37 { | 43 { |
38 } | 44 } |
39 | 45 |
40 void reset(value_type* buf, size_type len, size_type flags) | 46 void reset(value_type* buf, size_type len, size_type flags) |
41 { | 47 { |
42 mBuf = buf; | 48 mBuf = buf; |
43 mLen = (len & LENGTH_MASK) | flags; | 49 mLen = (len & LENGTH_MASK) | flags; |
44 } | 50 } |
45 | 51 |
sergei
2016/02/17 12:54:53
What about adding protected destructor or virtual
Wladimir Palant
2016/02/18 16:06:53
Nice one, I thought that having a protected constr
| |
46 public: | 52 public: |
47 size_type length() const | 53 size_type length() const |
48 { | 54 { |
49 return mLen & LENGTH_MASK; | 55 return mLen & LENGTH_MASK; |
50 } | 56 } |
51 | 57 |
52 bool empty() const | 58 bool empty() const |
53 { | 59 { |
54 return !(mLen & LENGTH_MASK); | 60 return !(mLen & LENGTH_MASK); |
55 } | 61 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
92 size_type find(value_type c, size_type pos = 0) const | 98 size_type find(value_type c, size_type pos = 0) const |
93 { | 99 { |
94 for (size_type i = pos; i < length(); ++i) | 100 for (size_type i = pos; i < length(); ++i) |
95 if (mBuf[i] == c) | 101 if (mBuf[i] == c) |
96 return i; | 102 return i; |
97 return npos; | 103 return npos; |
98 } | 104 } |
99 | 105 |
100 size_type find(const String& str, size_type pos = 0) const | 106 size_type find(const String& str, size_type pos = 0) const |
101 { | 107 { |
108 if (pos > LENGTH_MASK || pos + str.length() > length()) | |
109 return npos; | |
110 | |
102 if (!str.length()) | 111 if (!str.length()) |
103 return pos; | 112 return pos; |
sergei
2016/02/17 12:54:39
Strictly speaking, if `pos > this->length()` then
Wladimir Palant
2016/02/18 16:06:58
Done.
| |
104 | 113 |
105 if (length() - pos < str.length()) | 114 for (; pos + str.length() <= length(); ++pos) |
sergei
2016/02/17 12:54:42
We also should check whether `this->length()` is n
Wladimir Palant
2016/02/18 16:06:47
If length() is 0 then length() - pos will be at mo
sergei
2016/02/22 12:45:46
I wanted to have at the beginning of the method th
Wladimir Palant
2016/02/23 12:37:21
Yes, in our case an integer overflow is luckily im
sergei
2016/02/23 15:07:27
As a matter of form we still need to check that `p
Wladimir Palant
2016/02/23 21:34:54
True, done.
| |
106 return npos; | |
107 | |
108 for (; pos < length() - str.length(); ++pos) | |
109 { | 115 { |
110 if (mBuf[pos] == str[0] && | 116 if (mBuf[pos] == str[0] && |
111 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) = = 0) | 117 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) = = 0) |
112 { | 118 { |
113 return pos; | 119 return pos; |
114 } | 120 } |
115 } | 121 } |
116 | 122 |
117 return npos; | 123 return npos; |
118 } | 124 } |
119 | 125 |
120 size_type rfind(value_type c, size_type pos = npos) const | 126 size_type rfind(value_type c, size_type pos = npos) const |
121 { | 127 { |
122 if (length() == 0) | 128 if (length() == 0) |
123 return npos; | 129 return npos; |
124 | 130 |
125 if (pos == npos) | 131 if (pos >= length()) |
126 pos = length() - 1; | 132 pos = length() - 1; |
sergei
2016/02/17 12:54:54
It seems safer to use something like `pos = min(po
Wladimir Palant
2016/02/18 16:06:46
Done.
| |
127 | 133 |
128 for (int i = pos; i >= 0; --i) | 134 for (int i = pos; i >= 0; --i) |
129 if (mBuf[i] == c) | 135 if (mBuf[i] == c) |
130 return i; | 136 return i; |
131 return npos; | 137 return npos; |
132 } | 138 } |
139 | |
140 bool is_invalid() const | |
141 { | |
142 return (mLen & FLAGS_MASK) == INVALID; | |
143 } | |
144 | |
145 bool is_deleted() const | |
146 { | |
147 return (mLen & FLAGS_MASK) == DELETED; | |
148 } | |
149 | |
150 void toLower() | |
151 { | |
152 size_type len = length(); | |
153 for (size_type i = 0; i < len; ++i) | |
154 { | |
155 value_type currChar = mBuf[i]; | |
156 | |
157 // This should be more efficient with a lookup table but I couldn't measur e | |
158 // any performance difference. | |
159 if (currChar >= u'A' && currChar <= u'Z') | |
160 mBuf[i] = currChar + u'a' - u'A'; | |
161 else if (currChar >= 128) | |
162 { | |
163 // It seems that calling JS is the easiest solution for lowercasing | |
164 // Unicode characters. | |
165 mBuf[i] = EM_ASM_INT({ | |
166 return String.fromCharCode($0).toLowerCase().charCodeAt(0); | |
167 }, currChar); | |
168 } | |
169 } | |
170 } | |
133 }; | 171 }; |
134 | 172 |
135 class DependentString : public String | 173 class DependentString : public String |
136 { | 174 { |
137 public: | 175 public: |
138 DependentString() | 176 explicit DependentString() |
139 : String(nullptr, 0, INVALID) | 177 : String(nullptr, 0, INVALID) |
140 { | 178 { |
141 } | 179 } |
142 | 180 |
143 DependentString(value_type* buf, size_type len) | 181 explicit DependentString(value_type* buf, size_type len) |
144 : String(buf, len, READ_WRITE) | 182 : String(buf, len, READ_WRITE) |
145 { | 183 { |
146 } | 184 } |
147 | 185 |
148 DependentString(const value_type* buf, size_type len) | 186 explicit DependentString(const value_type* buf, size_type len) |
149 : String(const_cast<value_type*>(buf), len, READ_ONLY) | 187 : String(const_cast<value_type*>(buf), len, READ_ONLY) |
150 { | 188 { |
151 } | 189 } |
152 | 190 |
153 DependentString(String& str, size_type pos = 0, size_type len = npos) | 191 explicit DependentString(String& str, size_type pos = 0, size_type len = npos) |
sergei
2016/02/17 12:54:51
I would add `explicit` for this particular method.
Wladimir Palant
2016/02/18 16:06:51
Why? I think that passing OwnedString to a functio
sergei
2016/02/22 12:45:45
May be it's desired. It seems it's not actually us
Wladimir Palant
2016/02/23 12:37:26
It's being used in various getters, e.g. Filter::G
sergei
2016/02/23 15:07:26
Yes, the variant with const, but this constructor
Wladimir Palant
2016/02/23 21:34:54
bindings.h currently enforces the return value to
| |
154 : String( | 192 : String( |
155 str.mBuf + std::min(pos, str.length()), | 193 str.mBuf + std::min(pos, str.length()), |
156 std::min(len, str.length() - std::min(pos, str.length())), | 194 std::min(len, str.length() - std::min(pos, str.length())), |
157 READ_WRITE | 195 str.is_readOnly() ? READ_ONLY : READ_WRITE |
158 ) | 196 ) |
159 { | 197 { |
160 } | 198 } |
161 | 199 |
162 DependentString(const String& str, size_type pos = 0, size_type len = npos) | 200 explicit DependentString(const String& str, size_type pos = 0, |
201 size_type len = npos) | |
163 : String( | 202 : String( |
164 str.mBuf + std::min(pos, str.length()), | 203 str.mBuf + std::min(pos, str.length()), |
165 std::min(len, str.length() - std::min(pos, str.length())), | 204 std::min(len, str.length() - std::min(pos, str.length())), |
166 READ_ONLY | 205 READ_ONLY |
167 ) | 206 ) |
168 { | 207 { |
169 } | 208 } |
170 | 209 |
171 void reset(value_type* buf, size_type len) | 210 void reset(value_type* buf, size_type len) |
172 { | 211 { |
173 *this = DependentString(buf, len); | 212 *this = DependentString(buf, len); |
174 } | 213 } |
175 | 214 |
176 void reset(const value_type* buf, size_type len) | 215 void reset(const value_type* buf, size_type len) |
177 { | 216 { |
178 *this = DependentString(buf, len); | 217 *this = DependentString(buf, len); |
179 } | 218 } |
180 | 219 |
181 void reset(String& str, size_type pos = 0, size_type len = npos) | 220 void reset(String& str, size_type pos = 0, size_type len = npos) |
182 { | 221 { |
183 *this = DependentString(str, pos, len); | 222 *this = DependentString(str, pos, len); |
184 } | 223 } |
185 | 224 |
186 void reset(const String& str, size_type pos = 0, size_type len = npos) | 225 void reset(const String& str, size_type pos = 0, size_type len = npos) |
187 { | 226 { |
188 *this = DependentString(str, pos, len); | 227 *this = DependentString(str, pos, len); |
189 } | 228 } |
190 | 229 |
191 bool is_invalid() const | 230 void erase() |
sergei
2016/02/17 12:54:52
what about moving it into base class String?
Wladimir Palant
2016/02/18 16:06:52
Given that it is only necessary for StringMap - wh
sergei
2016/02/22 12:45:49
Because flag values are defined in `String`, flag
Wladimir Palant
2016/02/23 12:37:24
Done.
| |
192 { | 231 { |
193 return (mLen & FLAGS_MASK) == INVALID; | 232 *this = DependentString(); |
194 } | 233 mLen = DELETED; |
195 | |
196 bool is_deleted() const | |
197 { | |
198 return (mLen & FLAGS_MASK) == DELETED; | |
sergei
2016/02/17 12:54:49
How can it actually happen? It seems it is never s
Wladimir Palant
2016/02/18 16:06:55
No, it's not - yet. The reason is that StringMap c
sergei
2016/02/22 12:45:50
Acknowledged.
| |
199 } | 234 } |
200 }; | 235 }; |
201 | 236 |
237 inline DependentString operator "" _str(const String::value_type* str, | |
238 String::size_type len) | |
239 { | |
240 return DependentString(str, len); | |
241 } | |
242 | |
243 inline void String_assert_readonly(bool readOnly) | |
244 { | |
245 assert(!readOnly, u"Writing access to a read-only string"_str); | |
246 } | |
247 | |
202 class OwnedString : public String | 248 class OwnedString : public String |
203 { | 249 { |
204 private: | 250 private: |
205 value_type* allocate(size_type len) | 251 void grow(size_type additionalSize) |
252 { | |
253 OwnedString newValue(length() + additionalSize); | |
254 if (length() > 0) | |
255 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length()); | |
256 *this = std::move(newValue); | |
257 } | |
258 | |
259 public: | |
260 explicit OwnedString(size_type len = 0) | |
261 : String(nullptr, len, READ_WRITE) | |
206 { | 262 { |
207 if (len) | 263 if (len) |
208 return new value_type[len]; | 264 { |
265 mBuf = new value_type[length()]; | |
266 annotate_address(mBuf, "String"); | |
267 } | |
209 else | 268 else |
210 return nullptr; | 269 mBuf = nullptr; |
211 } | 270 } |
212 | 271 |
213 void resize(size_type newLength, bool copy) | 272 explicit OwnedString(const String& str) |
sergei
2016/02/17 12:54:43
I'm not sure about `copy` argument, it's called on
Wladimir Palant
2016/02/18 16:06:49
Indeed, not any more - removed that parameter.
| |
214 { | |
215 size_type oldLength = length(); | |
216 value_type* oldBuffer = mBuf; | |
217 | |
218 reset(nullptr, newLength, READ_WRITE); | |
219 newLength = length(); | |
220 mBuf = allocate(newLength); | |
221 annotate_address(mBuf, "String"); | |
222 | |
223 if (copy && oldLength) | |
224 std::memcpy(mBuf, oldBuffer, sizeof(value_type) * std::min(oldLength, newL ength)); | |
sergei
2016/02/17 12:54:41
If either destination or source buffer is nullptr
Wladimir Palant
2016/02/18 16:06:57
oldLength is non-zero so oldBuffer cannot be nullp
sergei
2016/02/22 12:45:38
Acknowledged, sorry, overlooked.
| |
225 if (oldBuffer) | |
226 delete[] oldBuffer; | |
227 } | |
228 | |
229 public: | |
230 OwnedString(size_type len = 0) | |
231 : String(nullptr, len, READ_WRITE) | |
232 { | |
233 mBuf = allocate(length()); | |
234 annotate_address(mBuf, "String"); | |
235 } | |
236 | |
237 OwnedString(const String& str) | |
238 : OwnedString(str.length()) | 273 : OwnedString(str.length()) |
239 { | 274 { |
240 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length()); | 275 if (length()) |
sergei
2016/02/17 12:54:40
Here both mBuf and str.mBuf can be nullptr.
Wladimir Palant
2016/02/18 16:06:54
Should be fine as length() will be 0 in that case?
sergei
2016/02/22 12:45:48
According to http://en.cppreference.com/w/cpp/stri
Wladimir Palant
2016/02/23 12:37:23
Done.
| |
276 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length()); | |
241 } | 277 } |
242 | 278 |
243 OwnedString(const OwnedString& str) | 279 OwnedString(const OwnedString& str) |
244 : OwnedString(static_cast<const String&>(str)) | 280 : OwnedString(static_cast<const String&>(str)) |
245 { | 281 { |
246 } | 282 } |
247 | 283 |
248 OwnedString(const value_type* str, size_type len) | 284 explicit OwnedString(const value_type* str, size_type len) |
249 : OwnedString(DependentString(str, len)) | 285 : OwnedString(DependentString(str, len)) |
250 { | 286 { |
251 } | 287 } |
252 | 288 |
253 OwnedString(OwnedString&& str) | 289 explicit OwnedString(OwnedString&& str) |
254 : OwnedString(str.length()) | 290 : OwnedString(0) |
255 { | |
256 mBuf = str.mBuf; | |
257 str.mBuf = nullptr; | |
258 str.mLen = READ_WRITE | 0; | |
259 } | |
260 | |
261 OwnedString(const char* source, size_type len) | |
262 : OwnedString(len) | |
263 { | |
264 for (size_type i = 0; i < len; i++) | |
265 mBuf[i] = source[i]; | |
266 } | |
267 | |
268 ~OwnedString() | |
269 { | |
270 if (mBuf) | |
271 delete[] mBuf; | |
272 } | |
273 | |
274 OwnedString& operator=(const String& str) | |
275 { | |
276 *this = std::move(OwnedString(str)); | |
277 return *this; | |
278 } | |
279 | |
280 OwnedString& operator=(const OwnedString& str) | |
281 { | |
282 *this = std::move(OwnedString(str)); | |
283 return *this; | |
284 } | |
285 | |
286 OwnedString& operator=(OwnedString&& str) | |
287 { | 291 { |
288 mBuf = str.mBuf; | 292 mBuf = str.mBuf; |
289 mLen = str.mLen; | 293 mLen = str.mLen; |
290 str.mBuf = nullptr; | 294 str.mBuf = nullptr; |
291 str.mLen = READ_WRITE | 0; | 295 str.mLen = READ_WRITE | 0; |
296 } | |
297 | |
298 ~OwnedString() | |
299 { | |
300 if (mBuf) | |
301 delete[] mBuf; | |
302 } | |
303 | |
304 OwnedString& operator=(const String& str) | |
305 { | |
306 *this = std::move(OwnedString(str)); | |
307 return *this; | |
308 } | |
309 | |
310 OwnedString& operator=(const OwnedString& str) | |
311 { | |
312 *this = std::move(OwnedString(str)); | |
313 return *this; | |
314 } | |
315 | |
316 OwnedString& operator=(OwnedString&& str) | |
317 { | |
318 std::swap(mBuf, str.mBuf); | |
319 std::swap(mLen, str.mLen); | |
292 return *this; | 320 return *this; |
293 } | 321 } |
294 | 322 |
295 void append(const value_type* source, size_type sourceLen) | 323 void append(const value_type* source, size_type sourceLen) |
296 { | 324 { |
297 if (!sourceLen) | 325 if (!sourceLen) |
sergei
2016/02/17 12:54:50
it would be also good to check that source is not
Wladimir Palant
2016/02/18 16:06:50
That would be a bug in the caller - meaning that w
sergei
2016/02/22 12:45:39
Good.
| |
298 return; | 326 return; |
299 | 327 |
328 assert(source, u"Null buffer passed to OwnedString.append()"_str); | |
300 size_t oldLength = length(); | 329 size_t oldLength = length(); |
301 resize(oldLength + sourceLen, true); | 330 grow(sourceLen); |
302 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); | 331 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); |
303 } | 332 } |
304 | 333 |
305 void append(const String& str) | 334 void append(const String& str) |
306 { | 335 { |
307 append(str.mBuf, str.length()); | 336 append(str.mBuf, str.length()); |
308 } | 337 } |
309 | 338 |
310 void append(value_type c) | 339 void append(value_type c) |
311 { | 340 { |
312 append(&c, 1); | 341 append(&c, 1); |
313 } | 342 } |
314 }; | 343 }; |
315 | |
316 inline DependentString operator "" _str(const String::value_type* str, | |
317 String::size_type len) | |
318 { | |
319 return DependentString(str, len); | |
320 } | |
321 | |
322 inline void String_assert_readonly(bool readOnly) | |
323 { | |
324 assert(!readOnly, u"Writing access to a read-only string"_str); | |
325 } | |
LEFT | RIGHT |