Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/String.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Optimized hash lookup performance a bit Created Feb. 8, 2016, 7:11 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/RegExpFilter.cpp ('k') | compiled/StringMap.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #pragma once 1 #pragma once
2 2
3 #include <cstddef> 3 #include <cstddef>
4 #include <cstring> 4 #include <cstring>
5 #include <algorithm> 5 #include <algorithm>
6
7 #include <emscripten.h>
6 8
7 #include "debug.h" 9 #include "debug.h"
8 10
9 inline void String_assert_readonly(bool readOnly); 11 inline void String_assert_readonly(bool readOnly);
10 12
11 class String 13 class String
12 { 14 {
13 friend class DependentString; 15 friend class DependentString;
14 friend class OwnedString; 16 friend class OwnedString;
15 17
16 public: 18 public:
17 typedef char16_t value_type; 19 typedef char16_t value_type;
18 typedef size_t size_type; 20 typedef size_t size_type;
19 21
20 // Type flags, stored in the top 2 bits of the mLen member 22 // Type flags, stored in the top 2 bits of the mLen member
21 static constexpr size_type INVALID = 0xC0000000; 23 static constexpr size_type INVALID = 0xC0000000;
22 static constexpr size_type DELETED = 0x80000000; 24 static constexpr size_type DELETED = 0x80000000;
23 static constexpr size_type READ_ONLY = 0x40000000; 25 static constexpr size_type READ_ONLY = 0x40000000;
24 static constexpr size_type READ_WRITE = 0x00000000; 26 static constexpr size_type READ_WRITE = 0x00000000;
25 27
26 static constexpr size_type FLAGS_MASK = 0xC0000000; 28 static constexpr size_type FLAGS_MASK = 0xC0000000;
27 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; 29 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF;
28 30
29 static constexpr size_type npos = -1; 31 static constexpr size_type npos = -1;
30 32
31 protected: 33 protected:
32 value_type* mBuf; 34 value_type* mBuf;
33 size_type mLen; 35 size_type mLen;
34 36
35 String(value_type* buf, size_type len, size_type flags) 37 explicit String(value_type* buf, size_type len, size_type flags)
36 : mBuf(buf), mLen((len & LENGTH_MASK) | flags) 38 : mBuf(buf), mLen((len & LENGTH_MASK) | flags)
39 {
40 }
41
42 ~String()
37 { 43 {
38 } 44 }
39 45
40 void reset(value_type* buf, size_type len, size_type flags) 46 void reset(value_type* buf, size_type len, size_type flags)
41 { 47 {
42 mBuf = buf; 48 mBuf = buf;
43 mLen = (len & LENGTH_MASK) | flags; 49 mLen = (len & LENGTH_MASK) | flags;
44 } 50 }
45 51
sergei 2016/02/17 12:54:53 What about adding protected destructor or virtual
Wladimir Palant 2016/02/18 16:06:53 Nice one, I thought that having a protected constr
46 public: 52 public:
47 size_type length() const 53 size_type length() const
48 { 54 {
49 return mLen & LENGTH_MASK; 55 return mLen & LENGTH_MASK;
50 } 56 }
51 57
52 bool empty() const 58 bool empty() const
53 { 59 {
54 return !(mLen & LENGTH_MASK); 60 return !(mLen & LENGTH_MASK);
55 } 61 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 size_type find(value_type c, size_type pos = 0) const 98 size_type find(value_type c, size_type pos = 0) const
93 { 99 {
94 for (size_type i = pos; i < length(); ++i) 100 for (size_type i = pos; i < length(); ++i)
95 if (mBuf[i] == c) 101 if (mBuf[i] == c)
96 return i; 102 return i;
97 return npos; 103 return npos;
98 } 104 }
99 105
100 size_type find(const String& str, size_type pos = 0) const 106 size_type find(const String& str, size_type pos = 0) const
101 { 107 {
108 if (pos > LENGTH_MASK || pos + str.length() > length())
109 return npos;
110
102 if (!str.length()) 111 if (!str.length())
103 return pos; 112 return pos;
sergei 2016/02/17 12:54:39 Strictly speaking, if `pos > this->length()` then
Wladimir Palant 2016/02/18 16:06:58 Done.
104 113
105 if (length() - pos < str.length()) 114 for (; pos + str.length() <= length(); ++pos)
sergei 2016/02/17 12:54:42 We also should check whether `this->length()` is n
Wladimir Palant 2016/02/18 16:06:47 If length() is 0 then length() - pos will be at mo
sergei 2016/02/22 12:45:46 I wanted to have at the beginning of the method th
Wladimir Palant 2016/02/23 12:37:21 Yes, in our case an integer overflow is luckily im
sergei 2016/02/23 15:07:27 As a matter of form we still need to check that `p
Wladimir Palant 2016/02/23 21:34:54 True, done.
106 return npos;
107
108 for (; pos < length() - str.length(); ++pos)
109 { 115 {
110 if (mBuf[pos] == str[0] && 116 if (mBuf[pos] == str[0] &&
111 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) = = 0) 117 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) = = 0)
112 { 118 {
113 return pos; 119 return pos;
114 } 120 }
115 } 121 }
116 122
117 return npos; 123 return npos;
118 } 124 }
119 125
120 size_type rfind(value_type c, size_type pos = npos) const 126 size_type rfind(value_type c, size_type pos = npos) const
121 { 127 {
122 if (length() == 0) 128 if (length() == 0)
123 return npos; 129 return npos;
124 130
125 if (pos == npos) 131 if (pos >= length())
126 pos = length() - 1; 132 pos = length() - 1;
sergei 2016/02/17 12:54:54 It seems safer to use something like `pos = min(po
Wladimir Palant 2016/02/18 16:06:46 Done.
127 133
128 for (int i = pos; i >= 0; --i) 134 for (int i = pos; i >= 0; --i)
129 if (mBuf[i] == c) 135 if (mBuf[i] == c)
130 return i; 136 return i;
131 return npos; 137 return npos;
132 } 138 }
139
140 bool is_invalid() const
141 {
142 return (mLen & FLAGS_MASK) == INVALID;
143 }
144
145 bool is_deleted() const
146 {
147 return (mLen & FLAGS_MASK) == DELETED;
148 }
149
150 void toLower()
151 {
152 size_type len = length();
153 for (size_type i = 0; i < len; ++i)
154 {
155 value_type currChar = mBuf[i];
156
157 // This should be more efficient with a lookup table but I couldn't measur e
158 // any performance difference.
159 if (currChar >= u'A' && currChar <= u'Z')
160 mBuf[i] = currChar + u'a' - u'A';
161 else if (currChar >= 128)
162 {
163 // It seems that calling JS is the easiest solution for lowercasing
164 // Unicode characters.
165 mBuf[i] = EM_ASM_INT({
166 return String.fromCharCode($0).toLowerCase().charCodeAt(0);
167 }, currChar);
168 }
169 }
170 }
133 }; 171 };
134 172
135 class DependentString : public String 173 class DependentString : public String
136 { 174 {
137 public: 175 public:
138 DependentString() 176 explicit DependentString()
139 : String(nullptr, 0, INVALID) 177 : String(nullptr, 0, INVALID)
140 { 178 {
141 } 179 }
142 180
143 DependentString(value_type* buf, size_type len) 181 explicit DependentString(value_type* buf, size_type len)
144 : String(buf, len, READ_WRITE) 182 : String(buf, len, READ_WRITE)
145 { 183 {
146 } 184 }
147 185
148 DependentString(const value_type* buf, size_type len) 186 explicit DependentString(const value_type* buf, size_type len)
149 : String(const_cast<value_type*>(buf), len, READ_ONLY) 187 : String(const_cast<value_type*>(buf), len, READ_ONLY)
150 { 188 {
151 } 189 }
152 190
153 DependentString(String& str, size_type pos = 0, size_type len = npos) 191 explicit DependentString(String& str, size_type pos = 0, size_type len = npos)
sergei 2016/02/17 12:54:51 I would add `explicit` for this particular method.
Wladimir Palant 2016/02/18 16:06:51 Why? I think that passing OwnedString to a functio
sergei 2016/02/22 12:45:45 May be it's desired. It seems it's not actually us
Wladimir Palant 2016/02/23 12:37:26 It's being used in various getters, e.g. Filter::G
sergei 2016/02/23 15:07:26 Yes, the variant with const, but this constructor
Wladimir Palant 2016/02/23 21:34:54 bindings.h currently enforces the return value to
154 : String( 192 : String(
155 str.mBuf + std::min(pos, str.length()), 193 str.mBuf + std::min(pos, str.length()),
156 std::min(len, str.length() - std::min(pos, str.length())), 194 std::min(len, str.length() - std::min(pos, str.length())),
157 READ_WRITE 195 str.is_readOnly() ? READ_ONLY : READ_WRITE
158 ) 196 )
159 { 197 {
160 } 198 }
161 199
162 DependentString(const String& str, size_type pos = 0, size_type len = npos) 200 explicit DependentString(const String& str, size_type pos = 0,
201 size_type len = npos)
163 : String( 202 : String(
164 str.mBuf + std::min(pos, str.length()), 203 str.mBuf + std::min(pos, str.length()),
165 std::min(len, str.length() - std::min(pos, str.length())), 204 std::min(len, str.length() - std::min(pos, str.length())),
166 READ_ONLY 205 READ_ONLY
167 ) 206 )
168 { 207 {
169 } 208 }
170 209
171 void reset(value_type* buf, size_type len) 210 void reset(value_type* buf, size_type len)
172 { 211 {
173 *this = DependentString(buf, len); 212 *this = DependentString(buf, len);
174 } 213 }
175 214
176 void reset(const value_type* buf, size_type len) 215 void reset(const value_type* buf, size_type len)
177 { 216 {
178 *this = DependentString(buf, len); 217 *this = DependentString(buf, len);
179 } 218 }
180 219
181 void reset(String& str, size_type pos = 0, size_type len = npos) 220 void reset(String& str, size_type pos = 0, size_type len = npos)
182 { 221 {
183 *this = DependentString(str, pos, len); 222 *this = DependentString(str, pos, len);
184 } 223 }
185 224
186 void reset(const String& str, size_type pos = 0, size_type len = npos) 225 void reset(const String& str, size_type pos = 0, size_type len = npos)
187 { 226 {
188 *this = DependentString(str, pos, len); 227 *this = DependentString(str, pos, len);
189 } 228 }
190 229
191 bool is_invalid() const 230 void erase()
sergei 2016/02/17 12:54:52 what about moving it into base class String?
Wladimir Palant 2016/02/18 16:06:52 Given that it is only necessary for StringMap - wh
sergei 2016/02/22 12:45:49 Because flag values are defined in `String`, flag
Wladimir Palant 2016/02/23 12:37:24 Done.
192 { 231 {
193 return (mLen & FLAGS_MASK) == INVALID; 232 *this = DependentString();
194 } 233 mLen = DELETED;
195
196 bool is_deleted() const
197 {
198 return (mLen & FLAGS_MASK) == DELETED;
sergei 2016/02/17 12:54:49 How can it actually happen? It seems it is never s
Wladimir Palant 2016/02/18 16:06:55 No, it's not - yet. The reason is that StringMap c
sergei 2016/02/22 12:45:50 Acknowledged.
199 } 234 }
200 }; 235 };
201 236
237 inline DependentString operator "" _str(const String::value_type* str,
238 String::size_type len)
239 {
240 return DependentString(str, len);
241 }
242
243 inline void String_assert_readonly(bool readOnly)
244 {
245 assert(!readOnly, u"Writing access to a read-only string"_str);
246 }
247
202 class OwnedString : public String 248 class OwnedString : public String
203 { 249 {
204 private: 250 private:
205 value_type* allocate(size_type len) 251 void grow(size_type additionalSize)
252 {
253 OwnedString newValue(length() + additionalSize);
254 if (length() > 0)
255 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length());
256 *this = std::move(newValue);
257 }
258
259 public:
260 explicit OwnedString(size_type len = 0)
261 : String(nullptr, len, READ_WRITE)
206 { 262 {
207 if (len) 263 if (len)
208 return new value_type[len]; 264 {
265 mBuf = new value_type[length()];
266 annotate_address(mBuf, "String");
267 }
209 else 268 else
210 return nullptr; 269 mBuf = nullptr;
211 } 270 }
212 271
213 void resize(size_type newLength, bool copy) 272 explicit OwnedString(const String& str)
sergei 2016/02/17 12:54:43 I'm not sure about `copy` argument, it's called on
Wladimir Palant 2016/02/18 16:06:49 Indeed, not any more - removed that parameter.
214 {
215 size_type oldLength = length();
216 value_type* oldBuffer = mBuf;
217
218 reset(nullptr, newLength, READ_WRITE);
219 newLength = length();
220 mBuf = allocate(newLength);
221 annotate_address(mBuf, "String");
222
223 if (copy && oldLength)
224 std::memcpy(mBuf, oldBuffer, sizeof(value_type) * std::min(oldLength, newL ength));
sergei 2016/02/17 12:54:41 If either destination or source buffer is nullptr
Wladimir Palant 2016/02/18 16:06:57 oldLength is non-zero so oldBuffer cannot be nullp
sergei 2016/02/22 12:45:38 Acknowledged, sorry, overlooked.
225 if (oldBuffer)
226 delete[] oldBuffer;
227 }
228
229 public:
230 OwnedString(size_type len = 0)
231 : String(nullptr, len, READ_WRITE)
232 {
233 mBuf = allocate(length());
234 annotate_address(mBuf, "String");
235 }
236
237 OwnedString(const String& str)
238 : OwnedString(str.length()) 273 : OwnedString(str.length())
239 { 274 {
240 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length()); 275 if (length())
sergei 2016/02/17 12:54:40 Here both mBuf and str.mBuf can be nullptr.
Wladimir Palant 2016/02/18 16:06:54 Should be fine as length() will be 0 in that case?
sergei 2016/02/22 12:45:48 According to http://en.cppreference.com/w/cpp/stri
Wladimir Palant 2016/02/23 12:37:23 Done.
276 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length());
241 } 277 }
242 278
243 OwnedString(const OwnedString& str) 279 OwnedString(const OwnedString& str)
244 : OwnedString(static_cast<const String&>(str)) 280 : OwnedString(static_cast<const String&>(str))
245 { 281 {
246 } 282 }
247 283
248 OwnedString(const value_type* str, size_type len) 284 explicit OwnedString(const value_type* str, size_type len)
249 : OwnedString(DependentString(str, len)) 285 : OwnedString(DependentString(str, len))
250 { 286 {
251 } 287 }
252 288
253 OwnedString(OwnedString&& str) 289 explicit OwnedString(OwnedString&& str)
254 : OwnedString(str.length()) 290 : OwnedString(0)
255 {
256 mBuf = str.mBuf;
257 str.mBuf = nullptr;
258 str.mLen = READ_WRITE | 0;
259 }
260
261 OwnedString(const char* source, size_type len)
262 : OwnedString(len)
263 {
264 for (size_type i = 0; i < len; i++)
265 mBuf[i] = source[i];
266 }
267
268 ~OwnedString()
269 {
270 if (mBuf)
271 delete[] mBuf;
272 }
273
274 OwnedString& operator=(const String& str)
275 {
276 *this = std::move(OwnedString(str));
277 return *this;
278 }
279
280 OwnedString& operator=(const OwnedString& str)
281 {
282 *this = std::move(OwnedString(str));
283 return *this;
284 }
285
286 OwnedString& operator=(OwnedString&& str)
287 { 291 {
288 mBuf = str.mBuf; 292 mBuf = str.mBuf;
289 mLen = str.mLen; 293 mLen = str.mLen;
290 str.mBuf = nullptr; 294 str.mBuf = nullptr;
291 str.mLen = READ_WRITE | 0; 295 str.mLen = READ_WRITE | 0;
296 }
297
298 ~OwnedString()
299 {
300 if (mBuf)
301 delete[] mBuf;
302 }
303
304 OwnedString& operator=(const String& str)
305 {
306 *this = std::move(OwnedString(str));
307 return *this;
308 }
309
310 OwnedString& operator=(const OwnedString& str)
311 {
312 *this = std::move(OwnedString(str));
313 return *this;
314 }
315
316 OwnedString& operator=(OwnedString&& str)
317 {
318 std::swap(mBuf, str.mBuf);
319 std::swap(mLen, str.mLen);
292 return *this; 320 return *this;
293 } 321 }
294 322
295 void append(const value_type* source, size_type sourceLen) 323 void append(const value_type* source, size_type sourceLen)
296 { 324 {
297 if (!sourceLen) 325 if (!sourceLen)
sergei 2016/02/17 12:54:50 it would be also good to check that source is not
Wladimir Palant 2016/02/18 16:06:50 That would be a bug in the caller - meaning that w
sergei 2016/02/22 12:45:39 Good.
298 return; 326 return;
299 327
328 assert(source, u"Null buffer passed to OwnedString.append()"_str);
300 size_t oldLength = length(); 329 size_t oldLength = length();
301 resize(oldLength + sourceLen, true); 330 grow(sourceLen);
302 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); 331 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen);
303 } 332 }
304 333
305 void append(const String& str) 334 void append(const String& str)
306 { 335 {
307 append(str.mBuf, str.length()); 336 append(str.mBuf, str.length());
308 } 337 }
309 338
310 void append(value_type c) 339 void append(value_type c)
311 { 340 {
312 append(&c, 1); 341 append(&c, 1);
313 } 342 }
314 }; 343 };
315
316 inline DependentString operator "" _str(const String::value_type* str,
317 String::size_type len)
318 {
319 return DependentString(str, len);
320 }
321
322 inline void String_assert_readonly(bool readOnly)
323 {
324 assert(!readOnly, u"Writing access to a read-only string"_str);
325 }
LEFTRIGHT

Powered by Google App Engine
This is Rietveld