Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/String.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Rebased, addressed comments, changed StringMap::find() return value Created Feb. 18, 2016, 4:02 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/RegExpFilter.cpp ('k') | compiled/StringMap.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 #pragma once 1 #pragma once
2 2
3 #include <cstddef> 3 #include <cstddef>
4 #include <cstring> 4 #include <cstring>
5 #include <algorithm> 5 #include <algorithm>
6
7 #include <emscripten.h>
6 8
7 #include "debug.h" 9 #include "debug.h"
8 10
9 inline void String_assert_readonly(bool readOnly); 11 inline void String_assert_readonly(bool readOnly);
10 12
11 class String 13 class String
12 { 14 {
13 friend class DependentString; 15 friend class DependentString;
14 friend class OwnedString; 16 friend class OwnedString;
15 17
16 public: 18 public:
17 typedef char16_t value_type; 19 typedef char16_t value_type;
18 typedef size_t size_type; 20 typedef size_t size_type;
19 21
20 // Type flags, stored in the top 2 bits of the mLen member 22 // Type flags, stored in the top 2 bits of the mLen member
21 static constexpr size_type INVALID = 0xC0000000; 23 static constexpr size_type INVALID = 0xC0000000;
22 static constexpr size_type DELETED = 0x80000000; 24 static constexpr size_type DELETED = 0x80000000;
23 static constexpr size_type READ_ONLY = 0x40000000; 25 static constexpr size_type READ_ONLY = 0x40000000;
24 static constexpr size_type READ_WRITE = 0x00000000; 26 static constexpr size_type READ_WRITE = 0x00000000;
25 27
26 static constexpr size_type FLAGS_MASK = 0xC0000000; 28 static constexpr size_type FLAGS_MASK = 0xC0000000;
27 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; 29 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF;
28 30
29 static constexpr size_type npos = -1; 31 static constexpr size_type npos = -1;
30 32
31 protected: 33 protected:
32 value_type* mBuf; 34 value_type* mBuf;
33 size_type mLen; 35 size_type mLen;
34 36
35 String(value_type* buf, size_type len, size_type flags) 37 explicit String(value_type* buf, size_type len, size_type flags)
36 : mBuf(buf), mLen((len & LENGTH_MASK) | flags) 38 : mBuf(buf), mLen((len & LENGTH_MASK) | flags)
37 { 39 {
38 } 40 }
sergei 2016/02/22 12:46:01 What about assert(mLen > 0 || mBuf == nullptr)?
Wladimir Palant 2016/02/23 12:37:32 Somewhat problematic since asserts take string lit
39 41
40 ~String() 42 ~String()
41 { 43 {
42 } 44 }
43 45
44 void reset(value_type* buf, size_type len, size_type flags) 46 void reset(value_type* buf, size_type len, size_type flags)
45 { 47 {
46 mBuf = buf; 48 mBuf = buf;
47 mLen = (len & LENGTH_MASK) | flags; 49 mLen = (len & LENGTH_MASK) | flags;
48 } 50 }
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 size_type find(value_type c, size_type pos = 0) const 98 size_type find(value_type c, size_type pos = 0) const
97 { 99 {
98 for (size_type i = pos; i < length(); ++i) 100 for (size_type i = pos; i < length(); ++i)
99 if (mBuf[i] == c) 101 if (mBuf[i] == c)
100 return i; 102 return i;
101 return npos; 103 return npos;
102 } 104 }
103 105
104 size_type find(const String& str, size_type pos = 0) const 106 size_type find(const String& str, size_type pos = 0) const
105 { 107 {
106 if (pos + str.length() > length()) 108 if (pos > LENGTH_MASK || pos + str.length() > length())
107 return npos; 109 return npos;
108 110
109 if (!str.length()) 111 if (!str.length())
110 return pos; 112 return pos;
111 113
112 for (; pos + str.length() <= length(); ++pos) 114 for (; pos + str.length() <= length(); ++pos)
113 { 115 {
114 if (mBuf[pos] == str[0] && 116 if (mBuf[pos] == str[0] &&
115 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0) 117 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0)
116 { 118 {
(...skipping 10 matching lines...) Expand all
127 return npos; 129 return npos;
128 130
129 if (pos >= length()) 131 if (pos >= length())
130 pos = length() - 1; 132 pos = length() - 1;
131 133
132 for (int i = pos; i >= 0; --i) 134 for (int i = pos; i >= 0; --i)
133 if (mBuf[i] == c) 135 if (mBuf[i] == c)
134 return i; 136 return i;
135 return npos; 137 return npos;
136 } 138 }
139
140 bool is_invalid() const
141 {
142 return (mLen & FLAGS_MASK) == INVALID;
143 }
144
145 bool is_deleted() const
146 {
147 return (mLen & FLAGS_MASK) == DELETED;
148 }
149
150 void toLower()
151 {
152 size_type len = length();
153 for (size_type i = 0; i < len; ++i)
154 {
155 value_type currChar = mBuf[i];
156
157 // This should be more efficient with a lookup table but I couldn't measure
158 // any performance difference.
159 if (currChar >= u'A' && currChar <= u'Z')
160 mBuf[i] = currChar + u'a' - u'A';
161 else if (currChar >= 128)
162 {
163 // It seems that calling JS is the easiest solution for lowercasing
164 // Unicode characters.
165 mBuf[i] = EM_ASM_INT({
166 return String.fromCharCode($0).toLowerCase().charCodeAt(0);
167 }, currChar);
168 }
169 }
170 }
137 }; 171 };
138 172
139 class DependentString : public String 173 class DependentString : public String
140 { 174 {
141 public: 175 public:
142 DependentString() 176 explicit DependentString()
143 : String(nullptr, 0, INVALID) 177 : String(nullptr, 0, INVALID)
144 { 178 {
145 } 179 }
146 180
147 DependentString(value_type* buf, size_type len) 181 explicit DependentString(value_type* buf, size_type len)
148 : String(buf, len, READ_WRITE) 182 : String(buf, len, READ_WRITE)
149 { 183 {
150 } 184 }
151 185
152 DependentString(const value_type* buf, size_type len) 186 explicit DependentString(const value_type* buf, size_type len)
153 : String(const_cast<value_type*>(buf), len, READ_ONLY) 187 : String(const_cast<value_type*>(buf), len, READ_ONLY)
154 { 188 {
155 } 189 }
156 190
157 DependentString(String& str, size_type pos = 0, size_type len = npos) 191 explicit DependentString(String& str, size_type pos = 0, size_type len = npos)
158 : String( 192 : String(
159 str.mBuf + std::min(pos, str.length()), 193 str.mBuf + std::min(pos, str.length()),
160 std::min(len, str.length() - std::min(pos, str.length())), 194 std::min(len, str.length() - std::min(pos, str.length())),
161 READ_WRITE 195 str.is_readOnly() ? READ_ONLY : READ_WRITE
sergei 2016/02/22 12:45:58 BTW, `READ_WRITE` here should be str.is_readOnly(
162 ) 196 )
163 { 197 {
164 } 198 }
165 199
166 DependentString(const String& str, size_type pos = 0, size_type len = npos) 200 explicit DependentString(const String& str, size_type pos = 0,
201 size_type len = npos)
167 : String( 202 : String(
168 str.mBuf + std::min(pos, str.length()), 203 str.mBuf + std::min(pos, str.length()),
169 std::min(len, str.length() - std::min(pos, str.length())), 204 std::min(len, str.length() - std::min(pos, str.length())),
170 READ_ONLY 205 READ_ONLY
171 ) 206 )
172 { 207 {
173 } 208 }
174 209
175 void reset(value_type* buf, size_type len) 210 void reset(value_type* buf, size_type len)
176 { 211 {
177 *this = DependentString(buf, len); 212 *this = DependentString(buf, len);
178 } 213 }
179 214
180 void reset(const value_type* buf, size_type len) 215 void reset(const value_type* buf, size_type len)
181 { 216 {
182 *this = DependentString(buf, len); 217 *this = DependentString(buf, len);
183 } 218 }
184 219
185 void reset(String& str, size_type pos = 0, size_type len = npos) 220 void reset(String& str, size_type pos = 0, size_type len = npos)
186 { 221 {
187 *this = DependentString(str, pos, len); 222 *this = DependentString(str, pos, len);
188 } 223 }
189 224
190 void reset(const String& str, size_type pos = 0, size_type len = npos) 225 void reset(const String& str, size_type pos = 0, size_type len = npos)
191 { 226 {
192 *this = DependentString(str, pos, len); 227 *this = DependentString(str, pos, len);
193 } 228 }
194 229
195 bool is_invalid() const 230 void erase()
196 { 231 {
197 return (mLen & FLAGS_MASK) == INVALID; 232 *this = DependentString();
198 } 233 mLen = DELETED;
199
200 bool is_deleted() const
201 {
202 return (mLen & FLAGS_MASK) == DELETED;
203 } 234 }
204 }; 235 };
205 236
206 inline DependentString operator "" _str(const String::value_type* str, 237 inline DependentString operator "" _str(const String::value_type* str,
207 String::size_type len) 238 String::size_type len)
208 { 239 {
209 return DependentString(str, len); 240 return DependentString(str, len);
210 } 241 }
211 242
212 inline void String_assert_readonly(bool readOnly) 243 inline void String_assert_readonly(bool readOnly)
213 { 244 {
214 assert(!readOnly, u"Writing access to a read-only string"_str); 245 assert(!readOnly, u"Writing access to a read-only string"_str);
215 } 246 }
216 247
217 class OwnedString : public String 248 class OwnedString : public String
218 { 249 {
219 private: 250 private:
220 value_type* allocate(size_type len) 251 void grow(size_type additionalSize)
252 {
253 OwnedString newValue(length() + additionalSize);
254 if (length() > 0)
255 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length());
256 *this = std::move(newValue);
257 }
258
259 public:
260 explicit OwnedString(size_type len = 0)
261 : String(nullptr, len, READ_WRITE)
221 { 262 {
222 if (len) 263 if (len)
223 return new value_type[len]; 264 {
265 mBuf = new value_type[length()];
266 annotate_address(mBuf, "String");
267 }
224 else 268 else
225 return nullptr; 269 mBuf = nullptr;
226 } 270 }
227 271
228 void grow(size_type additionalSize) 272 explicit OwnedString(const String& str)
229 {
230 size_type oldLength = length();
231 size_type newLength = oldLength + additionalSize;
232 value_type* oldBuffer = mBuf;
233
234 reset(nullptr, newLength, READ_WRITE);
235 newLength = length();
236 mBuf = allocate(newLength);
237 annotate_address(mBuf, "String");
238
239 if (oldLength)
240 std::memcpy(mBuf, oldBuffer, sizeof(value_type) * oldLength);
241 if (oldBuffer)
242 delete[] oldBuffer;
243 }
244
245 public:
246 OwnedString(size_type len = 0)
247 : String(nullptr, len, READ_WRITE)
248 {
249 mBuf = allocate(length());
250 annotate_address(mBuf, "String");
251 }
252
253 OwnedString(const String& str)
254 : OwnedString(str.length()) 273 : OwnedString(str.length())
255 { 274 {
256 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length()); 275 if (length())
276 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length());
257 } 277 }
258 278
259 OwnedString(const OwnedString& str) 279 OwnedString(const OwnedString& str)
260 : OwnedString(static_cast<const String&>(str)) 280 : OwnedString(static_cast<const String&>(str))
261 { 281 {
262 } 282 }
263 283
264 OwnedString(const value_type* str, size_type len) 284 explicit OwnedString(const value_type* str, size_type len)
265 : OwnedString(DependentString(str, len)) 285 : OwnedString(DependentString(str, len))
266 { 286 {
267 } 287 }
268 288
269 OwnedString(OwnedString&& str) 289 explicit OwnedString(OwnedString&& str)
270 : OwnedString(str.length()) 290 : OwnedString(0)
271 {
272 mBuf = str.mBuf;
273 str.mBuf = nullptr;
274 str.mLen = READ_WRITE | 0;
275 }
sergei 2016/02/22 12:45:59 I think here is a memory leak. `OwnedString(str.le
Wladimir Palant 2016/02/23 12:37:33 Done.
276
277 OwnedString(const char* source, size_type len)
sergei 2016/02/22 12:46:00 I'm not sure whether we need this method because i
Wladimir Palant 2016/02/23 12:37:30 Yes, it's somewhat of a hack - removed.
278 : OwnedString(len)
279 {
280 for (size_type i = 0; i < len; i++)
281 mBuf[i] = source[i];
282 }
283
284 ~OwnedString()
285 {
286 if (mBuf)
287 delete[] mBuf;
288 }
289
290 OwnedString& operator=(const String& str)
291 {
292 *this = std::move(OwnedString(str));
293 return *this;
294 }
295
296 OwnedString& operator=(const OwnedString& str)
297 {
298 *this = std::move(OwnedString(str));
299 return *this;
300 }
301
302 OwnedString& operator=(OwnedString&& str)
303 { 291 {
304 mBuf = str.mBuf; 292 mBuf = str.mBuf;
305 mLen = str.mLen; 293 mLen = str.mLen;
306 str.mBuf = nullptr; 294 str.mBuf = nullptr;
307 str.mLen = READ_WRITE | 0; 295 str.mLen = READ_WRITE | 0;
296 }
297
298 ~OwnedString()
299 {
300 if (mBuf)
301 delete[] mBuf;
302 }
303
304 OwnedString& operator=(const String& str)
305 {
306 *this = std::move(OwnedString(str));
307 return *this;
308 }
309
310 OwnedString& operator=(const OwnedString& str)
311 {
312 *this = std::move(OwnedString(str));
313 return *this;
314 }
315
316 OwnedString& operator=(OwnedString&& str)
317 {
318 std::swap(mBuf, str.mBuf);
319 std::swap(mLen, str.mLen);
308 return *this; 320 return *this;
309 } 321 }
310 322
311 void append(const value_type* source, size_type sourceLen) 323 void append(const value_type* source, size_type sourceLen)
312 { 324 {
313 if (!sourceLen) 325 if (!sourceLen)
314 return; 326 return;
315 327
316 assert(source, u"Null buffer passed to OwnedString.append()"_str); 328 assert(source, u"Null buffer passed to OwnedString.append()"_str);
317 size_t oldLength = length(); 329 size_t oldLength = length();
318 grow(sourceLen); 330 grow(sourceLen);
319 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); 331 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen);
320 } 332 }
321 333
322 void append(const String& str) 334 void append(const String& str)
323 { 335 {
324 append(str.mBuf, str.length()); 336 append(str.mBuf, str.length());
325 } 337 }
326 338
327 void append(value_type c) 339 void append(value_type c)
328 { 340 {
329 append(&c, 1); 341 append(&c, 1);
330 } 342 }
331 }; 343 };
LEFT RIGHT

Powered by Google App Engine
This is Rietveld