Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/String.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Fixed annotation for hash buffers Created Jan. 28, 2016, 5:07 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/RegExpFilter.cpp ('k') | compiled/StringMap.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 #ifndef ADBLOCK_PLUS_STRING_H 1 #pragma once
2 #define ADBLOCK_PLUS_STRING_H
3 2
4 #include <cstddef> 3 #include <cstddef>
5 #include <cstring> 4 #include <cstring>
6 #include <algorithm> 5 #include <algorithm>
7 6
7 #include <emscripten.h>
8
8 #include "debug.h" 9 #include "debug.h"
9 10
11 inline void String_assert_readonly(bool readOnly);
12
10 class String 13 class String
11 { 14 {
15 friend class DependentString;
16 friend class OwnedString;
17
12 public: 18 public:
13 typedef char16_t value_type; 19 typedef char16_t value_type;
14 typedef size_t size_type; 20 typedef size_t size_type;
15 21
16 // Type flags, stored in the top 2 bits of the mLen member 22 // Type flags, stored in the top 2 bits of the mLen member
17 static constexpr size_type OWNBUFFER = 0xC0000000; 23 static constexpr size_type INVALID = 0xC0000000;
18 static constexpr size_type INVALID = 0x80000000; 24 static constexpr size_type DELETED = 0x80000000;
19 static constexpr size_type DELETED = 0x40000000; 25 static constexpr size_type READ_ONLY = 0x40000000;
20 static constexpr size_type DEPENDENT = 0x00000000; 26 static constexpr size_type READ_WRITE = 0x00000000;
27
21 static constexpr size_type FLAGS_MASK = 0xC0000000; 28 static constexpr size_type FLAGS_MASK = 0xC0000000;
22 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; 29 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF;
23 30
24 static constexpr size_type npos = -1; 31 static constexpr size_type npos = -1;
25 32
26 private: 33 protected:
27 value_type* mBuf; 34 value_type* mBuf;
28 size_type mLen; 35 size_type mLen;
29 36
30 value_type* allocate(size_type len) 37 explicit String(value_type* buf, size_type len, size_type flags)
31 { 38 : mBuf(buf), mLen((len & LENGTH_MASK) | flags)
32 if (len) 39 {
33 return new value_type[len]; 40 }
34 else 41
35 return nullptr; 42 ~String()
36 } 43 {
37 44 }
38 void resize(size_type newLength, bool copy) 45
39 { 46 void reset(value_type* buf, size_type len, size_type flags)
40 bool owningOldBuffer = owns_buffer(); 47 {
41 size_type oldLength = length(); 48 mBuf = buf;
42 value_type* oldBuffer = mBuf; 49 mLen = (len & LENGTH_MASK) | flags;
43
44 newLength &= LENGTH_MASK;
45 mBuf = allocate(newLength);
46 annotate_address(mBuf, "String");
47 mLen = OWNBUFFER | newLength;
48
49 if (copy && oldLength)
50 memcpy(mBuf, oldBuffer, sizeof(value_type) * std::min(oldLength, newLength));
51 if (owningOldBuffer)
52 delete[] oldBuffer;
53 } 50 }
54 51
55 public: 52 public:
56 String() : mBuf(nullptr), mLen(INVALID) {}
57
58 String(size_type len)
59 : mBuf(allocate(len & LENGTH_MASK)), mLen(OWNBUFFER | (len & LENGTH_MASK))
60 {
61 annotate_address(mBuf, "String");
62 }
63
64 String(value_type* buf, size_type len)
65 : mBuf(buf), mLen(DEPENDENT | (buf ? len & LENGTH_MASK: 0))
66 {
67 }
68
69 String(const String& str, size_type pos = 0, size_type len = npos)
70 : mBuf(str.mBuf + std::min(pos, str.length())),
71 mLen(DEPENDENT | std::min(len, str.length() - (mBuf - str.mBuf)))
72 {
73 }
74
75 String(String&& str)
76 {
77 *this = std::move(str);
78 }
79
80 String(const char* source, size_type len)
81 : String(len)
82 {
83 for (size_type i = 0; i < len; i++)
84 mBuf[i] = source[i];
85 }
86
87 void operator=(const String& str)
88 {
89 mBuf = str.mBuf;
90 mLen = DEPENDENT | str.length();
91 }
92
93 void operator=(String& str)
94 {
95 reset(str);
96 }
97
98 String& operator=(String&& str)
99 {
100 mBuf = str.mBuf;
101 mLen = str.mLen;
102 str.mBuf = nullptr;
103 str.mLen = INVALID;
104 ensure_own_buffer();
105 return *this;
106 }
107
108 ~String()
109 {
110 if (owns_buffer())
111 delete[] mBuf;
112 }
113
114 void reset(value_type* buf, size_type len)
115 {
116 mBuf = buf;
117 mLen = (DEPENDENT | (buf ? len & LENGTH_MASK: 0));
118 }
119
120 void reset(const String& str, size_type pos = 0, size_type len = npos)
121 {
122 pos = std::min(pos, str.length());
123 len = std::min(len, str.length() - pos);
124 reset(str.mBuf + pos, len);
125 }
126
127 size_type length() const 53 size_type length() const
128 { 54 {
129 return mLen & LENGTH_MASK; 55 return mLen & LENGTH_MASK;
130 } 56 }
131 57
132 bool empty() const 58 bool empty() const
133 { 59 {
134 return !(mLen & LENGTH_MASK); 60 return !(mLen & LENGTH_MASK);
135 } 61 }
136 62
137 const value_type* data() const 63 const value_type* data() const
138 { 64 {
139 return mBuf; 65 return mBuf;
140 } 66 }
141 67
142 value_type* data() 68 value_type* data()
143 { 69 {
70 String_assert_readonly(is_readOnly());
144 return mBuf; 71 return mBuf;
145 } 72 }
146 73
147 const value_type& operator[](size_type pos) const 74 const value_type& operator[](size_type pos) const
148 { 75 {
149 return mBuf[pos]; 76 return mBuf[pos];
150 } 77 }
151 78
152 value_type& operator[](size_type pos) 79 value_type& operator[](size_type pos)
153 { 80 {
81 String_assert_readonly(is_readOnly());
154 return mBuf[pos]; 82 return mBuf[pos];
155 } 83 }
156 84
85 bool is_readOnly() const
86 {
87 return (mLen & FLAGS_MASK) != READ_WRITE;
88 }
89
157 bool equals(const String& other) const 90 bool equals(const String& other) const
158 { 91 {
159 if (mLen != other.mLen) 92 if (length() != other.length())
160 return false; 93 return false;
161 94
162 return memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0; 95 return std::memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0;
163 } 96 }
164 97
165 size_type find(value_type c, size_type pos = 0) const 98 size_type find(value_type c, size_type pos = 0) const
166 { 99 {
167 for (size_type i = pos; i < length(); ++i) 100 for (size_type i = pos; i < length(); ++i)
168 if (mBuf[i] == c) 101 if (mBuf[i] == c)
169 return i; 102 return i;
170 return npos; 103 return npos;
171 } 104 }
172 105
173 size_type find(const String& str, size_type pos = 0) const 106 size_type find(const String& str, size_type pos = 0) const
174 { 107 {
108 if (pos > LENGTH_MASK || pos + str.length() > length())
109 return npos;
110
175 if (!str.length()) 111 if (!str.length())
176 return pos; 112 return pos;
177 113
178 if (length() - pos < str.length()) 114 for (; pos + str.length() <= length(); ++pos)
179 return npos;
180
181 for (; pos < length() - str.length(); ++pos)
182 { 115 {
183 if (mBuf[pos] == str[0] && 116 if (mBuf[pos] == str[0] &&
184 memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0) 117 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0)
185 { 118 {
186 return pos; 119 return pos;
187 } 120 }
188 } 121 }
189 122
190 return npos; 123 return npos;
191 } 124 }
192 125
193 size_type rfind(value_type c, size_type pos = npos) const 126 size_type rfind(value_type c, size_type pos = npos) const
194 { 127 {
195 if (length() == 0) 128 if (length() == 0)
196 return npos; 129 return npos;
197 130
198 if (pos == npos) 131 if (pos >= length())
199 pos = length() - 1; 132 pos = length() - 1;
200 133
201 for (int i = pos; i >= 0; --i) 134 for (int i = pos; i >= 0; --i)
202 if (mBuf[i] == c) 135 if (mBuf[i] == c)
203 return i; 136 return i;
204 return npos; 137 return npos;
205 } 138 }
206 139
140 bool is_invalid() const
141 {
142 return (mLen & FLAGS_MASK) == INVALID;
143 }
144
145 bool is_deleted() const
146 {
147 return (mLen & FLAGS_MASK) == DELETED;
148 }
149
150 void toLower()
151 {
152 size_type len = length();
153 for (size_type i = 0; i < len; ++i)
154 {
155 value_type currChar = mBuf[i];
156
157 // This should be more efficient with a lookup table but I couldn't measure
158 // any performance difference.
159 if (currChar >= u'A' && currChar <= u'Z')
160 mBuf[i] = currChar + u'a' - u'A';
161 else if (currChar >= 128)
162 {
163 // It seems that calling JS is the easiest solution for lowercasing
164 // Unicode characters.
165 mBuf[i] = EM_ASM_INT({
166 return String.fromCharCode($0).toLowerCase().charCodeAt(0);
167 }, currChar);
168 }
169 }
170 }
171 };
172
173 class DependentString : public String
174 {
175 public:
176 explicit DependentString()
177 : String(nullptr, 0, INVALID)
178 {
179 }
180
181 explicit DependentString(value_type* buf, size_type len)
182 : String(buf, len, READ_WRITE)
183 {
184 }
185
186 explicit DependentString(const value_type* buf, size_type len)
187 : String(const_cast<value_type*>(buf), len, READ_ONLY)
188 {
189 }
190
191 explicit DependentString(String& str, size_type pos = 0, size_type len = npos)
192 : String(
193 str.mBuf + std::min(pos, str.length()),
194 std::min(len, str.length() - std::min(pos, str.length())),
195 str.is_readOnly() ? READ_ONLY : READ_WRITE
196 )
197 {
198 }
199
200 explicit DependentString(const String& str, size_type pos = 0,
201 size_type len = npos)
202 : String(
203 str.mBuf + std::min(pos, str.length()),
204 std::min(len, str.length() - std::min(pos, str.length())),
205 READ_ONLY
206 )
207 {
208 }
209
210 void reset(value_type* buf, size_type len)
211 {
212 *this = DependentString(buf, len);
213 }
214
215 void reset(const value_type* buf, size_type len)
216 {
217 *this = DependentString(buf, len);
218 }
219
220 void reset(String& str, size_type pos = 0, size_type len = npos)
221 {
222 *this = DependentString(str, pos, len);
223 }
224
225 void reset(const String& str, size_type pos = 0, size_type len = npos)
226 {
227 *this = DependentString(str, pos, len);
228 }
229
230 void erase()
231 {
232 *this = DependentString();
233 mLen = DELETED;
234 }
235 };
236
237 inline DependentString operator "" _str(const String::value_type* str,
238 String::size_type len)
239 {
240 return DependentString(str, len);
241 }
242
243 inline void String_assert_readonly(bool readOnly)
244 {
245 assert(!readOnly, u"Writing access to a read-only string"_str);
246 }
247
248 class OwnedString : public String
249 {
250 private:
251 void grow(size_type additionalSize)
252 {
253 OwnedString newValue(length() + additionalSize);
254 if (length() > 0)
255 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length());
256 *this = std::move(newValue);
257 }
258
259 public:
260 explicit OwnedString(size_type len = 0)
261 : String(nullptr, len, READ_WRITE)
262 {
263 if (len)
264 {
265 mBuf = new value_type[length()];
266 annotate_address(mBuf, "String");
267 }
268 else
269 mBuf = nullptr;
270 }
271
272 explicit OwnedString(const String& str)
273 : OwnedString(str.length())
274 {
275 if (length())
276 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length());
277 }
278
279 OwnedString(const OwnedString& str)
280 : OwnedString(static_cast<const String&>(str))
281 {
282 }
283
284 explicit OwnedString(const value_type* str, size_type len)
285 : OwnedString(DependentString(str, len))
286 {
287 }
288
289 explicit OwnedString(OwnedString&& str)
290 : OwnedString(0)
291 {
292 mBuf = str.mBuf;
293 mLen = str.mLen;
294 str.mBuf = nullptr;
295 str.mLen = READ_WRITE | 0;
296 }
297
298 ~OwnedString()
299 {
300 if (mBuf)
301 delete[] mBuf;
302 }
303
304 OwnedString& operator=(const String& str)
305 {
306 *this = std::move(OwnedString(str));
307 return *this;
308 }
309
310 OwnedString& operator=(const OwnedString& str)
311 {
312 *this = std::move(OwnedString(str));
313 return *this;
314 }
315
316 OwnedString& operator=(OwnedString&& str)
317 {
318 std::swap(mBuf, str.mBuf);
319 std::swap(mLen, str.mLen);
320 return *this;
321 }
322
207 void append(const value_type* source, size_type sourceLen) 323 void append(const value_type* source, size_type sourceLen)
208 { 324 {
209 if (!sourceLen) 325 if (!sourceLen)
210 return; 326 return;
211 327
328 assert(source, u"Null buffer passed to OwnedString.append()"_str);
212 size_t oldLength = length(); 329 size_t oldLength = length();
213 resize(oldLength + sourceLen, true); 330 grow(sourceLen);
214 memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); 331 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen);
215 } 332 }
216 333
217 void append(const String& str) 334 void append(const String& str)
218 { 335 {
219 append(str.mBuf, str.length()); 336 append(str.mBuf, str.length());
220 } 337 }
221 338
222 void append(value_type c) 339 void append(value_type c)
223 { 340 {
224 append(&c, 1); 341 append(&c, 1);
225 } 342 }
226
227 bool owns_buffer() const
228 {
229 return mBuf && (mLen & FLAGS_MASK) == OWNBUFFER;
230 }
231
232 void ensure_own_buffer()
233 {
234 size_type len = length();
235 if (len && !owns_buffer())
236 resize(len, true);
237 }
238
239 bool is_dependent() const
240 {
241 return (mLen & FLAGS_MASK) == DEPENDENT;
242 }
243
244 bool is_invalid() const
245 {
246 return (mLen & FLAGS_MASK) == INVALID;
247 }
248
249 bool is_deleted() const
250 {
251 return (mLen & FLAGS_MASK) == DELETED;
252 }
253 }; 343 };
254
255 inline String operator "" _str(const String::value_type* str,
256 String::size_type len)
257 {
258 return String(const_cast<String::value_type*>(str), len);
259 }
260 #endif
LEFT | RIGHT

Powered by Google App Engine
This is Rietveld