Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/String.h

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Merged filter parsing and normalization Created Feb. 4, 2016, 3:01 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/RegExpFilter.cpp ('k') | compiled/StringMap.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 #ifndef ADBLOCK_PLUS_STRING_H 1 #pragma once
2 #define ADBLOCK_PLUS_STRING_H
3 2
4 #include <cstddef> 3 #include <cstddef>
5 #include <cstring> 4 #include <cstring>
6 #include <algorithm> 5 #include <algorithm>
7 6
7 #include <emscripten.h>
8
8 #include "debug.h" 9 #include "debug.h"
9 10
10 inline void String_assert_readonly(bool readOnly); 11 inline void String_assert_readonly(bool readOnly);
11 12
12 class String 13 class String
13 { 14 {
15 friend class DependentString;
16 friend class OwnedString;
17
14 public: 18 public:
15 typedef char16_t value_type; 19 typedef char16_t value_type;
16 typedef size_t size_type; 20 typedef size_t size_type;
17 21
18 // Type flags, stored in the top 2 bits of the mLen member 22 // Type flags, stored in the top 2 bits of the mLen member
19 static constexpr size_type OWNBUFFER = 0xC0000000; 23 static constexpr size_type INVALID = 0xC0000000;
20 static constexpr size_type INVALID = 0x80000000; 24 static constexpr size_type DELETED = 0x80000000;
21 static constexpr size_type DELETED = 0x40000000; 25 static constexpr size_type READ_ONLY = 0x40000000;
22 static constexpr size_type DEPENDENT = 0x00000000;
23
24 // Read-only flag (for debug asserts only)
25 static constexpr size_type READ_ONLY = 0x20000000;
26 static constexpr size_type READ_WRITE = 0x00000000; 26 static constexpr size_type READ_WRITE = 0x00000000;
27 27
28 static constexpr size_type FLAGS_MASK = 0xE0000000; 28 static constexpr size_type FLAGS_MASK = 0xC0000000;
29 static constexpr size_type LENGTH_MASK = 0x1FFFFFFF; 29 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF;
30 30
31 static constexpr size_type npos = -1; 31 static constexpr size_type npos = -1;
32 32
33 private: 33 protected:
34 value_type* mBuf; 34 value_type* mBuf;
35 size_type mLen; 35 size_type mLen;
36 36
37 value_type* allocate(size_type len) 37 explicit String(value_type* buf, size_type len, size_type flags)
38 { 38 : mBuf(buf), mLen((len & LENGTH_MASK) | flags)
39 if (len) 39 {
40 return new value_type[len]; 40 }
41 else 41
42 return nullptr; 42 ~String()
43 } 43 {
44 44 }
45 void resize(size_type newLength, bool copy) 45
46 { 46 void reset(value_type* buf, size_type len, size_type flags)
47 bool owningOldBuffer = owns_buffer(); 47 {
48 size_type oldLength = length(); 48 mBuf = buf;
49 value_type* oldBuffer = mBuf; 49 mLen = (len & LENGTH_MASK) | flags;
50
51 newLength &= LENGTH_MASK;
52 mBuf = allocate(newLength);
53 annotate_address(mBuf, "String");
54 mLen = OWNBUFFER | READ_WRITE | newLength;
55
56 if (copy && oldLength)
57 memcpy(mBuf, oldBuffer, sizeof(value_type) * std::min(oldLength, newLength));
58 if (owningOldBuffer)
59 delete[] oldBuffer;
60 } 50 }
61 51
62 public: 52 public:
63 String() : mBuf(nullptr), mLen(INVALID) {}
64
65 String(size_type len)
66 : mBuf(allocate(len & LENGTH_MASK)),
67 mLen(OWNBUFFER | READ_WRITE | (len & LENGTH_MASK))
68 {
69 annotate_address(mBuf, "String");
70 }
71
72 String(value_type* buf, size_type len)
73 : mBuf(buf), mLen(DEPENDENT | READ_WRITE | (buf ? len & LENGTH_MASK: 0))
74 {
75 }
76
77 String(const value_type* buf, size_type len)
78 : mBuf(const_cast<value_type*>(buf)),
79 mLen(DEPENDENT | READ_ONLY | (buf ? len & LENGTH_MASK: 0))
80 {
81 }
82
83 String(String& str, size_type pos = 0, size_type len = npos)
84 : mBuf(str.mBuf + std::min(pos, str.length())),
85 mLen(DEPENDENT | READ_WRITE | std::min(len, str.length() - (mBuf - str.mBuf)))
86 {
87 }
88
89 String(const String& str, size_type pos = 0, size_type len = npos)
90 : mBuf(str.mBuf + std::min(pos, str.length())),
91 mLen(DEPENDENT | READ_ONLY | std::min(len, str.length() - (mBuf - str.mBuf)))
92 {
93 }
94
95 String(String&& str)
96 {
97 *this = std::move(str);
98 }
99
100 String(const char* source, size_type len)
101 : String(len)
102 {
103 for (size_type i = 0; i < len; i++)
104 mBuf[i] = source[i];
105 }
106
107 String& operator=(const String& str)
108 {
109 reset(str);
110 return *this;
111 }
112
113 String& operator=(String& str)
114 {
115 reset(str);
116 return *this;
117 }
118
119 String& operator=(String&& str)
120 {
121 mBuf = str.mBuf;
122 mLen = str.mLen;
123 str.mBuf = nullptr;
124 str.mLen = INVALID;
125 return *this;
126 }
127
128 ~String()
129 {
130 if (owns_buffer())
131 delete[] mBuf;
132 }
133
134 void reset(value_type* buf, size_type len)
135 {
136 mBuf = buf;
137 mLen = (DEPENDENT | READ_WRITE | (buf ? len & LENGTH_MASK: 0));
138 }
139
140 void reset(const value_type* buf, size_type len)
141 {
142 mBuf = const_cast<value_type*>(buf);
143 mLen = (DEPENDENT | READ_ONLY | (buf ? len & LENGTH_MASK: 0));
144 }
145
146 void reset(String& str, size_type pos = 0, size_type len = npos)
147 {
148 pos = std::min(pos, str.length());
149 len = std::min(len, str.length() - pos);
150 reset(str.mBuf + pos, len);
151 }
152
153 void reset(const String& str, size_type pos = 0, size_type len = npos)
154 {
155 pos = std::min(pos, str.length());
156 len = std::min(len, str.length() - pos);
157 reset(const_cast<const value_type*>(str.mBuf + pos), len);
158 }
159
160 size_type length() const 53 size_type length() const
161 { 54 {
162 return mLen & LENGTH_MASK; 55 return mLen & LENGTH_MASK;
163 } 56 }
164 57
165 bool empty() const 58 bool empty() const
166 { 59 {
167 return !(mLen & LENGTH_MASK); 60 return !(mLen & LENGTH_MASK);
168 } 61 }
169 62
170 const value_type* data() const 63 const value_type* data() const
171 { 64 {
172 return mBuf; 65 return mBuf;
173 } 66 }
174 67
175 value_type* data() 68 value_type* data()
176 { 69 {
177 String_assert_readonly(mLen & READ_ONLY); 70 String_assert_readonly(is_readOnly());
178 return mBuf; 71 return mBuf;
179 } 72 }
180 73
181 const value_type& operator[](size_type pos) const 74 const value_type& operator[](size_type pos) const
182 { 75 {
183 return mBuf[pos]; 76 return mBuf[pos];
184 } 77 }
185 78
186 value_type& operator[](size_type pos) 79 value_type& operator[](size_type pos)
187 { 80 {
188 String_assert_readonly(mLen & READ_ONLY); 81 String_assert_readonly(is_readOnly());
189 return mBuf[pos]; 82 return mBuf[pos];
83 }
84
85 bool is_readOnly() const
86 {
87 return (mLen & FLAGS_MASK) != READ_WRITE;
190 } 88 }
191 89
192 bool equals(const String& other) const 90 bool equals(const String& other) const
193 { 91 {
194 if (length() != other.length()) 92 if (length() != other.length())
195 return false; 93 return false;
196 94
197 return memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0; 95 return std::memcmp(mBuf, other.mBuf, sizeof(value_type) * length()) == 0;
198 } 96 }
199 97
200 size_type find(value_type c, size_type pos = 0) const 98 size_type find(value_type c, size_type pos = 0) const
201 { 99 {
202 for (size_type i = pos; i < length(); ++i) 100 for (size_type i = pos; i < length(); ++i)
203 if (mBuf[i] == c) 101 if (mBuf[i] == c)
204 return i; 102 return i;
205 return npos; 103 return npos;
206 } 104 }
207 105
208 size_type find(const String& str, size_type pos = 0) const 106 size_type find(const String& str, size_type pos = 0) const
209 { 107 {
108 if (pos > LENGTH_MASK || pos + str.length() > length())
109 return npos;
110
210 if (!str.length()) 111 if (!str.length())
211 return pos; 112 return pos;
212 113
213 if (length() - pos < str.length()) 114 for (; pos + str.length() <= length(); ++pos)
214 return npos;
215
216 for (; pos < length() - str.length(); ++pos)
217 { 115 {
218 if (mBuf[pos] == str[0] && 116 if (mBuf[pos] == str[0] &&
219 memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0) 117 std::memcmp(mBuf + pos, str.mBuf, sizeof(value_type) * str.length()) == 0)
220 { 118 {
221 return pos; 119 return pos;
222 } 120 }
223 } 121 }
224 122
225 return npos; 123 return npos;
226 } 124 }
227 125
228 size_type rfind(value_type c, size_type pos = npos) const 126 size_type rfind(value_type c, size_type pos = npos) const
229 { 127 {
230 if (length() == 0) 128 if (length() == 0)
231 return npos; 129 return npos;
232 130
233 if (pos == npos) 131 if (pos >= length())
234 pos = length() - 1; 132 pos = length() - 1;
235 133
236 for (int i = pos; i >= 0; --i) 134 for (int i = pos; i >= 0; --i)
237 if (mBuf[i] == c) 135 if (mBuf[i] == c)
238 return i; 136 return i;
239 return npos; 137 return npos;
240 } 138 }
241 139
140 bool is_invalid() const
141 {
142 return (mLen & FLAGS_MASK) == INVALID;
143 }
144
145 bool is_deleted() const
146 {
147 return (mLen & FLAGS_MASK) == DELETED;
148 }
149
150 void toLower()
151 {
152 size_type len = length();
153 for (size_type i = 0; i < len; ++i)
154 {
155 value_type currChar = mBuf[i];
156
157 // This should be more efficient with a lookup table but I couldn't measure
158 // any performance difference.
159 if (currChar >= u'A' && currChar <= u'Z')
160 mBuf[i] = currChar + u'a' - u'A';
161 else if (currChar >= 128)
162 {
163 // It seems that calling JS is the easiest solution for lowercasing
164 // Unicode characters.
165 mBuf[i] = EM_ASM_INT({
166 return String.fromCharCode($0).toLowerCase().charCodeAt(0);
167 }, currChar);
168 }
169 }
170 }
171 };
172
173 class DependentString : public String
174 {
175 public:
176 explicit DependentString()
177 : String(nullptr, 0, INVALID)
178 {
179 }
180
181 explicit DependentString(value_type* buf, size_type len)
182 : String(buf, len, READ_WRITE)
183 {
184 }
185
186 explicit DependentString(const value_type* buf, size_type len)
187 : String(const_cast<value_type*>(buf), len, READ_ONLY)
188 {
189 }
190
191 explicit DependentString(String& str, size_type pos = 0, size_type len = npos)
192 : String(
193 str.mBuf + std::min(pos, str.length()),
194 std::min(len, str.length() - std::min(pos, str.length())),
195 str.is_readOnly() ? READ_ONLY : READ_WRITE
196 )
197 {
198 }
199
200 explicit DependentString(const String& str, size_type pos = 0,
201 size_type len = npos)
202 : String(
203 str.mBuf + std::min(pos, str.length()),
204 std::min(len, str.length() - std::min(pos, str.length())),
205 READ_ONLY
206 )
207 {
208 }
209
210 void reset(value_type* buf, size_type len)
211 {
212 *this = DependentString(buf, len);
213 }
214
215 void reset(const value_type* buf, size_type len)
216 {
217 *this = DependentString(buf, len);
218 }
219
220 void reset(String& str, size_type pos = 0, size_type len = npos)
221 {
222 *this = DependentString(str, pos, len);
223 }
224
225 void reset(const String& str, size_type pos = 0, size_type len = npos)
226 {
227 *this = DependentString(str, pos, len);
228 }
229
230 void erase()
231 {
232 *this = DependentString();
233 mLen = DELETED;
234 }
235 };
236
237 inline DependentString operator "" _str(const String::value_type* str,
238 String::size_type len)
239 {
240 return DependentString(str, len);
241 }
242
243 inline void String_assert_readonly(bool readOnly)
244 {
245 assert(!readOnly, u"Writing access to a read-only string"_str);
246 }
247
248 class OwnedString : public String
249 {
250 private:
251 void grow(size_type additionalSize)
252 {
253 OwnedString newValue(length() + additionalSize);
254 if (length() > 0)
255 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length());
256 *this = std::move(newValue);
257 }
258
259 public:
260 explicit OwnedString(size_type len = 0)
261 : String(nullptr, len, READ_WRITE)
262 {
263 if (len)
264 {
265 mBuf = new value_type[length()];
266 annotate_address(mBuf, "String");
267 }
268 else
269 mBuf = nullptr;
270 }
271
272 explicit OwnedString(const String& str)
273 : OwnedString(str.length())
274 {
275 if (length())
276 std::memcpy(mBuf, str.mBuf, sizeof(value_type) * length());
277 }
278
279 OwnedString(const OwnedString& str)
280 : OwnedString(static_cast<const String&>(str))
281 {
282 }
283
284 explicit OwnedString(const value_type* str, size_type len)
285 : OwnedString(DependentString(str, len))
286 {
287 }
288
289 explicit OwnedString(OwnedString&& str)
290 : OwnedString(0)
291 {
292 mBuf = str.mBuf;
293 mLen = str.mLen;
294 str.mBuf = nullptr;
295 str.mLen = READ_WRITE | 0;
296 }
297
298 ~OwnedString()
299 {
300 if (mBuf)
301 delete[] mBuf;
302 }
303
304 OwnedString& operator=(const String& str)
305 {
306 *this = std::move(OwnedString(str));
307 return *this;
308 }
309
310 OwnedString& operator=(const OwnedString& str)
311 {
312 *this = std::move(OwnedString(str));
313 return *this;
314 }
315
316 OwnedString& operator=(OwnedString&& str)
317 {
318 std::swap(mBuf, str.mBuf);
319 std::swap(mLen, str.mLen);
320 return *this;
321 }
322
242 void append(const value_type* source, size_type sourceLen) 323 void append(const value_type* source, size_type sourceLen)
243 { 324 {
244 if (!sourceLen) 325 if (!sourceLen)
245 return; 326 return;
246 327
328 assert(source, u"Null buffer passed to OwnedString.append()"_str);
247 size_t oldLength = length(); 329 size_t oldLength = length();
248 resize(oldLength + sourceLen, true); 330 grow(sourceLen);
249 memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); 331 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen);
250 } 332 }
251 333
252 void append(const String& str) 334 void append(const String& str)
253 { 335 {
254 append(str.mBuf, str.length()); 336 append(str.mBuf, str.length());
255 } 337 }
256 338
257 void append(value_type c) 339 void append(value_type c)
258 { 340 {
259 append(&c, 1); 341 append(&c, 1);
260 } 342 }
261
262 bool owns_buffer() const
263 {
264 return mBuf && (mLen & FLAGS_MASK) == OWNBUFFER;
265 }
266
267 String& ensure_own_buffer()
268 {
269 size_type len = length();
270 if (len && !owns_buffer())
271 resize(len, true);
272 return *this;
273 }
274
275 bool is_dependent() const
276 {
277 return (mLen & FLAGS_MASK) == DEPENDENT;
278 }
279
280 bool is_invalid() const
281 {
282 return (mLen & FLAGS_MASK) == INVALID;
283 }
284
285 bool is_deleted() const
286 {
287 return (mLen & FLAGS_MASK) == DELETED;
288 }
289 }; 343 };
290
291 inline String operator "" _str(const String::value_type* str,
292 String::size_type len)
293 {
294 return String(const_cast<String::value_type*>(str), len);
295 }
296
297 inline void String_assert_readonly(bool readOnly)
298 {
299 assert(!readOnly, u"Writing access to a read-only string"_str);
300 }
301
302 #endif
LEFT RIGHT

Powered by Google App Engine
This is Rietveld