Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/String.h

Issue 29721753: Issue 6180 - use ABP_TEXT everywhere in order to let String be a UTF-8 string (Closed) Base URL: https://github.com/adblockplus/adblockpluscore.git@adb2678354813ce5b6de095072954c5a784a7bc4
Patch Set: rebase Created March 15, 2018, 1:53 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/Map.h ('k') | compiled/String.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 19 matching lines...) Expand all
30 #include <limits> 30 #include <limits>
31 31
32 #include "base.h" 32 #include "base.h"
33 #include "debug.h" 33 #include "debug.h"
34 #include "library.h" 34 #include "library.h"
35 35
36 ABP_NS_BEGIN 36 ABP_NS_BEGIN
37 37
38 inline void String_assert_writable(bool isWritable); 38 inline void String_assert_writable(bool isWritable);
39 39
40 // hacky because without templates
41 #ifdef ABP_UTF8_STRING
42 #define ABP_TEXT(val) val
43 struct StringTraits
44 {
45 typedef char char_type;
46 };
47 #else
48 #define ABP_TEXT(val) u##val
49 struct StringTraits
50 {
51 typedef char16_t char_type;
52 };
53 #endif
54
40 class String 55 class String
41 { 56 {
42 friend class DependentString; 57 friend class DependentString;
43 friend class OwnedString; 58 friend class OwnedString;
44 59
45 public: 60 public:
46 typedef char16_t value_type; 61 typedef StringTraits::char_type value_type;
47 typedef size_t size_type; 62 typedef size_t size_type;
48 63
49 // Type flags, stored in the top 2 bits of the mLen member 64 // Type flags, stored in the top 2 bits of the mLen member
50 static constexpr size_type INVALID = 0xC0000000; 65 static constexpr size_type INVALID = 0xC0000000;
51 static constexpr size_type DELETED = 0x80000000; 66 static constexpr size_type DELETED = 0x80000000;
52 static constexpr size_type READ_ONLY = 0x40000000; 67 static constexpr size_type READ_ONLY = 0x40000000;
53 static constexpr size_type READ_WRITE = 0x00000000; 68 static constexpr size_type READ_WRITE = 0x00000000;
54 69
55 static constexpr size_type FLAGS_MASK = 0xC0000000; 70 static constexpr size_type FLAGS_MASK = 0xC0000000;
56 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; 71 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF;
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 206
192 void toLower() 207 void toLower()
193 { 208 {
194 size_type len = length(); 209 size_type len = length();
195 for (size_type i = 0; i < len; ++i) 210 for (size_type i = 0; i < len; ++i)
196 { 211 {
197 value_type currChar = mBuf[i]; 212 value_type currChar = mBuf[i];
198 213
199 // This should be more efficient with a lookup table but I couldn't measure 214 // This should be more efficient with a lookup table but I couldn't measure
200 // any performance difference. 215 // any performance difference.
201 if (currChar >= u'A' && currChar <= u'Z') 216 if (currChar >= ABP_TEXT('A') && currChar <= ABP_TEXT('Z'))
202 mBuf[i] = currChar + u'a' - u'A'; 217 mBuf[i] = currChar + ABP_TEXT('a') - ABP_TEXT('A');
203 else if (currChar >= 128) 218 else if (currChar >= 128)
204 { 219 {
205 mBuf[i] = CharToLower(currChar); 220 mBuf[i] = CharToLower(currChar);
206 } 221 }
207 } 222 }
208 } 223 }
209 }; 224 };
210 225
211 #ifdef INSIDE_TESTS 226 #ifdef INSIDE_TESTS
212 inline std::ostream& operator<<(std::ostream& os, const String& str) 227 inline std::ostream& operator<<(std::ostream& os, const String& str)
213 { 228 {
229 #ifdef ABP_UTF8_STRING
230 os.write(str.data(), str.length());
231 #else
214 #if _MSC_VER >= 1900 232 #if _MSC_VER >= 1900
215 std::wstring_convert<std::codecvt_utf8_utf16<int16_t>, int16_t> converter; 233 std::wstring_convert<std::codecvt_utf8_utf16<int16_t>, int16_t> converter;
216 auto p = reinterpret_cast<const int16_t *>(str.data()); 234 auto p = reinterpret_cast<const int16_t *>(str.data());
217 os << converter.to_bytes(p, p + str.length()); 235 os << converter.to_bytes(p, p + str.length());
218 #else 236 #else
219 std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> converter; 237 std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> converter;
220 os << converter.to_bytes(str.data(), str.data() + str.length()); 238 os << converter.to_bytes(str.data(), str.data() + str.length());
221 #endif 239 #endif // _MSC_VER >= 1900
240 #endif // ABP_UTF8_STRING
222 return os; 241 return os;
223 } 242 }
224 #endif 243 #endif // INSIDE_TESTS
225 244
226 class DependentString : public String 245 class DependentString : public String
227 { 246 {
228 public: 247 public:
229 explicit DependentString() 248 explicit DependentString()
230 : String(nullptr, 0, INVALID) 249 : String(nullptr, 0, INVALID)
231 { 250 {
232 } 251 }
233 252
234 explicit DependentString(value_type* buf, size_type len) 253 explicit DependentString(value_type* buf, size_type len)
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
295 #endif 314 #endif
296 315
297 inline DependentString operator "" _str(const String::value_type* str, 316 inline DependentString operator "" _str(const String::value_type* str,
298 String::size_type len) 317 String::size_type len)
299 { 318 {
300 return DependentString(str, len); 319 return DependentString(str, len);
301 } 320 }
302 321
303 inline void String_assert_writable(bool isWritable) 322 inline void String_assert_writable(bool isWritable)
304 { 323 {
305 assert2(isWritable, u"Writing access to a read-only string"_str); 324 assert2(isWritable, ABP_TEXT("Writing access to a read-only string"_str));
306 } 325 }
307 326
308 class OwnedString : public String 327 class OwnedString : public String
309 { 328 {
310 private: 329 private:
311 void grow(size_type additionalSize) 330 void grow(size_type additionalSize)
312 { 331 {
313 OwnedString newValue(length() + additionalSize); 332 OwnedString newValue(length() + additionalSize);
314 if (length() > 0) 333 if (length() > 0)
315 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length()); 334 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length());
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
393 std::swap(mBuf, str.mBuf); 412 std::swap(mBuf, str.mBuf);
394 std::swap(mLen, str.mLen); 413 std::swap(mLen, str.mLen);
395 return *this; 414 return *this;
396 } 415 }
397 416
398 void append(const value_type* source, size_type sourceLen) 417 void append(const value_type* source, size_type sourceLen)
399 { 418 {
400 if (!sourceLen) 419 if (!sourceLen)
401 return; 420 return;
402 421
403 assert2(source, u"Null buffer passed to OwnedString.append()"_str); 422 assert2(source, ABP_TEXT("Null buffer passed to OwnedString.append()"_str));
404 size_t oldLength = length(); 423 size_t oldLength = length();
405 grow(sourceLen); 424 grow(sourceLen);
406 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); 425 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen);
407 } 426 }
408 427
428 #ifndef ABP_UTF8_STRING
409 void append(const char* source, size_type sourceLen) 429 void append(const char* source, size_type sourceLen)
410 { 430 {
411 if (!sourceLen) 431 if (!sourceLen)
412 return; 432 return;
413 433
414 assert2(source, u"Null buffer passed to OwnedString.append()"_str); 434 assert2(source, ABP_TEXT("Null buffer passed to OwnedString.append()"_str));
415 size_t oldLength = length(); 435 size_t oldLength = length();
416 grow(sourceLen); 436 grow(sourceLen);
417 for (size_t i = 0; i < sourceLen; i++) 437 for (size_t i = 0; i < sourceLen; i++)
418 mBuf[oldLength + i] = source[i]; 438 mBuf[oldLength + i] = source[i];
419 } 439 }
440 #endif // !ABP_UTF8_STRING
420 441
421 void append(const String& str) 442 void append(const String& str)
422 { 443 {
423 append(str.mBuf, str.length()); 444 append(str.mBuf, str.length());
424 } 445 }
425 446
426 void append(value_type c) 447 void append(value_type c)
427 { 448 {
428 append(&c, 1); 449 append(&c, 1);
429 } 450 }
(...skipping 11 matching lines...) Expand all
441 462
442 size_type size = 0; 463 size_type size = 0;
443 for (T i = num; i; i /= 10) 464 for (T i = num; i; i /= 10)
444 size++; 465 size++;
445 size = (size ? size : 1); 466 size = (size ? size : 1);
446 467
447 size_type pos = length(); 468 size_type pos = length();
448 grow((negative ? 1 : 0) + size); 469 grow((negative ? 1 : 0) + size);
449 470
450 if (negative) 471 if (negative)
451 mBuf[pos++] = '-'; 472 mBuf[pos++] = ABP_TEXT('-');
452 473
453 for (int i = size - 1; i >= 0; i--) 474 for (int i = size - 1; i >= 0; i--)
454 { 475 {
455 mBuf[pos + i] = '0' + (num % 10); 476 mBuf[pos + i] = ABP_TEXT('0') + (num % 10);
456 num /= 10; 477 num /= 10;
457 } 478 }
458 } 479 }
459 }; 480 };
460 481
461 #ifdef INSIDE_TESTS 482 #ifdef INSIDE_TESTS
462 inline std::ostream& operator<<(std::ostream& os, const OwnedString& str) 483 inline std::ostream& operator<<(std::ostream& os, const OwnedString& str)
463 { 484 {
464 return os << static_cast<const String&>(str); 485 return os << static_cast<const String&>(str);
465 } 486 }
466 #endif 487 #endif
467 488
468 template<typename T> 489 template<typename T>
469 struct LexicalCastImpl; 490 struct LexicalCastImpl;
470 491
471 /// Performs common conversions of a text represented value. 492 /// Performs common conversions of a text represented value.
472 template<typename T> 493 template<typename T>
473 inline T lexical_cast(const String& value) 494 inline T lexical_cast(const String& value)
474 { 495 {
475 return LexicalCastImpl<T>::Convert(value); 496 return LexicalCastImpl<T>::Convert(value);
476 } 497 }
477 498
478 template<> 499 template<>
479 struct LexicalCastImpl<bool> 500 struct LexicalCastImpl<bool>
480 { 501 {
481 static bool Convert(const String& value) 502 static bool Convert(const String& value)
482 { 503 {
483 return value == u"true"_str; 504 return value == ABP_TEXT("true"_str);
484 } 505 }
485 }; 506 };
486 507
487 template<typename T> 508 template<typename T>
488 struct LexicalCastImpl 509 struct LexicalCastImpl
489 { 510 {
490 static_assert(std::is_integral<T>::value, "T should be a number"); 511 static_assert(std::is_integral<T>::value, "T should be a number");
491 static T Convert(const String& value) 512 static T Convert(const String& value)
492 { 513 {
493 String::size_type len = value.length(); 514 String::size_type len = value.length();
494 if (len == 0) 515 if (len == 0)
495 return 0; 516 return 0;
496 String::size_type pos = 0; 517 String::size_type pos = 0;
497 bool negative = std::numeric_limits<T>::is_signed && value[0] == u'-'; 518 bool negative = std::numeric_limits<T>::is_signed && value[0] == ABP_TEXT('-');
498 if (negative) 519 if (negative)
499 { 520 {
500 ++pos; 521 ++pos;
501 } 522 }
502 T result = 0; 523 T result = 0;
503 for (; pos < len; ++pos) 524 for (; pos < len; ++pos)
504 { 525 {
505 auto c = value[pos]; 526 auto c = value[pos];
506 if (c < u'0' || c > u'9') 527 if (c < ABP_TEXT('0') || c > ABP_TEXT('9'))
507 return 0; 528 return 0;
508 // isDangerous is the optimization because there is no need for some checks 529 // isDangerous is the optimization because there is no need for some checks
509 // when the values are far from edge cases. 530 // when the values are far from edge cases.
510 // It targets the normal values, when a value is prefixed with several 531 // It targets the normal values, when a value is prefixed with several
511 // zeros additional checks start to work earlier than the actual value of 532 // zeros additional checks start to work earlier than the actual value of
512 // result reaches an edge case, but it does not affect the result. 533 // result reaches an edge case, but it does not affect the result.
513 bool isDangerous = pos >= std::numeric_limits<T>::digits10; 534 bool isDangerous = pos >= std::numeric_limits<T>::digits10;
514 // It also invalidates the parsing of too big numbers in comparison with 535 // It also invalidates the parsing of too big numbers in comparison with
515 // stopping when it encounters a non numerical character. 536 // stopping when it encounters a non numerical character.
516 // cast<uint8_t>(u"1230"_str) -> 0 537 // cast<uint8_t>(u"1230"_str) -> 0
517 // cast<uint8_t>(u"123E"_str) -> 123 538 // cast<uint8_t>(u"123E"_str) -> 123
518 if (isDangerous && std::numeric_limits<T>::max() / 10 < result) 539 if (isDangerous && std::numeric_limits<T>::max() / 10 < result)
519 { 540 {
520 return 0; 541 return 0;
521 } 542 }
522 result *= 10; 543 result *= 10;
523 uint8_t digit = c - u'0'; 544 uint8_t digit = c - ABP_TEXT('0');
524 if (isDangerous && (std::numeric_limits<T>::max() - digit < result - (negative ? 1 : 0))) 545 if (isDangerous && (std::numeric_limits<T>::max() - digit < result - (negative ? 1 : 0)))
525 { 546 {
526 return 0; 547 return 0;
527 } 548 }
528 result += digit; 549 result += digit;
529 } 550 }
530 return negative ? -result : result; 551 return negative ? -result : result;
531 } 552 }
532 }; 553 };
533 554
534 template<> 555 template<>
535 inline OwnedString lexical_cast<OwnedString>(const String& value) 556 inline OwnedString lexical_cast<OwnedString>(const String& value)
536 { 557 {
537 return OwnedString{value}; 558 return OwnedString{value};
538 } 559 }
539 560
540 DependentString TrimSpaces(const String& value); 561 DependentString TrimSpaces(const String& value);
541 562
542 // Splits the `value` string into two `DependentString`s excluding the character staying at `separatorPos`. 563 // Splits the `value` string into two `DependentString`s excluding the character staying at `separatorPos`.
543 // Useful for parsing. 564 // Useful for parsing.
544 std::pair<DependentString, DependentString> SplitString(const String& value, String::size_type separatorPos); 565 std::pair<DependentString, DependentString> SplitString(const String& value, String::size_type separatorPos);
545 566
546 ABP_NS_END 567 ABP_NS_END
OLDNEW
« no previous file with comments | « compiled/Map.h ('k') | compiled/String.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld