Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/String.h

Issue 29721753: Issue 6180 - use ABP_TEXT everywhere in order to let String be a UTF-8 string (Closed) Base URL: https://github.com/adblockplus/adblockpluscore.git@adb2678354813ce5b6de095072954c5a784a7bc4
Patch Set: Created March 13, 2018, 6:20 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « compiled/Map.h ('k') | compiled/String.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 19 matching lines...) Expand all
30 #include <limits> 30 #include <limits>
31 31
32 #include "base.h" 32 #include "base.h"
33 #include "debug.h" 33 #include "debug.h"
34 #include "library.h" 34 #include "library.h"
35 35
36 ABP_NS_BEGIN 36 ABP_NS_BEGIN
37 37
38 inline void String_assert_writable(bool isWritable); 38 inline void String_assert_writable(bool isWritable);
39 39
40 // hacky because without templates
41 #ifdef ABP_UTF8_STRING
42 #define ABP_TEXT(val) val
hub 2018/03/13 21:29:16 Can't we make this macro shorter? Things are getti
sergei 2018/03/14 10:17:10 I'm afraid it can clash with some another macro, s
43 struct StringTraits
44 {
45 typedef char char_type;
46 };
47 #else
48 #define ABP_TEXT(val) u##val
49 struct StringTraits
50 {
51 typedef char16_t char_type;
52 };
53 #endif
54
40 class String 55 class String
41 { 56 {
42 friend class DependentString; 57 friend class DependentString;
43 friend class OwnedString; 58 friend class OwnedString;
44 59
45 public: 60 public:
46 typedef char16_t value_type; 61 typedef StringTraits::char_type value_type;
47 typedef size_t size_type; 62 typedef size_t size_type;
48 63
49 // Type flags, stored in the top 2 bits of the mLen member 64 // Type flags, stored in the top 2 bits of the mLen member
50 static constexpr size_type INVALID = 0xC0000000; 65 static constexpr size_type INVALID = 0xC0000000;
51 static constexpr size_type DELETED = 0x80000000; 66 static constexpr size_type DELETED = 0x80000000;
52 static constexpr size_type READ_ONLY = 0x40000000; 67 static constexpr size_type READ_ONLY = 0x40000000;
53 static constexpr size_type READ_WRITE = 0x00000000; 68 static constexpr size_type READ_WRITE = 0x00000000;
54 69
55 static constexpr size_type FLAGS_MASK = 0xC0000000; 70 static constexpr size_type FLAGS_MASK = 0xC0000000;
56 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF; 71 static constexpr size_type LENGTH_MASK = 0x3FFFFFFF;
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 206
192 void toLower() 207 void toLower()
193 { 208 {
194 size_type len = length(); 209 size_type len = length();
195 for (size_type i = 0; i < len; ++i) 210 for (size_type i = 0; i < len; ++i)
196 { 211 {
197 value_type currChar = mBuf[i]; 212 value_type currChar = mBuf[i];
198 213
199 // This should be more efficient with a lookup table but I couldn't measure 214 // This should be more efficient with a lookup table but I couldn't measure
200 // any performance difference. 215 // any performance difference.
201 if (currChar >= u'A' && currChar <= u'Z') 216 if (currChar >= ABP_TEXT('A') && currChar <= ABP_TEXT('Z'))
202 mBuf[i] = currChar + u'a' - u'A'; 217 mBuf[i] = currChar + ABP_TEXT('a') - ABP_TEXT('A');
203 else if (currChar >= 128) 218 else if (currChar >= 128)
204 { 219 {
205 mBuf[i] = CharToLower(currChar); 220 mBuf[i] = CharToLower(currChar);
206 } 221 }
207 } 222 }
208 } 223 }
209 }; 224 };
210 225
211 #ifdef INSIDE_TESTS 226 #ifdef INSIDE_TESTS
212 inline std::ostream& operator<<(std::ostream& os, const String& str) 227 inline std::ostream& operator<<(std::ostream& os, const String& str)
213 { 228 {
229 #ifdef ABP_UTF8_STRING
230 os.write(str.data(), str.length());
231 #else
214 std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> converter; 232 std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> converter;
215 os << converter.to_bytes(str.data(), str.data() + str.length()); 233 os << converter.to_bytes(str.data(), str.data() + str.length());
234 #endif
216 return os; 235 return os;
217 } 236 }
218 #endif 237 #endif
219 238
220 class DependentString : public String 239 class DependentString : public String
221 { 240 {
222 public: 241 public:
223 explicit DependentString() 242 explicit DependentString()
224 : String(nullptr, 0, INVALID) 243 : String(nullptr, 0, INVALID)
225 { 244 {
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 #endif 308 #endif
290 309
291 inline DependentString operator "" _str(const String::value_type* str, 310 inline DependentString operator "" _str(const String::value_type* str,
292 String::size_type len) 311 String::size_type len)
293 { 312 {
294 return DependentString(str, len); 313 return DependentString(str, len);
295 } 314 }
296 315
297 inline void String_assert_writable(bool isWritable) 316 inline void String_assert_writable(bool isWritable)
298 { 317 {
299 assert2(isWritable, u"Writing access to a read-only string"_str); 318 assert2(isWritable, ABP_TEXT("Writing access to a read-only string"_str));
300 } 319 }
301 320
302 class OwnedString : public String 321 class OwnedString : public String
303 { 322 {
304 private: 323 private:
305 void grow(size_type additionalSize) 324 void grow(size_type additionalSize)
306 { 325 {
307 OwnedString newValue(length() + additionalSize); 326 OwnedString newValue(length() + additionalSize);
308 if (length() > 0) 327 if (length() > 0)
309 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length()); 328 std::memcpy(newValue.mBuf, mBuf, sizeof(value_type) * length());
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
387 std::swap(mBuf, str.mBuf); 406 std::swap(mBuf, str.mBuf);
388 std::swap(mLen, str.mLen); 407 std::swap(mLen, str.mLen);
389 return *this; 408 return *this;
390 } 409 }
391 410
392 void append(const value_type* source, size_type sourceLen) 411 void append(const value_type* source, size_type sourceLen)
393 { 412 {
394 if (!sourceLen) 413 if (!sourceLen)
395 return; 414 return;
396 415
397 assert2(source, u"Null buffer passed to OwnedString.append()"_str); 416 assert2(source, ABP_TEXT("Null buffer passed to OwnedString.append()"_str));
398 size_t oldLength = length(); 417 size_t oldLength = length();
399 grow(sourceLen); 418 grow(sourceLen);
400 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen); 419 std::memcpy(mBuf + oldLength, source, sizeof(value_type) * sourceLen);
401 } 420 }
402 421
422 #ifndef ABP_UTF8_STRING
403 void append(const char* source, size_type sourceLen) 423 void append(const char* source, size_type sourceLen)
404 { 424 {
405 if (!sourceLen) 425 if (!sourceLen)
406 return; 426 return;
407 427
408 assert2(source, u"Null buffer passed to OwnedString.append()"_str); 428 assert2(source, ABP_TEXT("Null buffer passed to OwnedString.append()"_str));
409 size_t oldLength = length(); 429 size_t oldLength = length();
410 grow(sourceLen); 430 grow(sourceLen);
411 for (size_t i = 0; i < sourceLen; i++) 431 for (size_t i = 0; i < sourceLen; i++)
412 mBuf[oldLength + i] = source[i]; 432 mBuf[oldLength + i] = source[i];
413 } 433 }
434 #endif // !ABP_UTF8_STRING
414 435
415 void append(const String& str) 436 void append(const String& str)
416 { 437 {
417 append(str.mBuf, str.length()); 438 append(str.mBuf, str.length());
418 } 439 }
419 440
420 void append(value_type c) 441 void append(value_type c)
421 { 442 {
422 append(&c, 1); 443 append(&c, 1);
423 } 444 }
(...skipping 11 matching lines...) Expand all
435 456
436 size_type size = 0; 457 size_type size = 0;
437 for (T i = num; i; i /= 10) 458 for (T i = num; i; i /= 10)
438 size++; 459 size++;
439 size = (size ? size : 1); 460 size = (size ? size : 1);
440 461
441 size_type pos = length(); 462 size_type pos = length();
442 grow((negative ? 1 : 0) + size); 463 grow((negative ? 1 : 0) + size);
443 464
444 if (negative) 465 if (negative)
445 mBuf[pos++] = '-'; 466 mBuf[pos++] = ABP_TEXT('-');
446 467
447 for (int i = size - 1; i >= 0; i--) 468 for (int i = size - 1; i >= 0; i--)
448 { 469 {
449 mBuf[pos + i] = '0' + (num % 10); 470 mBuf[pos + i] = ABP_TEXT('0') + (num % 10);
450 num /= 10; 471 num /= 10;
451 } 472 }
452 } 473 }
453 }; 474 };
454 475
455 #ifdef INSIDE_TESTS 476 #ifdef INSIDE_TESTS
456 inline std::ostream& operator<<(std::ostream& os, const OwnedString& str) 477 inline std::ostream& operator<<(std::ostream& os, const OwnedString& str)
457 { 478 {
458 return os << static_cast<const String&>(str); 479 return os << static_cast<const String&>(str);
459 } 480 }
460 #endif 481 #endif
461 482
462 template<typename T> 483 template<typename T>
463 struct LexicalCastImpl; 484 struct LexicalCastImpl;
464 485
465 /// Performs common conversions of a text represented value. 486 /// Performs common conversions of a text represented value.
466 template<typename T> 487 template<typename T>
467 inline T lexical_cast(const String& value) 488 inline T lexical_cast(const String& value)
468 { 489 {
469 return LexicalCastImpl<T>::Convert(value); 490 return LexicalCastImpl<T>::Convert(value);
470 } 491 }
471 492
472 template<> 493 template<>
473 struct LexicalCastImpl<bool> 494 struct LexicalCastImpl<bool>
474 { 495 {
475 static bool Convert(const String& value) 496 static bool Convert(const String& value)
476 { 497 {
477 return value == u"true"_str; 498 return value == ABP_TEXT("true"_str);
478 } 499 }
479 }; 500 };
480 501
481 template<typename T> 502 template<typename T>
482 struct LexicalCastImpl 503 struct LexicalCastImpl
483 { 504 {
484 static_assert(std::is_integral<T>::value, "T should be a number"); 505 static_assert(std::is_integral<T>::value, "T should be a number");
485 static T Convert(const String& value) 506 static T Convert(const String& value)
486 { 507 {
487 String::size_type len = value.length(); 508 String::size_type len = value.length();
488 if (len == 0) 509 if (len == 0)
489 return 0; 510 return 0;
490 String::size_type pos = 0; 511 String::size_type pos = 0;
491 bool negative = std::numeric_limits<T>::is_signed && value[0] == u'-'; 512 bool negative = std::numeric_limits<T>::is_signed && value[0] == ABP_TEXT('-');
492 if (negative) 513 if (negative)
493 { 514 {
494 ++pos; 515 ++pos;
495 } 516 }
496 T result = 0; 517 T result = 0;
497 for (; pos < len; ++pos) 518 for (; pos < len; ++pos)
498 { 519 {
499 auto c = value[pos]; 520 auto c = value[pos];
500 if (c < u'0' || c > u'9') 521 if (c < ABP_TEXT('0') || c > ABP_TEXT('9'))
501 return 0; 522 return 0;
502 // isDangerous is the optimization because there is no need for some checks 523 // isDangerous is the optimization because there is no need for some checks
503 // when the values are far from edge cases. 524 // when the values are far from edge cases.
504 // It targets the normal values, when a value is prefixed with several 525 // It targets the normal values, when a value is prefixed with several
505 // zeros additional checks start to work earlier than the actual value of 526 // zeros additional checks start to work earlier than the actual value of
506 // result reaches an edge case, but it does not affect the result. 527 // result reaches an edge case, but it does not affect the result.
507 bool isDangerous = pos >= std::numeric_limits<T>::digits10; 528 bool isDangerous = pos >= std::numeric_limits<T>::digits10;
508 // It also invalidates the parsing of too big numbers in comparison with 529 // It also invalidates the parsing of too big numbers in comparison with
509 // stopping when it encounters a non numerical character. 530 // stopping when it encounters a non numerical character.
510 // cast<uint8_t>(u"1230"_str) -> 0 531 // cast<uint8_t>(u"1230"_str) -> 0
511 // cast<uint8_t>(u"123E"_str) -> 123 532 // cast<uint8_t>(u"123E"_str) -> 123
512 if (isDangerous && std::numeric_limits<T>::max() / 10 < result) 533 if (isDangerous && std::numeric_limits<T>::max() / 10 < result)
513 { 534 {
514 return 0; 535 return 0;
515 } 536 }
516 result *= 10; 537 result *= 10;
517 uint8_t digit = c - u'0'; 538 uint8_t digit = c - ABP_TEXT('0');
518 if (isDangerous && (std::numeric_limits<T>::max() - digit < result - (negative ? 1 : 0))) 539 if (isDangerous && (std::numeric_limits<T>::max() - digit < result - (negative ? 1 : 0)))
519 { 540 {
520 return 0; 541 return 0;
521 } 542 }
522 result += digit; 543 result += digit;
523 } 544 }
524 return negative ? -result : result; 545 return negative ? -result : result;
525 } 546 }
526 }; 547 };
527 548
528 template<> 549 template<>
529 inline OwnedString lexical_cast<OwnedString>(const String& value) 550 inline OwnedString lexical_cast<OwnedString>(const String& value)
530 { 551 {
531 return OwnedString{value}; 552 return OwnedString{value};
532 } 553 }
533 554
534 DependentString TrimSpaces(const String& value); 555 DependentString TrimSpaces(const String& value);
535 556
536 // Splits the `value` string into two `DependentString`s excluding the character staying at `separatorPos`. 557 // Splits the `value` string into two `DependentString`s excluding the character staying at `separatorPos`.
537 // Useful for parsing. 558 // Useful for parsing.
538 std::pair<DependentString, DependentString> SplitString(const String& value, String::size_type separatorPos); 559 std::pair<DependentString, DependentString> SplitString(const String& value, String::size_type separatorPos);
539 560
540 ABP_NS_END 561 ABP_NS_END
OLDNEW
« no previous file with comments | « compiled/Map.h ('k') | compiled/String.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld