string.h 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. /**
  2. * Utility functions for managing strings, specifically because C++'s
  3. * std::string/std::regex is not well suited for UTF8 comprehensions.
  4. */
  5. #pragma once
  6. #include <ostream>
  7. #include <string>
  8. #include <string_view>
  9. #if __has_include(<unicode/std_string.h>)
  10. #define JVALIDATE_HAS_ICU
  11. #include <unicode/brkiter.h>
  12. #include <unicode/unistr.h>
  13. #endif
  14. #include <jvalidate/detail/expect.h>
  15. #ifdef JVALIDATE_HAS_ICU
  16. namespace jvalidate::detail {
  17. /**
  18. * @brief Calclates the string-length of the argument, treating multi-byte
  19. * characters an unicode graphemes as single characters (which std::string
  20. * cannot do).
  21. *
  22. * @param arg Any UTF8 compatible string (including a standard ASCII string)
  23. *
  24. * @returns A number no greater than arg.size(), depending on the number of
  25. * graphemes/codepoints in the string.
  26. */
  27. inline size_t length(std::string_view arg) {
  28. icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
  29. return ucs.countChar32();
  30. }
  31. inline std::string_view to_u8(std::string_view arg) { return arg; }
  32. inline std::string to_u8(std::u32string_view arg) {
  33. icu::UnicodeString const ucs =
  34. icu::UnicodeString::fromUTF32(reinterpret_cast<int const *>(arg.data()), arg.size());
  35. std::string out;
  36. return ucs.toUTF8String(out);
  37. }
  38. inline std::u32string_view to_u32(std::u32string_view arg) { return arg; }
  39. inline std::u32string to_u32(std::string_view arg) {
  40. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
  41. std::u32string rval;
  42. size_t const capacity = ucs.countChar32();
  43. rval.resize(capacity);
  44. UErrorCode status = U_ZERO_ERROR;
  45. ucs.toUTF32(reinterpret_cast<int *>(rval.data()), capacity, status);
  46. // This should never occur - unless there's like an alloc error
  47. if (U_FAILURE(status)) {
  48. JVALIDATE_THROW(std::runtime_error, "UTF-32 Translation Error");
  49. }
  50. return rval;
  51. }
  52. }
  53. namespace std {
  54. inline std::ostream & operator<<(std::ostream & os, std::u32string_view str) {
  55. return os << jvalidate::detail::to_u8(str);
  56. }
  57. }
  58. #else
  59. #endif