string.h 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
/**
 * Utility functions for managing strings, needed because C++'s
 * std::string and std::regex are not well suited to handling UTF-8 text.
 */
  5. #pragma once
  6. #include <jvalidate/_config.h>
  7. #include <cstring>
  8. #include <memory>
  9. #include <string>
  10. #include <string_view>
  11. #if JVALIDATE_HAS_IDNA
  12. #include <ada/idna/unicode_transcoding.h>
  13. #endif
  14. #include <jvalidate/detail/expect.h>
  15. namespace jvalidate::detail {
  16. inline size_t length_u8(std::string_view arg) { return arg.length(); }
  17. inline size_t length_u32(std::u32string_view arg) { return arg.length(); }
  18. inline std::string_view to_u8(std::string_view arg) { return arg; }
  19. inline std::u32string_view to_u32(std::u32string_view arg) { return arg; }
  20. }
  21. #if JVALIDATE_HAS_IDNA
  22. namespace jvalidate::detail {
  23. /**
  24. * @brief Calclates the string-length of the argument, treating multi-byte
  25. * characters as their individual bytes (as if the string was a std::string).
  26. *
  27. * @param arg A string encoded in UTF32
  28. *
  29. * @returns A number no greater than 4 * arg.length(), depending on the number
  30. * of graphemes/codepoints in the string.
  31. */
  32. inline size_t length_u8(std::u32string_view arg) {
  33. return ada::idna::utf8_length_from_utf32(arg.data(), arg.length());
  34. }
  35. /**
  36. * @brief Calclates the string-length of the argument, treating multi-byte
  37. * characters and unicode graphemes as single characters (which std::string
  38. * cannot do).
  39. *
  40. * @param arg Any UTF8 compatible string (including a standard ASCII string)
  41. *
  42. * @returns A number no greater than arg.length(), depending on the number of
  43. * graphemes/codepoints in the string.
  44. */
  45. inline size_t length_u32(std::string_view arg) {
  46. return ada::idna::utf32_length_from_utf8(arg.data(), arg.length());
  47. }
  48. inline std::string to_u8(std::u32string_view str) {
  49. auto data = std::make_unique_for_overwrite<char[]>(4 * str.length());
  50. size_t bytes = ada::idna::utf32_to_utf8(str.data(), str.length(), data.get());
  51. return std::string(data.get(), data.get() + bytes);
  52. }
  53. inline std::u32string to_u32(std::string_view str) {
  54. auto data = std::make_unique_for_overwrite<char32_t[]>(str.length());
  55. size_t bytes = ada::idna::utf8_to_utf32(str.data(), str.length(), data.get());
  56. return std::u32string(data.get(), data.get() + bytes);
  57. }
  58. }
  59. #endif