string.h 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
/**
 * Utility functions for managing strings, needed because C++'s
 * std::string and std::regex are not well suited to handling UTF-8 text.
 */
  5. #pragma once
  6. #include <jvalidate/_config.h>
  7. #include <cstring>
  8. #include <string_view>
  9. #if JVALIDATE_HAS_ICU
  10. #include <unicode/brkiter.h>
  11. #include <unicode/unistr.h>
  12. #endif
  13. namespace jvalidate::detail {
  14. #if JVALIDATE_HAS_ICU
  15. /**
  16. * @brief Calculates the string-length of the argument, treating multi-byte
  17. * characters and unicode graphemes as single characters (which std::string
  18. * cannot do).
  19. *
  20. * @param arg Any UTF8 compatible string (including a standard ASCII string)
  21. *
  22. * @returns A number no greater than arg.length(), depending on the number of
  23. * graphemes/codepoints in the string.
  24. */
  25. inline size_t length(std::string_view arg) {
  26. icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
  27. return ucs.countChar32();
  28. }
  29. #else
  30. /**
  31. * @brief Calculates the string-length of the argument, without attempting to
  32. * parse out graphemes. This method is provided so that it is possible to use
  33. * maxLength and minLength constraints even when building without IDNA or ICU
  34. * in the toolchain.
  35. *
  36. * @param arg Any UTF8 compatible string (including a standard ASCII string)
  37. *
  38. * @returns arg.length()
  39. */
  40. inline size_t length(std::string_view arg) { return arg.length(); }
  41. #endif
  42. }