string.h 965 B

1234567891011121314151617181920212223242526272829303132333435
/**
 * Utility functions for managing strings, needed because C++'s
 * std::string/std::regex are not well suited to handling UTF-8 text.
 */
  5. #pragma once
#include <jvalidate/_config.h>
#include <cstddef>
#include <cstring>
#include <string_view>
#if JVALIDATE_HAS_ICU
#include <unicode/brkiter.h>
#include <unicode/unistr.h>
#endif
  12. namespace jvalidate::detail {
  13. /**
  14. * @brief Calclates the string-length of the argument, treating multi-byte
  15. * characters and unicode graphemes as single characters (which std::string
  16. * cannot do).
  17. *
  18. * @param arg Any UTF8 compatible string (including a standard ASCII string)
  19. *
  20. * @returns A number no greater than arg.length(), depending on the number of
  21. * graphemes/codepoints in the string.
  22. */
  23. #if JVALIDATE_HAS_ICU
  24. inline size_t length(std::string_view arg) {
  25. icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
  26. return ucs.countChar32();
  27. }
  28. #else
  29. inline size_t length(std::string_view arg) { return arg.length(); }
  30. #endif
  31. }