| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- /**
- * Utility functions for managing strings, needed because C++'s
- * std::string/std::regex are not well suited to handling UTF-8 text.
- */
- #pragma once
- #include <ostream>
- #include <string>
- #include <string_view>
- #if __has_include(<unicode/std_string.h>)
- #define JVALIDATE_HAS_ICU
- #include <unicode/brkiter.h>
- #include <unicode/unistr.h>
- #endif
- #include <jvalidate/detail/expect.h>
- #ifdef JVALIDATE_HAS_ICU
- namespace jvalidate::detail {
- /**
- * @brief Calclates the string-length of the argument, treating multi-byte
- * characters an unicode graphemes as single characters (which std::string
- * cannot do).
- *
- * @param arg Any UTF8 compatible string (including a standard ASCII string)
- *
- * @returns A number no greater than arg.size(), depending on the number of
- * graphemes/codepoints in the string.
- */
- inline size_t length(std::string_view arg) {
- icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
- return ucs.countChar32();
- }
/**
 * @brief Pass-through overload of to_u8 for input that is already held as a
 * narrow (UTF-8) string view.
 *
 * No transcoding, copying or validation takes place; it exists so that the
 * same call works on either string-view type.
 *
 * @param arg The narrow string view to hand back
 *
 * @returns arg itself, still referring to the caller's storage
 */
inline std::string_view to_u8(std::string_view arg) {
  return arg;
}
- inline std::string to_u8(std::u32string_view arg) {
- icu::UnicodeString const ucs =
- icu::UnicodeString::fromUTF32(reinterpret_cast<int const *>(arg.data()), arg.size());
- std::string out;
- return ucs.toUTF8String(out);
- }
/**
 * @brief Pass-through overload of to_u32 for input that is already held as a
 * UTF-32 string view.
 *
 * No transcoding, copying or validation takes place; it exists so that the
 * same call works on either string-view type.
 *
 * @param arg The UTF-32 string view to hand back
 *
 * @returns arg itself, still referring to the caller's storage
 */
inline std::u32string_view to_u32(std::u32string_view arg) {
  return arg;
}
- inline std::u32string to_u32(std::string_view arg) {
- icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
- std::u32string rval;
- size_t const capacity = ucs.countChar32();
- rval.resize(capacity);
- UErrorCode status = U_ZERO_ERROR;
- ucs.toUTF32(reinterpret_cast<int *>(rval.data()), capacity, status);
- // This should never occur - unless there's like an alloc error
- if (U_FAILURE(status)) {
- JVALIDATE_THROW(std::runtime_error, "UTF-32 Translation Error");
- }
- return rval;
- }
- }
- namespace std {
- inline std::ostream & operator<<(std::ostream & os, std::u32string_view str) {
- return os << jvalidate::detail::to_u8(str);
- }
- }
- #else
- #endif
|