/**
 * Utility functions for managing strings, needed because C++'s
 * std::string/std::regex are not well suited to UTF-8 text.
 *
 * Which implementation of the helpers below is compiled depends on two
 * feature macros, checked in this order:
 *   1. JVALIDATE_HAS_IDNA - use the ada::idna transcoding routines.
 *   2. JVALIDATE_HAS_ICU  - use icu::UnicodeString.
 *   3. neither            - fall back to plain byte counting.
 */
#pragma once

// NOTE(review): the include targets below were reconstructed from the names
// this header uses; confirm them against version control.
#include <cstddef>
#include <memory> // IWYU pragma: keep
#include <string>
#include <string_view>

#if JVALIDATE_HAS_ICU
#include <unicode/stringpiece.h>
#include <unicode/unistr.h>
#endif

#if JVALIDATE_HAS_IDNA
#include <ada/idna/unicode_transcoding.h>
#endif

// NOTE(review): the original listed one more #include at this point whose
// target could not be recovered - restore it from version control.

namespace jvalidate::detail {
/**
 * @brief Identity overload: the argument is already a UTF-8 view, so it is
 * returned unchanged (no copy, no validation).
 */
inline std::string_view to_u8(std::string_view arg) { return arg; }

/**
 * @brief Identity overload: the argument is already a UTF-32 view, so it is
 * returned unchanged (no copy, no validation).
 */
inline std::u32string_view to_u32(std::u32string_view arg) { return arg; }
}

namespace jvalidate::detail {
#if JVALIDATE_HAS_IDNA
/**
 * @brief Calculates the string-length of the argument in Unicode code points,
 * so that a multi-byte UTF-8 sequence counts as a single character (which
 * std::string::length() cannot do).
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns The number of UTF-32 code units needed to represent arg, as
 * reported by ada::idna::utf32_length_from_utf8. For well-formed UTF-8 this
 * is no greater than arg.length().
 */
inline size_t length(std::string_view arg) {
  return ada::idna::utf32_length_from_utf8(arg.data(), arg.length());
}

/**
 * @brief Transcodes a UTF-32 string into a newly allocated UTF-8 string.
 *
 * @param str The UTF-32 text to convert
 *
 * @returns A std::string holding as many bytes as
 * ada::idna::utf32_to_utf8 reports having written.
 */
inline std::string to_u8(std::u32string_view str) {
  // A single code point occupies at most 4 bytes in UTF-8, so this buffer is
  // always large enough. It is deliberately left uninitialised because the
  // transcoder overwrites the part that is subsequently read.
  auto data = std::make_unique_for_overwrite<char[]>(4 * str.length());
  size_t const written =
      ada::idna::utf32_to_utf8(str.data(), str.length(), data.get());
  return std::string(data.get(), data.get() + written);
}

/**
 * @brief Transcodes a UTF-8 string into a newly allocated UTF-32 string.
 *
 * @param str The UTF-8 text to convert
 *
 * @returns A std::u32string holding as many code units as
 * ada::idna::utf8_to_utf32 reports having written.
 */
inline std::u32string to_u32(std::string_view str) {
  // Every code point consumes at least one input byte, so one char32_t per
  // input byte is an upper bound on the output size.
  auto data = std::make_unique_for_overwrite<char32_t[]>(str.length());
  size_t const written =
      ada::idna::utf8_to_utf32(str.data(), str.length(), data.get());
  return std::u32string(data.get(), data.get() + written);
}
#elif JVALIDATE_HAS_ICU
/**
 * @brief Calculates the string-length of the argument in Unicode code points,
 * so that a multi-byte UTF-8 sequence counts as a single character (which
 * std::string::length() cannot do).
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns The code point count reported by icu::UnicodeString::countChar32()
 * after decoding arg from UTF-8.
 */
inline size_t length(std::string_view arg) {
  icu::UnicodeString const ucs =
      icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
  // countChar32() yields a signed 32-bit count; widen it explicitly.
  return static_cast<size_t>(ucs.countChar32());
}
#else
/**
 * @brief Calculates the string-length of the argument, without attempting to
 * decode multi-byte characters. This method is provided so that it is possible
 * to use maxLength and minLength constraints even when building without IDNA
 * or ICU in the toolchain.
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns arg.length(), i.e. the number of bytes in arg
 */
inline size_t length(std::string_view arg) { return arg.length(); }
#endif
}