/**
 * Utility functions for managing strings, specifically because C++'s
 * std::string/std::regex is not well suited for UTF8 comprehensions.
 *
 * Two families of overloads are provided under the same names:
 *  - identity overloads, always available, for when the argument is already
 *    in the requested encoding;
 *  - transcoding overloads, only compiled when JVALIDATE_HAS_IDNA is truthy,
 *    which delegate to ada::idna.
 */
#pragma once

#include <cstddef>
#include <memory>
#include <string>
#include <string_view>

#if JVALIDATE_HAS_IDNA
#include <ada/idna/unicode_transcoding.h>
#endif

// NOTE(review): the original file has one further #include at this position
// whose target could not be recovered - restore it from version control.

namespace jvalidate::detail {
/**
 * @brief Length of a UTF8 string in UTF8 code units (bytes).
 *
 * @param arg A string that is already encoded in UTF8
 *
 * @returns arg.length(), unchanged - no transcoding is required.
 */
inline size_t length_u8(std::string_view arg) { return arg.length(); }

/**
 * @brief Length of a UTF32 string in UTF32 code units.
 *
 * @param arg A string that is already encoded in UTF32
 *
 * @returns arg.length(), unchanged - no transcoding is required.
 */
inline size_t length_u32(std::u32string_view arg) { return arg.length(); }

/**
 * @brief Identity conversion: the argument is already UTF8.
 *
 * @param arg A string that is already encoded in UTF8
 *
 * @returns The same view. No copy is made, so the result refers to the same
 * underlying storage as arg.
 */
inline std::string_view to_u8(std::string_view arg) { return arg; }

/**
 * @brief Identity conversion: the argument is already UTF32.
 *
 * @param arg A string that is already encoded in UTF32
 *
 * @returns The same view. No copy is made, so the result refers to the same
 * underlying storage as arg.
 */
inline std::u32string_view to_u32(std::u32string_view arg) { return arg; }
}

#if JVALIDATE_HAS_IDNA
namespace jvalidate::detail {
/**
 * @brief Calculates the string-length of the argument, treating multi-byte
 * characters as their individual bytes (as if the string was a std::string).
 *
 * @param arg A string encoded in UTF32
 *
 * @returns A number no greater than 4 * arg.length(), depending on the
 * codepoints in the string.
 */
inline size_t length_u8(std::u32string_view arg) {
  return ada::idna::utf8_length_from_utf32(arg.data(), arg.length());
}

/**
 * @brief Calculates the string-length of the argument, treating each
 * multi-byte character as a single character (which std::string cannot do).
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns A number no greater than arg.length(), depending on the
 * codepoints in the string.
 */
inline size_t length_u32(std::string_view arg) {
  return ada::idna::utf32_length_from_utf8(arg.data(), arg.length());
}

/**
 * @brief Transcodes a UTF32 string into a newly allocated UTF8 string.
 *
 * @param str A string encoded in UTF32
 *
 * @returns An owning std::string holding however many bytes
 * ada::idna::utf32_to_utf8 reports having written.
 * NOTE(review): behaviour for malformed input is whatever ada::idna does -
 * confirm against its documentation.
 */
inline std::string to_u8(std::u32string_view str) {
  // Scratch buffer sized for the worst case of 4 UTF8 bytes per codepoint.
  auto data = std::make_unique_for_overwrite<char[]>(4 * str.length());
  size_t const bytes =
      ada::idna::utf32_to_utf8(str.data(), str.length(), data.get());
  // Copy out only the portion of the scratch buffer that was filled.
  return std::string(data.get(), data.get() + bytes);
}

/**
 * @brief Transcodes a UTF8 string into a newly allocated UTF32 string.
 *
 * @param str Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns An owning std::u32string holding however many code units
 * ada::idna::utf8_to_utf32 reports having written.
 * NOTE(review): behaviour for malformed input is whatever ada::idna does -
 * confirm against its documentation.
 */
inline std::u32string to_u32(std::string_view str) {
  // Scratch buffer sized for the worst case of one codepoint per input byte.
  auto data = std::make_unique_for_overwrite<char32_t[]>(str.length());
  size_t const codepoints =
      ada::idna::utf8_to_utf32(str.data(), str.length(), data.get());
  // Copy out only the portion of the scratch buffer that was filled.
  return std::u32string(data.get(), data.get() + codepoints);
}
}
#endif