/**
 * Utility functions for managing strings, specifically because C++'s
 * std::string/std::regex are not well suited to UTF-8 text.
 */
#pragma once

#include <cstddef> // IWYU pragma: keep
#include <string>
#include <string_view>

#if JVALIDATE_HAS_ICU
#include <unicode/stringpiece.h>
#include <unicode/unistr.h>
#endif

namespace jvalidate::detail {
#if JVALIDATE_HAS_ICU
/**
 * @brief Calculates the string-length of the argument in Unicode code points,
 * so that a multi-byte UTF-8 sequence counts as a single character (which
 * std::string cannot do).
 *
 * The count is per code point, not per grapheme cluster: a base letter
 * followed by a combining mark contributes two to the result.
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns A number no greater than arg.length(), equal to the number of
 * code points ICU produces when decoding the string.
 */
inline size_t length(std::string_view arg) {
  icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
  return ucs.countChar32();
}
#else
/**
 * @brief Calculates the string-length of the argument in Unicode code points
 * without depending on ICU. This method is provided so that maxLength and
 * minLength constraints count characters rather than bytes even when building
 * without IDNA or ICU in the toolchain.
 *
 * Every byte that is not a UTF-8 continuation byte (bit pattern 10xxxxxx)
 * starts a new code point, so counting those bytes yields the number of code
 * points in well-formed UTF-8. No validation is performed: in malformed input
 * stray continuation bytes are simply not counted.
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns A number no greater than arg.length(); exactly arg.length() for
 * pure ASCII input.
 */
inline size_t length(std::string_view arg) {
  constexpr unsigned char continuation_mask = 0xC0U;
  constexpr unsigned char continuation_bits = 0x80U;

  size_t count = 0;
  for (char const chr : arg) {
    // Cast first: plain char may be signed, which would sign-extend the mask test.
    if ((static_cast<unsigned char>(chr) & continuation_mask) != continuation_bits) {
      ++count;
    }
  }
  return count;
}
#endif
} // namespace jvalidate::detail