| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
/**
 * Utility functions for managing strings, needed because C++'s
 * std::string/std::regex are not well suited to handling UTF-8 text.
 */
- #pragma once
- #include <jvalidate/_config.h>
- #include <cstring>
- #include <memory>
- #include <string_view>
- #if JVALIDATE_HAS_ICU
- #include <unicode/brkiter.h>
- #include <unicode/unistr.h>
- #endif
- #if JVALIDATE_HAS_IDNA
- #include <ada/idna/unicode_transcoding.h>
- #endif
- #include <jvalidate/detail/expect.h>
- namespace jvalidate::detail {
/**
 * @brief Reports the length of a narrow string in char units (bytes).
 *
 * No decoding takes place: the result is simply the size of the view, so a
 * multi-byte sequence contributes one unit per byte.
 *
 * @param arg The string to measure (presumably UTF-8 or plain ASCII, going by
 * the function name)
 *
 * @returns arg.size()
 */
inline size_t length_u8(std::string_view arg) {
  return arg.size();
}
/**
 * @brief Reports the length of a char32_t string in char32_t units.
 *
 * No decoding takes place: the result is simply the size of the view.
 *
 * @param arg The string to measure (presumably UTF-32, going by the function
 * name)
 *
 * @returns arg.size()
 */
inline size_t length_u32(std::u32string_view arg) {
  return arg.size();
}
/**
 * @brief Pass-through overload: a narrow string view is handed back unchanged.
 *
 * No copy is made; the returned view refers to the same characters as the
 * argument, so it is only valid for as long as the argument's storage is.
 * Presumably this exists so callers can invoke to_u8() without caring which
 * string width they hold - confirm against call sites.
 *
 * @param arg A narrow string view
 *
 * @returns arg itself
 */
inline std::string_view to_u8(std::string_view arg) {
  return arg;
}
/**
 * @brief Pass-through overload: a char32_t string view is handed back
 * unchanged.
 *
 * No copy is made; the returned view refers to the same characters as the
 * argument, so it is only valid for as long as the argument's storage is.
 * Presumably this exists so callers can invoke to_u32() without caring which
 * string width they hold - confirm against call sites.
 *
 * @param arg A char32_t string view
 *
 * @returns arg itself
 */
inline std::u32string_view to_u32(std::u32string_view arg) {
  return arg;
}
- }
- namespace jvalidate::detail {
- #if JVALIDATE_HAS_IDNA
- /**
- * @brief Calculates the string-length of the argument, treating multi-byte
- * characters as their individual bytes (as if the string was a std::string).
- *
- * @param arg A string encoded in UTF32
- *
- * @returns A number no greater than 4 * arg.length(), depending on the number
- * of graphemes/codepoints in the string.
- */
- inline size_t length_u8(std::u32string_view arg) {
- return ada::idna::utf8_length_from_utf32(arg.data(), arg.length());
- }
- /**
- * @brief Calculates the string-length of the argument, treating multi-byte
- * characters and unicode graphemes as single characters (which std::string
- * cannot do).
- *
- * @param arg Any UTF8 compatible string (including a standard ASCII string)
- *
- * @returns A number no greater than arg.length(), depending on the number of
- * graphemes/codepoints in the string.
- */
- inline size_t length_u32(std::string_view arg) {
- return ada::idna::utf32_length_from_utf8(arg.data(), arg.length());
- }
- inline std::string to_u8(std::u32string_view str) {
- auto data = std::make_unique_for_overwrite<char[]>(4 * str.length());
- size_t bytes = ada::idna::utf32_to_utf8(str.data(), str.length(), data.get());
- return std::string(data.get(), data.get() + bytes);
- }
- inline std::u32string to_u32(std::string_view str) {
- auto data = std::make_unique_for_overwrite<char32_t[]>(str.length());
- size_t bytes = ada::idna::utf8_to_utf32(str.data(), str.length(), data.get());
- return std::u32string(data.get(), data.get() + bytes);
- }
- #elif JVALIDATE_HAS_ICU
- inline size_t length_u32(std::string_view arg) {
- icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
- return ucs.countChar32();
- }
- #endif
- #if JVALIDATE_HAS_IDNA || JVALIDATE_HAS_ICU
- /**
- * @brief A proxy for jvalidate::detail::length_u32. This method is provided
- * so that it is possible to use maxLength and minLength constraints even when
- * building without IDNA or ICU in the toolchain.
- *
- * @param arg Any UTF8 compatible string (including a standard ASCII string)
- *
- * @returns A number no greater than arg.length(), depending on the number of
- * graphemes/codepoints in the string.
- */
- inline size_t length(std::string_view arg) { return length_u32(arg); }
- #else
/**
 * @brief Fallback string length for builds with neither IDNA nor ICU: the
 * argument is measured in bytes, with no attempt to decode multi-byte
 * characters. A multi-byte character therefore counts once per byte.
 *
 * Having this definition means the maxLength and minLength constraints remain
 * usable even when no Unicode library is in the toolchain.
 *
 * @param arg Any UTF8 compatible string (including a standard ASCII string)
 *
 * @returns arg.size()
 */
inline size_t length(std::string_view arg) {
  return arg.size();
}
- #endif
- }
|