#pragma once

#include <algorithm>
#include <array>
#include <cstddef>
#include <string_view>
#include <utility>

// NOTE(review): header names and template parameter/argument lists in this
// file were reconstructed from usage - confirm against the original source.

namespace jvalidate::format::detail {

/**
 * @brief A contextual rule bound to a set of trigger characters.
 *
 * @tparam CharT Character type of the strings being validated.
 */
template <typename CharT> struct special_case {
  /** Characters whose presence in a string triggers the rule. */
  std::basic_string_view<CharT> target;
  /** Predicate evaluated as accepts_at(str, index) for every hit. */
  bool (*accepts_at)(std::basic_string_view<CharT>, std::size_t);

  /**
   * @brief Check every occurrence of any @ref target character in @p str.
   *
   * @param str The string to inspect.
   * @return false as soon as @ref accepts_at rejects one occurrence; true
   * otherwise, including when no target character occurs at all.
   */
  bool accepts(std::basic_string_view<CharT> const str) const {
    for (std::size_t n = str.find_first_of(target); n != str.npos;
         n = str.find_first_of(target, n + 1)) {
      if (not accepts_at(str, n)) {
        return false;
      }
    }
    return true;
  }
};

/**
 * @brief Code points of the RFC 5892 section 2.6 "Exceptions" table
 * (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6).
 */
inline constexpr std::u32string_view g_exception_chars =
    U"\u00B7\u00DF\u0375\u03C2\u05F3\u05F4\u0640\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667"
    U"\u0668\u0669\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9\u06FD\u06FE\u07FA"
    U"\u0F0B\u3007\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B\u30FB";

/**
 * @brief The subset of @ref g_exception_chars that
 * @ref is_not_disallowed_exception rejects.
 */
inline constexpr std::u32string_view g_exception_disallowed_chars =
    U"\u0640\u07FA\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B";

/**
 * @brief Determine if the character is NOT one of the characters marked
 * DISALLOWED by special case in the Exceptions
 * (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6) table.
 *
 * @return true when @p c is absent from @ref g_exception_disallowed_chars.
 */
inline bool is_not_disallowed_exception(char32_t c) {
  return g_exception_disallowed_chars.find(c) == std::u32string_view::npos;
}

/**
 * @brief Determine if the character is in the "Greek" character range
 * (U+0370..U+03FF).
 */
inline bool is_greek(char32_t c) { return c >= U'\u0370' && c <= U'\u03FF'; }

/**
 * @brief Determine if the character is in the "Hebrew" character range
 * (U+0590..U+05FF).
 */
inline bool is_hebrew(char32_t c) { return c >= U'\u0590' && c <= U'\u05FF'; }

/**
 * @brief Determine if the character is in the "Han" (Kanji), "Hiragana", or
 * "Katakana" character ranges, excepting "KATAKANA MIDDLE DOT" (U+30FB).
 */
inline bool is_jp(char32_t c) {
  using P = std::pair<char32_t, char32_t>;
  // Inclusive [first, second] ranges. U+30FB is left out by splitting the
  // kana range around it.
  constexpr std::array range{
      P{U'\u3040', U'\u30FA'},         // Hiragana, Katakana below U+30FB
      P{U'\u30FC', U'\u30FF'},         // Katakana above U+30FB
      P{U'\u4E00', U'\u9FFF'},         // CJK Unified Ideographs
      P{U'\u3400', U'\u4DBF'},         // CJK Extension A
      P{U'\U00020000', U'\U0002A6DF'}, // supplementary-plane CJK extensions
      P{U'\U0002A700', U'\U0002EBEF'},
      P{U'\U00030000', U'\U000323AF'},
      P{U'\U0002EBF0', U'\U0002EE5F'},
      P{U'\U000323B0', U'\U0003347F'},
      P{U'\uF900', U'\uFAFF'},         // CJK Compatibility Ideographs
      P{U'\u2E80', U'\u303F'},         // CJK radicals .. CJK symbols
      P{U'\u31C0', U'\u31EF'}};        // CJK Strokes
  return std::any_of(range.begin(), range.end(),
                     [c](P const & p) { return c >= p.first && c <= p.second; });
}

/**
 * @brief Determine if the character is an ASCII 'l' - required for handling
 * "MIDDLE DOT".
 */
inline bool is_l_char(char32_t c) { return c == U'l'; }

/**
 * @brief Test the character immediately before position @p n.
 *
 * @tparam F Predicate callable as F(char32_t).
 * @return F(str[n - 1]), or false when no such character exists.
 */
template <auto F> constexpr bool char_before(std::u32string_view str, std::size_t n) {
  return n != 0 && n <= str.size() && F(str[n - 1]);
}

/**
 * @brief Test the character immediately after position @p n.
 *
 * @tparam F Predicate callable as F(char32_t).
 * @return F(str[n + 1]), or false when @p n is the last index or beyond
 * (which also covers an empty string).
 */
template <auto F> constexpr bool char_after(std::u32string_view str, std::size_t n) {
  // "n + 1 < size" rather than "n != size - 1": the latter wraps around for
  // an empty view and would then index out of bounds.
  return n < str.size() && n + 1 < str.size() && F(str[n + 1]);
}

/**
 * @brief True when @p F holds for the character before OR after @p n.
 */
template <auto F> constexpr bool before_or_after(std::u32string_view str, std::size_t n) {
  return char_before<F>(str, n) || char_after<F>(str, n);
}

/**
 * @brief True when @p F holds for the character before AND after @p n.
 */
template <auto F> constexpr bool before_and_after(std::u32string_view str, std::size_t n) {
  return char_before<F>(str, n) && char_after<F>(str, n);
}

/**
 * @brief True when @p F holds for at least one character of @p str other
 * than the one at index @p n.
 *
 * An index past the end excludes nothing, so the whole string is searched.
 */
template <auto F> constexpr bool any_other_char(std::u32string_view str, std::size_t n) {
  std::size_t const pivot = std::min(n, str.size());
  if (std::any_of(str.begin(), str.begin() + pivot, F)) {
    return true;
  }
  return pivot < str.size() && std::any_of(str.begin() + pivot + 1, str.end(), F);
}

using std::string_view_literals::operator""sv;

/**
 * @brief Per-character-type constants used by hostname validation.
 */
template <typename CharT> struct char_delimiters;

/**
 * @brief Constants for narrow (char) strings; no illegal characters and no
 * contextual rules are defined for this character type.
 */
template <> struct char_delimiters<char> {
  static constexpr std::string_view dotdot{".."};
  static constexpr std::string_view illegal_hostname_chars{};
  static constexpr std::string_view punycode_prefix{"xn--"};
  static constexpr std::string_view illegal_dashes_ulabel{"--"};
  static constexpr std::array<special_case<char>, 0> special_cases{};
};

/**
 * @brief Constants for UTF-32 strings, including the contextual rules of
 * RFC 5892 Appendix A.
 */
template <> struct char_delimiters<char32_t> {
  static constexpr std::u32string_view dotdot{U".."};
  static constexpr std::u32string_view punycode_prefix{U"xn--"};
  static constexpr std::u32string_view illegal_dashes_ulabel{U"--"};
  static constexpr std::u32string_view illegal_hostname_chars{U"\u302E"};
  static constexpr std::array special_cases{
      // GREEK LOWER NUMERAL SIGN: must be followed by a Greek character.
      special_case<char32_t>{U"\u0375"sv, char_after<is_greek>},
      // HEBREW GERESH / GERSHAYIM: must be preceded by a Hebrew character.
      special_case<char32_t>{U"\u05f3"sv, char_before<is_hebrew>},
      special_case<char32_t>{U"\u05f4"sv, char_before<is_hebrew>},
      // MIDDLE DOT: must sit between two 'l' characters.
      special_case<char32_t>{U"\u00b7"sv, before_and_after<is_l_char>},
      // KATAKANA MIDDLE DOT: needs a Hiragana/Katakana/Han character elsewhere.
      special_case<char32_t>{U"\u30fb"sv, any_other_char<is_jp>},
      // NOTE(review): the predicate of this catch-all entry was reconstructed
      // (it is the only predicate not used above) - confirm.
      special_case<char32_t>{g_exception_chars, before_or_after<is_not_disallowed_exception>},
  };
};

} // namespace jvalidate::format::detail