| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- #pragma once
- #include <string_view>
- #include <jvalidate/detail/string.h>
- #include <jvalidate/forward.h>
- namespace jvalidate::format::detail {
/**
 * @brief A contextual rule that is checked at every occurrence of a set of
 * trigger characters inside a string.
 *
 * @tparam CharT Character type of the strings being inspected.
 */
template <typename CharT> struct special_case {
  /// Trigger characters; a position matches when it holds ANY one of them.
  std::basic_string_view<CharT> target;
  /// Predicate invoked with the whole string and the index of a match.
  bool (*accepts_at)(std::basic_string_view<CharT>, size_t);

  /**
   * @brief Run accepts_at on each position of @p str holding a trigger
   * character.
   *
   * @param str The string to scan.
   *
   * @return false as soon as accepts_at rejects a position; true otherwise
   * (including when @p str contains no trigger character at all).
   */
  bool accepts(std::basic_string_view<CharT> const str) const {
    auto pos = str.find_first_of(target);
    while (pos != str.npos) {
      if (!accepts_at(str, pos)) {
        return false;
      }
      pos = str.find_first_of(target, pos + 1);
    }
    return true;
  }
};
- }
- namespace jvalidate::format::detail {
/**
 * @brief Code points that receive special-case handling, per the Exceptions
 * table (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6).
 *
 * Declared `inline` so that every translation unit including this header
 * shares one object; inline functions that odr-use it then refer to the same
 * entity everywhere.
 */
inline constexpr std::u32string_view g_exception_chars =
    U"\u00B7\u00DF\u0375\u03C2\u05F3\u05F4\u0640\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667"
    U"\u0668\u0669\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9\u06FD\u06FE\u07FA"
    U"\u0F0B\u3007\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B\u30FB";

/**
 * @brief The subset of g_exception_chars that is treated as DISALLOWED.
 */
inline constexpr std::u32string_view g_exception_disallowed_chars =
    U"\u0640\u07FA\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B";
- /**
- * @brief Determine if the character is DISALLOWED by special case in the
- * Exceptions (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6)
- * table.
- */
- inline bool is_not_disallowed_exception(char32_t c) {
- return g_exception_disallowed_chars.find(c) == std::u32string_view::npos;
- }
/**
 * @brief Determine if the character lies in the "Greek" character range,
 * U+0370 through U+03FF inclusive.
 */
inline bool is_greek(char32_t c) {
  constexpr char32_t first = U'\u0370';
  constexpr char32_t last = U'\u03FF';
  return !(c < first || c > last);
}
/**
 * @brief Determine if the character lies in the "Hebrew" character range,
 * U+0590 through U+05FF inclusive.
 */
inline bool is_hebrew(char32_t c) {
  constexpr char32_t first = U'\u0590';
  constexpr char32_t last = U'\u05FF';
  return !(c < first || c > last);
}
/**
 * @brief Determine if the character is in the "Han" (Kanji), "Hiragana", or
 * "Katakana" character ranges, excepting "KATAKANA MIDDLE DOT".
 *
 * @param c The code point to classify.
 *
 * @return true when @p c falls inside any of the inclusive ranges listed
 * below.
 */
inline bool is_jp(char32_t c) {
  using P = std::pair<char32_t, char32_t>;
  // Inclusive [first, second] ranges. The first two entries deliberately leave
  // a one-code-point gap at U+30FB (KATAKANA MIDDLE DOT).
  constexpr std::array range{P{U'\u3040', U'\u30FA'}, P{U'\u30FC', U'\u30FF'},
                             P{U'\u4E00', U'\u9FFF'}, P{U'\u3400', U'\u4DBF'},
                             P{U'\U00020000', U'\U0002A6DF'}, P{U'\U0002A700', U'\U0002EBEF'},
                             P{U'\U00030000', U'\U000323AF'}, P{U'\U0002EBF0', U'\U0002EE5F'},
                             P{U'\U000323B0', U'\U0003347F'}, P{U'\uF900', U'\uFAFF'},
                             P{U'\u2E80', U'\u303F'}, P{U'\u31C0', U'\u31EF'}};
  return std::any_of(range.begin(), range.end(),
                     [c](P p) { return c >= p.first && c <= p.second; });
}
/**
 * @brief Determine if the character is the lowercase ASCII letter 'l', which
 * is needed when handling "MIDDLE DOT".
 */
inline bool is_l_char(char32_t c) {
  return 'l' == c;
}
- }
- namespace jvalidate::format::detail {
/**
 * @brief Test the character immediately preceding index @p n with predicate F.
 *
 * @tparam F Predicate callable with a single char32_t.
 * @param str The string being inspected.
 * @param n Index of the character of interest within @p str.
 *
 * @return false when @p n is 0 (there is no preceding character); otherwise
 * the result of F on str[n - 1].
 */
template <auto F> constexpr auto char_before(std::u32string_view str, size_t n) {
  if (n == 0) {
    return false;
  }
  return static_cast<bool>(F(str[n - 1]));
}
/**
 * @brief Test the character immediately following index @p n with predicate F.
 *
 * @tparam F Predicate callable with a single char32_t.
 * @param str The string being inspected.
 * @param n Index of the character of interest within @p str.
 *
 * @return false when there is no character after @p n (that is, @p n is the
 * last index, @p str is empty, or @p n is out of range); otherwise the result
 * of F on str[n + 1].
 */
template <auto F> constexpr auto char_after(std::u32string_view str, size_t n) {
  // Compare against size() directly: `size() - 1` would wrap around for an
  // empty view and let an out-of-range index through to the subscript.
  return n < str.size() && n + 1 < str.size() && F(str[n + 1]);
}
- template <auto F> constexpr auto before_or_after(std::u32string_view str, size_t n) {
- return char_before<F>(str, n) || char_after<F>(str, n);
- }
- template <auto F> constexpr auto before_and_after(std::u32string_view str, size_t n) {
- return char_before<F>(str, n) && char_after<F>(str, n);
- }
/**
 * @brief Test whether predicate F holds for at least one character of
 * @p str other than the one at index @p n.
 *
 * @tparam F Predicate callable with a single char32_t.
 * @param str The string being inspected.
 * @param n Index of the character to skip; must be a valid index into @p str.
 */
template <auto F> constexpr auto any_other_char(std::u32string_view str, size_t n) {
  auto const skipped = str.begin() + n;
  // Characters in front of the skipped position first ...
  if (std::any_of(str.begin(), skipped, F)) {
    return true;
  }
  // ... then everything behind it.
  return std::any_of(skipped + 1, str.end(), F);
}
- }
- namespace jvalidate::format::detail {
- using std::string_view_literals::operator""sv;
- template <typename CharT> struct char_delimiters;
- template <> struct char_delimiters<char> {
- static constexpr std::string_view dotdot{".."};
- static constexpr std::string_view double_slash{"//"};
- static constexpr std::string_view illegal_hostname_chars;
- static constexpr std::string_view punycode_prefix{"xn--"};
- static constexpr std::string_view illegal_dashes_ulabel{"--"};
- static constexpr std::array<special_case<char>, 0> special_cases;
- };
- template <> struct char_delimiters<char32_t> {
- static constexpr std::u32string_view dotdot{U".."};
- static constexpr std::u32string_view double_slash{U"//"};
- static constexpr std::u32string_view punycode_prefix{U"xn--"};
- static constexpr std::u32string_view illegal_dashes_ulabel{U"--"};
- static constexpr std::u32string_view illegal_hostname_chars{U"\u302E"};
- static constexpr std::array special_cases{
- special_case{U"\u0375"sv, char_after<is_greek>},
- special_case{U"\u05f3"sv, char_before<is_hebrew>},
- special_case{U"\u05f4"sv, char_before<is_hebrew>},
- special_case{U"\u00b7"sv, before_and_after<is_l_char>},
- special_case{U"\u30fb"sv, any_other_char<is_jp>},
- special_case{g_exception_chars, before_or_after<is_not_disallowed_exception>},
- };
- };
- }
|