|
|
@@ -0,0 +1,123 @@
|
|
|
+#pragma once
|
|
|
#include <algorithm>
#include <array>
#include <cstddef>
#include <string_view>
#include <utility>

#include <jvalidate/detail/string.h>
#include <jvalidate/forward.h>
|
|
|
+
|
|
|
namespace jvalidate::format::detail {
/**
 * @brief A positional rule applied to every occurrence of a set of characters.
 *
 * The rule holds for a string when, for each index at which one of the
 * characters in @ref target appears, the predicate @ref accepts_at returns
 * true for that index.
 *
 * @tparam CharT Character type of the strings being checked.
 */
template <typename CharT> struct special_case {
  /// Characters whose presence triggers the predicate.
  std::basic_string_view<CharT> target;
  /// Predicate called with the whole string and the index of a matched character.
  bool (*accepts_at)(std::basic_string_view<CharT>, size_t);

  /**
   * @brief Check every occurrence of a target character in @p str.
   *
   * @param str The string to scan.
   * @return false as soon as the predicate rejects one occurrence; true if
   * every occurrence is accepted or no target character is present.
   */
  bool accepts(std::basic_string_view<CharT> const str) const {
    auto hit = str.find_first_of(target);
    while (hit != str.npos) {
      if (!accepts_at(str, hit)) {
        return false;
      }
      // Resume the search just past the occurrence that was checked.
      hit = str.find_first_of(target, hit + 1);
    }
    return true;
  }
};
} // namespace jvalidate::format::detail
|
|
|
+
|
|
|
namespace jvalidate::format::detail {
/**
 * @brief Code points that receive special-case handling, per the Exceptions
 * table (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6).
 *
 * Declared `inline` so that the inline functions in this header refer to a
 * single entity across translation units.
 */
inline constexpr std::u32string_view g_exception_chars =
    U"\u00B7\u00DF\u0375\u03C2\u05F3\u05F4\u0640\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667"
    U"\u0668\u0669\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9\u06FD\u06FE\u07FA"
    U"\u0F0B\u3007\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B\u30FB";

/**
 * @brief The subset of exception code points that are treated as DISALLOWED.
 */
inline constexpr std::u32string_view g_exception_disallowed_chars =
    U"\u0640\u07FA\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B";

/**
 * @brief Determine if the character is NOT one of those DISALLOWED by special
 * case in the Exceptions
 * (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6) table.
 *
 * @param c The code point to test.
 * @return true when @p c is absent from g_exception_disallowed_chars, false
 * when it is one of the disallowed exception characters.
 */
inline bool is_not_disallowed_exception(char32_t c) {
  return g_exception_disallowed_chars.find(c) == std::u32string_view::npos;
}

/**
 * @brief Determine if the character is in the "Greek" character range
 * (U+0370 through U+03FF inclusive).
 */
inline bool is_greek(char32_t c) { return c >= U'\u0370' && c <= U'\u03FF'; }

/**
 * @brief Determine if the character is in the "Hebrew" character range
 * (U+0590 through U+05FF inclusive).
 */
inline bool is_hebrew(char32_t c) { return c >= U'\u0590' && c <= U'\u05FF'; }

/**
 * @brief Determine if the character is in the "Han" (Kanji), "Hiragana", or
 * "Katakana" character ranges, excepting "KATAKANA MIDDLE DOT" (U+30FB).
 *
 * @param c The code point to test.
 * @return true when @p c lies inside any of the inclusive ranges listed below.
 */
inline bool is_jp(char32_t c) {
  using P = std::pair<char32_t, char32_t>;
  // Inclusive [first, second] ranges. The kana range is split in two so that
  // U+30FB (KATAKANA MIDDLE DOT) falls into the gap between them.
  constexpr std::array range{P{U'\u3040', U'\u30FA'},         P{U'\u30FC', U'\u30FF'},
                             P{U'\u4E00', U'\u9FFF'},         P{U'\u3400', U'\u4DBF'},
                             P{U'\U00020000', U'\U0002A6DF'}, P{U'\U0002A700', U'\U0002EBEF'},
                             P{U'\U00030000', U'\U000323AF'}, P{U'\U0002EBF0', U'\U0002EE5F'},
                             P{U'\U000323B0', U'\U0003347F'}, P{U'\uF900', U'\uFAFF'},
                             P{U'\u2E80', U'\u303F'},         P{U'\u31C0', U'\u31EF'}};
  return std::any_of(range.begin(), range.end(),
                     [c](P const & p) { return c >= p.first && c <= p.second; });
}

/**
 * @brief Determine if the character is an ASCII 'l' - required for handling
 * "MIDDLE DOT".
 */
inline bool is_l_char(char32_t c) { return c == U'l'; }
} // namespace jvalidate::format::detail
|
|
|
+
|
|
|
namespace jvalidate::format::detail {
/**
 * @brief Test the character immediately preceding index @p n.
 *
 * @tparam F Predicate callable with a single char32_t.
 * @param str The string being inspected.
 * @param n Index of the character of interest.
 * @return true iff a character exists at index n-1 and F accepts it.
 */
template <auto F> constexpr bool char_before(std::u32string_view str, size_t n) {
  // n == 0 has no predecessor; n > size() would index past the end.
  return n != 0 && n <= str.size() && F(str[n - 1]);
}

/**
 * @brief Test the character immediately following index @p n.
 *
 * @tparam F Predicate callable with a single char32_t.
 * @param str The string being inspected.
 * @param n Index of the character of interest.
 * @return true iff a character exists at index n+1 and F accepts it.
 */
template <auto F> constexpr bool char_after(std::u32string_view str, size_t n) {
  // Phrased without `str.size() - 1`, which wraps around for an empty view
  // and would let the out-of-range read of str[n + 1] through.
  return n < str.size() && n + 1 < str.size() && F(str[n + 1]);
}

/**
 * @brief True iff F accepts the character before OR the character after
 * index @p n (a missing neighbour counts as not accepted).
 */
template <auto F> constexpr bool before_or_after(std::u32string_view str, size_t n) {
  return char_before<F>(str, n) || char_after<F>(str, n);
}

/**
 * @brief True iff F accepts BOTH the character before and the character after
 * index @p n (so the result is false at either end of the string).
 */
template <auto F> constexpr bool before_and_after(std::u32string_view str, size_t n) {
  return char_before<F>(str, n) && char_after<F>(str, n);
}

/**
 * @brief True iff F accepts at least one character of @p str other than the
 * one at index @p n.
 *
 * @param str The string being inspected.
 * @param n Index of the character to skip. An index outside the string skips
 * nothing, so the whole string is scanned.
 */
template <auto F> constexpr bool any_other_char(std::u32string_view str, size_t n) {
  if (n >= str.size()) {
    return std::any_of(str.begin(), str.end(), F);
  }
  auto const skipped = str.begin() + n;
  return std::any_of(str.begin(), skipped, F) || std::any_of(skipped + 1, str.end(), F);
}
} // namespace jvalidate::format::detail
|
|
|
+
|
|
|
+namespace jvalidate::format::detail {
|
|
|
+using std::string_view_literals::operator""sv;
|
|
|
+template <typename CharT> struct char_delimiters;
|
|
|
+
|
|
|
+template <> struct char_delimiters<char> {
|
|
|
+ static constexpr std::string_view dotdot{".."};
|
|
|
+ static constexpr std::string_view illegal_hostname_chars;
|
|
|
+ static constexpr std::string_view punycode_prefix{"xn--"};
|
|
|
+ static constexpr std::string_view illegal_dashes_ulabel{"--"};
|
|
|
+ static constexpr std::array<special_case<char>, 0> special_cases;
|
|
|
+};
|
|
|
+
|
|
|
+template <> struct char_delimiters<char32_t> {
|
|
|
+ static constexpr std::u32string_view dotdot{U".."};
|
|
|
+ static constexpr std::u32string_view punycode_prefix{U"xn--"};
|
|
|
+ static constexpr std::u32string_view illegal_dashes_ulabel{U"--"};
|
|
|
+ static constexpr std::u32string_view illegal_hostname_chars{U"\u302E"};
|
|
|
+ static constexpr std::array special_cases{
|
|
|
+ special_case{U"\u0375"sv, char_after<is_greek>},
|
|
|
+ special_case{U"\u05f3"sv, char_before<is_hebrew>},
|
|
|
+ special_case{U"\u05f4"sv, char_before<is_hebrew>},
|
|
|
+ special_case{U"\u00b7"sv, before_and_after<is_l_char>},
|
|
|
+ special_case{U"\u30fb"sv, any_other_char<is_jp>},
|
|
|
+ special_case{g_exception_chars, before_or_after<is_not_disallowed_exception>},
|
|
|
+ };
|
|
|
+};
|
|
|
+}
|