59 Commitit d1f4328e3f ... 26adf67f22

Tekijä SHA1 Viesti Päivämäärä
  Sam Jaffe 26adf67f22 Merge branch 'feat/format-matcher' into refactor/expected 1 viikko sitten
  Sam Jaffe af414740e4 refactor: remove unreachable line 1 viikko sitten
  Sam Jaffe 5b3e2b24f7 fix: compiler errors and test regression(s) 1 viikko sitten
  Sam Jaffe cbf59019cb Merge branch 'master' into feat/format-matcher 1 viikko sitten
  Sam Jaffe 438808fdfb Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe e92a647192 fix: inverted condition 2 viikkoa sitten
  Sam Jaffe baed30a69a chore: delete submodule 2 viikkoa sitten
  Sam Jaffe 14ae172ad0 Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe bfaa0f2924 fix: allow CMake to decide the JVALIDATE_USE macros 2 viikkoa sitten
  Sam Jaffe 8b6011e809 chore: attach to all test cases, since has-include is too good 2 viikkoa sitten
  Sam Jaffe d208bee4f4 chore: merge yes/no branches for adding ICU/IDNA 2 viikkoa sitten
  Sam Jaffe 40d5876d14 fix: make it so test cases will properly be filtered if either of IDNA and/or ICU are missing 2 viikkoa sitten
  Sam Jaffe a49bf7bfce refactor: remove length_u8 and length_u32 as they are no longer relevant for format validation 2 viikkoa sitten
  Sam Jaffe ef9be256fe fix: re-allow empty strings in ipv6-style URI/IRI 2 viikkoa sitten
  Sam Jaffe 09bdb0a972 fix: update format code to account for new tests 2 viikkoa sitten
  Sam Jaffe f8ae75c4f9 Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe e5db5e3730 chore: update to Test-JSON-Schema-Acceptance-1.037 2 viikkoa sitten
  Sam Jaffe 0e4dfd7965 test-fix: allow for CMake to provide the testdir 2 viikkoa sitten
  Sam Jaffe e81526a6c4 chore: update Makefile to run all supported tests 2 viikkoa sitten
  Sam Jaffe 436f35a923 refactor: move construction of FormatValidator into Validator, allowing the user to provide user-defined-format codes 2 viikkoa sitten
  Sam Jaffe aa75d9a8f5 chore: fix typo 2 viikkoa sitten
  Sam Jaffe f22fb02f21 feat: implement draft03 format phone numbers according to E.123 format 2 viikkoa sitten
  Sam Jaffe 1f274c425d refactor: renaming in FormatValidator 2 viikkoa sitten
  Sam Jaffe 704864c10e feat: add support for Draft03-format keywords: date-time, ip-address, host-name (alt. name) and time, utc-millisec, color (new/changed) 2 viikkoa sitten
  Sam Jaffe 741a03cde6 Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe 7e34f035fb chore: disable ecmascript format test 2 viikkoa sitten
  Sam Jaffe 4db4a02747 Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe d3e5492e1f Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe 266137e95c Merge branch 'master' into feat/format-matcher 2 viikkoa sitten
  Sam Jaffe be8d8dcf1d feat: add ICU Regex Engine option 7 kuukautta sitten
  Sam Jaffe 1c382e8a53 docs: add more explanation in format.h 7 kuukautta sitten
  Sam Jaffe aa196e3a5d chore: fix filtering of test cases 7 kuukautta sitten
  Sam Jaffe 29bdafb5a8 feat: implement uri_template format 7 kuukautta sitten
  Sam Jaffe c2dedc8efe feat: implement uri-reference and iri-reference 7 kuukautta sitten
  Sam Jaffe da0d0a1dd8 feat: implement iri format testing 7 kuukautta sitten
  Sam Jaffe 1c0127945e feat: implement uri-format (excluding IPvFuture) 7 kuukautta sitten
  Sam Jaffe 5c18d15254 refactor: move simple macros to _macro.h, add IIF 7 kuukautta sitten
  Sam Jaffe a7f8212906 refactor: disable I18N formats when missing IDNA library 7 kuukautta sitten
  Sam Jaffe 7fe69c0e39 refactor: switch utf8/utf32 conversions to use idna over icu 7 kuukautta sitten
  Sam Jaffe 5bea563a43 refactor: remove regex_escape function 7 kuukautta sitten
  Sam Jaffe 71c2c84e49 fix: add leapsecond support 7 kuukautta sitten
  Sam Jaffe 58959a21ee feat: implement hostname/idn-hostname 7 kuukautta sitten
  Sam Jaffe 6dc36beb9b refactor: missing include for std::isalnum 7 kuukautta sitten
  Sam Jaffe 9ab9476d66 fix: string-bounds 7 kuukautta sitten
  Sam Jaffe bbd23791e9 refactor: cleanup ICU string usage (TODO non-icu support) 7 kuukautta sitten
  Sam Jaffe 53e3015fea feat: prospective idn-email and idn-hostname support 8 kuukautta sitten
  Sam Jaffe ea5e823892 refactor: remove extra guard on "json-pointer" format 8 kuukautta sitten
  Sam Jaffe c324da4c97 Merge branch 'master' into feat/format-matcher 8 kuukautta sitten
  Sam Jaffe 9da98fae0c fix: add expect in JSON-Pointer for leading / 8 kuukautta sitten
  Sam Jaffe 2720f2f9f2 refactor: cleanup 8 kuukautta sitten
  Sam Jaffe f477659a6f chore: add format tests to MAKE 8 kuukautta sitten
  Sam Jaffe b668d68059 refactor: pass regex engine into format validator 8 kuukautta sitten
  Sam Jaffe a13c6de001 refactor: compress details in vocabulary::Metadata w/ enum 8 kuukautta sitten
  Sam Jaffe 2699edfbd7 refactor: implement required-keywords 8 kuukautta sitten
  Sam Jaffe cc172b0678 fix: validation on RelativePointer for e.g. "01/A" 8 kuukautta sitten
  Sam Jaffe 68804cd7c3 fix: JSON-Pointer handling is looser in the schema handler than the format handler 8 kuukautta sitten
  Sam Jaffe 7525ba870f refactor: add validator for json-pointer and relative-json-pointer 8 kuukautta sitten
  Sam Jaffe c027e6c5ba refactor/fix: make integer parsing for Pointer require all chars in the string 8 kuukautta sitten
  Sam Jaffe 49623e7f72 refactor: don't use optional for relative pointer 8 kuukautta sitten

+ 7 - 0
include/jvalidate/_config.h

@@ -6,3 +6,10 @@
 #else
 #define JVALIDATE_HAS_ICU 0
 #endif
+
+#if defined(JVALIDATE_HAS_IDNA) // Already defined before this point: keep that value untouched.
+#elif __has_include(<ada/idna/to_unicode.h>) // Otherwise probe for the ada-idna header.
+#define JVALIDATE_HAS_IDNA 1
+#else
+#define JVALIDATE_HAS_IDNA 0 // Header not found: code guarded by JVALIDATE_HAS_IDNA is compiled out.
+#endif

+ 1 - 1
include/jvalidate/constraint.h

@@ -745,7 +745,7 @@ public:
    * @throws If the contained value is not interpretable as a string
    */
   static auto format(detail::ParserContext<A> const & context) {
-    return ptr(constraint::FormatConstraint{context.schema.as_string(),
+    return ptr(constraint::FormatConstraint{context.schema.as_string(), context.vocab->version(),
                                             context.vocab->is_format_assertion()});
   }
 

+ 2 - 0
include/jvalidate/constraint/string_constraint.h

@@ -3,6 +3,7 @@
 #include <string>
 
 #include <jvalidate/detail/string.h>
+#include <jvalidate/enum.h>
 #include <jvalidate/forward.h>
 
 namespace jvalidate::constraint {
@@ -53,6 +54,7 @@ struct PatternConstraint {
  */
 struct FormatConstraint {
   std::string format; // Format name, read from the schema as a string
+  schema::Version for_version; // Schema version reported by the active vocabulary
   bool is_assertion; // Result of the vocabulary's is_format_assertion() when parsed
 };
 }

+ 131 - 0
include/jvalidate/detail/idna_special_cases.h

@@ -0,0 +1,131 @@
+#pragma once
+#include <string_view>
+
+#include <jvalidate/detail/string.h>
+#include <jvalidate/forward.h>
+
+namespace jvalidate::format::detail {
+/**
+ * @brief A contextual rule: every occurrence of one of the target characters
+ * must be approved by a position-aware predicate.
+ */
+template <typename CharT> struct special_case {
+  // The set of characters this rule applies to.
+  std::basic_string_view<CharT> target;
+  // Decides whether the character at the given index is acceptable there.
+  bool (*accepts_at)(std::basic_string_view<CharT>, size_t);
+
+  /**
+   * @brief Run accepts_at on every position of str holding a target character.
+   *
+   * @returns false as soon as one occurrence is refused; true otherwise,
+   * including when str contains no target character at all.
+   */
+  bool accepts(std::basic_string_view<CharT> const str) const {
+    size_t index = str.find_first_of(target);
+    while (index != str.npos) {
+      if (!accepts_at(str, index)) {
+        return false;
+      }
+      index = str.find_first_of(target, index + 1);
+    }
+    return true;
+  }
+};
+}
+
+namespace jvalidate::format::detail {
+// Code points listed in the RFC 5892 "Exceptions" table that are handled here.
+// Declared `inline constexpr` so that every translation unit including this
+// header shares one object; the inline functions below odr-use these tables.
+inline constexpr std::u32string_view g_exception_chars =
+    U"\u00B7\u00DF\u0375\u03C2\u05F3\u05F4\u0640\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667"
+    U"\u0668\u0669\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9\u06FD\u06FE\u07FA"
+    U"\u0F0B\u3007\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B\u30FB";
+// Subset of the table above that is unconditionally DISALLOWED.
+inline constexpr std::u32string_view g_exception_disallowed_chars =
+    U"\u0640\u07FA\u302E\u302F\u3031\u3032\u3033\u3034\u3035\u303B";
+
+/**
+ * @brief Determine if the character is DISALLOWED by special case in the
+ * Exceptions (https://datatracker.ietf.org/doc/html/rfc5892#section-2.6)
+ * table.
+ *
+ * @returns true when c is NOT one of the disallowed exception characters.
+ */
+inline bool is_not_disallowed_exception(char32_t c) {
+  return g_exception_disallowed_chars.find(c) == std::u32string_view::npos;
+}
+
+/**
+ * @brief Test whether c falls in the "Greek" range U+0370..U+03FF (inclusive).
+ */
+inline bool is_greek(char32_t c) {
+  constexpr char32_t first = U'\u0370';
+  constexpr char32_t last = U'\u03FF';
+  return first <= c && c <= last;
+}
+
+/**
+ * @brief Test whether c falls in the "Hebrew" range U+0590..U+05FF (inclusive).
+ */
+inline bool is_hebrew(char32_t c) {
+  constexpr char32_t first = U'\u0590';
+  constexpr char32_t last = U'\u05FF';
+  return first <= c && c <= last;
+}
+
+/**
+ * @brief Test whether c belongs to one of the "Han" (Kanji), "Hiragana" or
+ * "Katakana" ranges listed below. "KATAKANA MIDDLE DOT" (U+30FB) is left out
+ * on purpose: it sits in the gap between the first two ranges.
+ */
+inline bool is_jp(char32_t c) {
+  struct Span {
+    char32_t first;
+    char32_t last;
+  };
+  constexpr Span spans[] = {
+      {U'\u3040', U'\u30FA'},         {U'\u30FC', U'\u30FF'},
+      {U'\u4E00', U'\u9FFF'},         {U'\u3400', U'\u4DBF'},
+      {U'\U00020000', U'\U0002A6DF'}, {U'\U0002A700', U'\U0002EBEF'},
+      {U'\U00030000', U'\U000323AF'}, {U'\U0002EBF0', U'\U0002EE5F'},
+      {U'\U000323B0', U'\U0003347F'}, {U'\uF900', U'\uFAFF'},
+      {U'\u2E80', U'\u303F'},         {U'\u31C0', U'\u31EF'},
+  };
+  for (Span const & span : spans) {
+    if (span.first <= c && c <= span.last) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * @brief Test whether c is the ASCII lowercase letter 'l'. Used by the rule
+ * for "MIDDLE DOT".
+ */
+inline bool is_l_char(char32_t c) {
+  return U'l' == c;
+}
+}
+
+namespace jvalidate::format::detail {
+/**
+ * @brief True when a character exists immediately before index n and F
+ * accepts it. Out-of-range indexes yield false instead of reading past the view.
+ */
+template <auto F> constexpr auto char_before(std::u32string_view str, size_t n) {
+  return n != 0 && n <= str.size() && F(str[n - 1]);
+}
+
+/**
+ * @brief True when a character exists immediately after index n and F
+ * accepts it. Written without `size() - 1` so an empty view cannot wrap around.
+ */
+template <auto F> constexpr auto char_after(std::u32string_view str, size_t n) {
+  return n < str.size() && (n + 1) != str.size() && F(str[n + 1]);
+}
+
+/** @brief True when F accepts the neighbour on at least one side of index n. */
+template <auto F> constexpr auto before_or_after(std::u32string_view str, size_t n) {
+  return char_before<F>(str, n) || char_after<F>(str, n);
+}
+
+/** @brief True when F accepts the neighbours on both sides of index n. */
+template <auto F> constexpr auto before_and_after(std::u32string_view str, size_t n) {
+  return char_before<F>(str, n) && char_after<F>(str, n);
+}
+
+/**
+ * @brief True when F accepts any character of str other than the one at
+ * index n. An index at or past the end simply means "inspect every character".
+ */
+template <auto F> constexpr auto any_other_char(std::u32string_view str, size_t n) {
+  size_t const skip = std::min(n, str.size());
+  size_t const resume = std::min(skip + 1, str.size());
+  return std::any_of(str.begin(), str.begin() + skip, F) ||
+         std::any_of(str.begin() + resume, str.end(), F);
+}
+}
+
+namespace jvalidate::format::detail {
+using std::string_view_literals::operator""sv;
+/**
+ * @brief Per-character-type constants used by the hostname/URI validators.
+ * Only the `char` (ASCII) and `char32_t` (internationalized) specializations
+ * exist.
+ */
+template <typename CharT> struct char_delimiters;
+
+template <> struct char_delimiters<char> {
+  static constexpr std::string_view hostname_part_delims{"."};
+  static constexpr std::string_view dotdot{".."};
+  static constexpr std::string_view double_slash{"//"};
+  // No extra forbidden characters for ASCII names. Value-initialized explicitly:
+  // a constexpr member without an initializer is not portable.
+  static constexpr std::string_view illegal_hostname_chars{};
+  static constexpr std::string_view punycode_prefix{"xn--"};
+  static constexpr std::string_view illegal_dashes_ulabel{"--"};
+  // ASCII names have no contextual rules.
+  static constexpr std::array<special_case<char>, 0> special_cases{};
+};
+
+template <> struct char_delimiters<char32_t> {
+  // '.', IDEOGRAPHIC FULL STOP, FULLWIDTH FULL STOP, HALFWIDTH IDEOGRAPHIC FULL STOP
+  static constexpr std::u32string_view hostname_part_delims{U".\u3002\uff0e\uff61"};
+  static constexpr std::u32string_view dotdot{U".."};
+  static constexpr std::u32string_view double_slash{U"//"};
+  static constexpr std::u32string_view punycode_prefix{U"xn--"};
+  static constexpr std::u32string_view illegal_dashes_ulabel{U"--"};
+  static constexpr std::u32string_view illegal_hostname_chars{U"\u302E"};
+  // Contextual rules: each target character is only accepted when the paired
+  // position predicate holds.
+  static constexpr std::array special_cases{
+      special_case{U"\u0375"sv, char_after<is_greek>},
+      special_case{U"\u05f3"sv, char_before<is_hebrew>},
+      special_case{U"\u05f4"sv, char_before<is_hebrew>},
+      special_case{U"\u00b7"sv, before_and_after<is_l_char>},
+      special_case{U"\u30fb"sv, any_other_char<is_jp>},
+      special_case{g_exception_chars, before_or_after<is_not_disallowed_exception>},
+  };
+};
+
+/**
+ * @brief Apply every contextual rule registered for CharT to name.
+ *
+ * @returns true when no rule rejects name (trivially true when the rule list
+ * for CharT is empty).
+ */
+template <typename CharT> bool is_special_case_ok(std::basic_string_view<CharT> name) {
+  for (auto const & rule : detail::char_delimiters<CharT>::special_cases) {
+    if (!rule.accepts(name)) {
+      return false;
+    }
+  }
+  return true;
+}
+}

+ 38 - 1
include/jvalidate/detail/string.h

@@ -6,6 +6,7 @@
 #include <jvalidate/_config.h>
 
 #include <cstring>
+#include <memory>
 #include <string_view>
 
 #if JVALIDATE_HAS_ICU
@@ -13,8 +14,44 @@
 #include <unicode/unistr.h>
 #endif
 
+#if JVALIDATE_HAS_IDNA
+#include <ada/idna/unicode_transcoding.h>
+#endif
+
+#include <jvalidate/detail/expect.h>
+
 namespace jvalidate::detail {
-#if JVALIDATE_HAS_ICU
+/** @brief Identity overload: the argument is already UTF-8, hand it back unchanged. */
+inline std::string_view to_u8(std::string_view arg) {
+  return arg;
+}
+
+/** @brief Identity overload: the argument is already UTF-32, hand it back unchanged. */
+inline std::u32string_view to_u32(std::u32string_view arg) {
+  return arg;
+}
+}
+
+namespace jvalidate::detail {
+#if JVALIDATE_HAS_IDNA
+/**
+ * @brief Calculates the length of the argument as reported by
+ * ada::idna::utf32_length_from_utf8 for its bytes.
+ *
+ * @param arg Any UTF8 compatible string (including a standard ASCII string)
+ *
+ * @returns The value computed by ada-idna; NOTE(review): presumably the number
+ * of UTF-32 code points encoded in arg - confirm against the ada-idna docs.
+ */
+inline size_t length(std::string_view arg) {
+  return ada::idna::utf32_length_from_utf8(arg.data(), arg.length());
+}
+
+/**
+ * @brief Transcode a UTF-32 string to UTF-8 through ada-idna.
+ *
+ * The output buffer is sized at four bytes per input code point, then trimmed
+ * to the number of bytes the transcoder reports having written.
+ */
+inline std::string to_u8(std::u32string_view str) {
+  std::string out(4 * str.length(), '\0');
+  size_t const written = ada::idna::utf32_to_utf8(str.data(), str.length(), out.data());
+  out.resize(written);
+  return out;
+}
+
+/**
+ * @brief Transcode a UTF-8 string to UTF-32 through ada-idna.
+ *
+ * The output buffer is sized at one code point per input byte, then trimmed to
+ * the number of code points the transcoder reports having written.
+ */
+inline std::u32string to_u32(std::string_view str) {
+  std::u32string out(str.length(), U'\0');
+  size_t const written = ada::idna::utf8_to_utf32(str.data(), str.length(), out.data());
+  out.resize(written);
+  return out;
+}
+#elif JVALIDATE_HAS_ICU
 /**
  * @brief Calculates the string-length of the argument, treating multi-byte
  * characters and unicode graphemes as single characters (which std::string

+ 569 - 77
include/jvalidate/format.h

@@ -1,16 +1,72 @@
 #pragma once
+#include <cstdio>
+#include <functional>
+#include <jvalidate/_macro.h>
 
+#include <algorithm>
+#include <array>
 #include <cctype>
+#include <charconv>
+#include <chrono>
 #include <cstddef>
+#include <cstdint>
+#include <cstring>
 #include <ctime>
-#include <iostream>
 #include <string>
+#include <string_view>
+#include <system_error>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 
+#if JVALIDATE_HAS_IDNA
+#include <ada/idna/to_unicode.h>
+#include <ada/idna/validity.h>
+#endif
+
+#include <jvalidate/detail/expect.h>
+#include <jvalidate/detail/idna_special_cases.h>
+#include <jvalidate/detail/pointer.h>
+#include <jvalidate/detail/relative_pointer.h>
+#include <jvalidate/detail/string.h>
+#include <jvalidate/enum.h>
 #include <jvalidate/forward.h>
 
+#define CONSTRUCTS(TYPE) format::ctor_as_valid<detail::TYPE>
+
+#define UTF32(FN) JVALIDATE_IIF(JVALIDATE_HAS_IDNA, format::utf32<format::FN<char32_t>>, nullptr)
+
+namespace jvalidate::format {
+bool date(std::string_view dt);
+bool time(std::string_view dt);
+bool date_time(std::string_view dt);
+bool duration(std::string_view dur);
+
+template <typename CharT = char> bool uri(std::basic_string_view<CharT> uri);
+template <typename CharT = char> bool uri_reference(std::basic_string_view<CharT> uri);
+bool uri_template(std::u32string_view uri);
+bool uuid(std::string_view id);
+template <typename CharT = char> bool hostname(std::basic_string_view<CharT> name);
+
+bool ipv4(std::string_view ip);
+bool ipv6(std::string_view ip);
+
+template <typename CharT = char> bool email(std::basic_string_view<CharT> em);
+}
+
 namespace jvalidate::format::detail {
+/**
+ * @brief Test whether s consists solely of ASCII decimal digits and its
+ * length lies within [min, max].
+ *
+ * @param s The text to inspect (an empty string passes when min is 0)
+ * @param min Smallest accepted length
+ * @param max Largest accepted length
+ */
+inline bool is_dec(std::string_view s, size_t min = 0, size_t max = std::string_view::npos) {
+  if (s.size() < min || s.size() > max) {
+    return false;
+  }
+  for (char const c : s) {
+    if (c < '0' || c > '9') {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * @brief Test whether every character of s is an ASCII hexadecimal digit
+ * (either case). An empty string passes.
+ */
+inline bool is_hex(std::string_view s) {
+  for (char const c : s) {
+    bool const digit = c >= '0' && c <= '9';
+    bool const upper = c >= 'A' && c <= 'F';
+    bool const lower = c >= 'a' && c <= 'f';
+    if (!(digit || upper || lower)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+struct result {
+  ptrdiff_t consumed; // Number of input characters consumed by the parse (0 when it fails)
+  bool valid; // Whether the parsed text was accepted
+};
+
 inline bool is_leapyear(int y) { return (y % 400) == 0 || ((y % 4) == 0 && (y % 100) != 0); }
 
 inline bool illegal_date(int y, int m, int d) {
@@ -21,26 +77,228 @@ inline bool illegal_date(int y, int m, int d) {
   return d > days[m];
 }
 
-inline auto date(std::string_view dt) {
+inline result date(std::string_view dt) {
   struct tm tm;
   if (auto end = strptime(dt.data(), "%Y-%m-%d", &tm); end) {
     if ((end - dt.data()) != 10 || illegal_date(tm.tm_year + 1900, tm.tm_mon, tm.tm_mday)) {
-      return std::make_pair(0L, false);
+      return {.consumed = 0, .valid = false};
+    }
+    return {.consumed = end - dt.data(), .valid = true};
+  }
+  return {.consumed = 0L, .valid = false};
+}
+
+/**
+ * @brief Validate the seconds field of a parsed time with respect to leap
+ * seconds.
+ *
+ * Despite the name, every time whose seconds field is not 60 is accepted; only
+ * a seconds value of 60 is checked against the known leap seconds.
+ *
+ * @param tm Broken-down time, taken by value because std::mktime may rewrite it
+ *
+ * @returns true if tm.tm_sec != 60, or if the instant produced by std::mktime
+ * matches an entry of the tz database's leap-second list. false for a 60th
+ * second that is not listed, or when the library has no tz database support.
+ */
+inline bool is_leapsecond(std::tm tm) {
+  if (tm.tm_sec != 60) {
+    return true;
+  }
+
+#if __cpp_lib_chrono >= 201907L
+  tm.tm_isdst = -1;
+  // leap_second is only comparable with sys_time values (not with a bare
+  // duration), so compare the insertion dates as sys_seconds.
+  std::chrono::sys_seconds const when{std::chrono::seconds{std::mktime(&tm)}};
+  auto const & leap_seconds = std::chrono::get_tzdb().leap_seconds;
+  return std::ranges::any_of(leap_seconds,
+                             [when](auto const & leap) { return leap.date() == when; });
+#else
+  return false;
+#endif
+}
+
+// https://www.rfc-editor.org/rfc/rfc6570.html#section-1.5
+// Tests whether c falls in one of the inclusive code point ranges listed below.
+inline bool is_uschar(int c) {
+  struct Span {
+    int first;
+    int last;
+  };
+  constexpr Span spans[] = {
+      {0xA0, 0xD7FF},     {0xF900, 0xFDCF},   {0xFDF0, 0xFFEF},   {0x10000, 0x1FFFD},
+      {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}, {0x40000, 0x4FFFD}, {0x50000, 0x5FFFD},
+      {0x60000, 0x6FFFD}, {0x70000, 0x7FFFD}, {0x80000, 0x8FFFD}, {0x90000, 0x9FFFD},
+      {0xA0000, 0xAFFFD}, {0xB0000, 0xBFFFD}, {0xC0000, 0xCFFFD}, {0xD0000, 0xDFFFD},
+      {0xE0000, 0xEFFFD},
+  };
+  for (Span const & span : spans) {
+    if (span.first <= c && c <= span.last) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * @brief Test whether the character at part[pos] is acceptable inside a URI
+ * component: an ASCII letter/digit, a code point accepted by is_uschar, one of
+ * "-._~!$&'()*+,;=", a percent-encoded octet, or one of extra_valid_chars.
+ *
+ * @param part The component being scanned
+ * @param pos Index of the character to test. When a '%' is found, pos is
+ * advanced over the hex digits that were inspected.
+ * @param extra_valid_chars Additional ASCII characters accepted by this caller
+ *
+ * Wide (char32_t) code points are never narrowed to char before being looked
+ * up: doing so made e.g. U+012D compare equal to '-', and made NUL match the
+ * terminator of the lookup string.
+ */
+template <typename CharT>
+inline bool is_pchar(std::basic_string_view<CharT> part, size_t & pos,
+                     std::string_view extra_valid_chars = ":@") {
+  constexpr std::string_view g_hex_digits = "0123456789ABCDEFabcdef";
+  constexpr std::string_view g_unreserved_and_sub_delims = "-._~!$&'()*+,;=";
+
+  auto const is_ascii = [](CharT c) { return c > 0 && c <= 0x7F; };
+  auto const in_set = [&is_ascii](std::string_view set, CharT c) {
+    return is_ascii(c) && set.find(static_cast<char>(c)) != set.npos;
+  };
+
+  CharT const c = part[pos];
+  if (is_ascii(c) && std::isalnum(static_cast<unsigned char>(c))) {
+    return true;
+  }
+  if (is_uschar(static_cast<int>(c)) || in_set(g_unreserved_and_sub_delims, c)) {
+    return true;
+  }
+  if (c == '%') {
+    return pos + 2 < part.size() && in_set(g_hex_digits, part[++pos]) &&
+           in_set(g_hex_digits, part[++pos]);
+  }
+  return in_set(extra_valid_chars, c);
+}
+
+/**
+ * @brief Test whether the character at part[pos] may appear literally (outside
+ * of an expression) in a URI Template.
+ *
+ * Accepts a percent-encoded octet, or any character that is neither a control
+ * character (<= 0x1F, 0x7F) nor one of: space " ' % < > \ ^ ` { | }
+ *
+ * @param part The template being scanned
+ * @param pos Index of the character to test. When a '%' is found, pos is
+ * advanced over the hex digits that were inspected.
+ *
+ * Lookups are done on char32_t directly: narrowing to char made code points
+ * such as U+0120 compare equal to ' ' and be rejected.
+ */
+inline bool is_uri_template_literal(std::u32string_view part, size_t & pos) {
+  constexpr std::u32string_view g_hex_digits = U"0123456789ABCDEFabcdef";
+  constexpr std::u32string_view g_forbidden = U" \"'%<>\\^`{|}";
+
+  char32_t const c = part[pos];
+  if (c == U'%') {
+    auto const is_hex_digit = [&](char32_t d) {
+      return g_hex_digits.find(d) != g_hex_digits.npos;
+    };
+    return pos + 2 < part.size() && is_hex_digit(part[++pos]) && is_hex_digit(part[++pos]);
+  }
+  return g_forbidden.find(c) == g_forbidden.npos && c > 0x1F && c != 0x7F;
+}
+
+/**
+ * @brief Test whether the character at part[pos] may appear in a URI Template
+ * variable name: an ASCII letter or digit, '_', or a percent-encoded octet.
+ *
+ * @param part The variable name being scanned
+ * @param pos Index of the character to test. When a '%' is found, pos is
+ * advanced over the hex digits that were inspected.
+ *
+ * Classification is done with explicit ASCII ranges: std::isalnum is undefined
+ * for char32_t values that do not fit in an unsigned char.
+ */
+inline bool is_uri_template_varchar(std::u32string_view part, size_t & pos) {
+  constexpr std::u32string_view g_hex_digits = U"0123456789ABCDEFabcdef";
+
+  char32_t const c = part[pos];
+  if (c == U'%') {
+    auto const is_hex_digit = [&](char32_t d) {
+      return g_hex_digits.find(d) != g_hex_digits.npos;
+    };
+    return pos + 2 < part.size() && is_hex_digit(part[++pos]) && is_hex_digit(part[++pos]);
+  }
+  bool const is_ascii_alnum =
+      (c >= U'0' && c <= U'9') || (c >= U'A' && c <= U'Z') || (c >= U'a' && c <= U'z');
+  return is_ascii_alnum || c == U'_';
+}
+
+/**
+ * @brief Validate the text between '{' and '}' of a URI Template expression:
+ * an optional operator character followed by a comma separated variable list.
+ *
+ * Each variable may end in an explode modifier ('*') or a prefix modifier
+ * (':' followed by 1-4 digits, not starting with '0').
+ *
+ * @param part The expression body, braces excluded
+ * @returns false for an empty expression or any malformed variable
+ *
+ * The comma separating two variables is consumed together with the variable
+ * before it. Leaving it in place made every later iteration see an empty
+ * variable at position 0, so any list with a comma never terminated.
+ */
+inline bool is_uri_template_expression(std::u32string_view part) {
+  constexpr std::u32string_view g_operators = U"+#./;?&=,!@|";
+  if (part.empty()) {
+    return false;
+  }
+
+  // Compared as char32_t so wide code points are not narrowed to an operator.
+  if (g_operators.find(part[0]) != g_operators.npos) {
+    part.remove_prefix(1);
+  }
+
+  while (!part.empty()) {
+    size_t const comma = part.find(U',');
+    std::u32string_view varspec = part.substr(0, comma);
+    part.remove_prefix(comma == part.npos ? part.size() : comma + 1);
+
+    std::u32string_view expand;
+    if (size_t const mod = varspec.find_first_of(U":*"); mod != varspec.npos) {
+      expand = varspec.substr(mod + 1);
+      varspec.remove_suffix(expand.size() + 1);
+    }
+
+    // Nothing to check for "no modifier" or "explode"; otherwise it must be a
+    // max-length of at most four digits without a leading zero.
+    bool const has_max_length = !expand.empty() && expand != U"*";
+    if (has_max_length &&
+        (expand.size() > 4 || expand[0] == U'0' ||
+         not std::ranges::all_of(expand, [](char32_t c) { return c >= U'0' && c <= U'9'; }))) {
+      return false;
+    }
+    for (size_t i = 0; i < varspec.size(); ++i) {
+      RETURN_UNLESS(is_uri_template_varchar(varspec, i) || (i > 0 && varspec[i] == '.'), false);
+    }
+  }
+
+  return true;
+}
+
+/**
+ * @brief Validate the authority component of a URI:
+ * `[userinfo "@"] host [":" port]`.
+ *
+ * @param uri The authority text only (no leading "//", no path)
+ * @returns true when the userinfo (if any), host and port (if any) are all
+ * well formed
+ */
+template <typename CharT> bool is_uri_authority(std::basic_string_view<CharT> uri) {
+  // A URI Authority section MAY contain user info, which is every character up
+  // to the first "@" character, as long as that character is not part of the path
+  if (size_t pos = uri.find('@'); pos != uri.npos) {
+    for (size_t i = 0; i < pos; ++i) {
+      if (not is_pchar(uri, i, ":")) {
+        return false;
+      }
+    }
+    uri.remove_prefix(pos + 1);
+  }
+
+  // A URI Authority HOST section
+  // If the URI starts with '[', then it MUST BE an IPv6 or an "IPvFuture"
+  // (the emptiness check keeps us from indexing into an empty view)
+  bool const has_ipv6 = !uri.empty() && uri[0] == '[';
+  if (has_ipv6) {
+    size_t const pos = uri.find(']');
+    if (pos == uri.npos) {
+      return false; // unterminated IP literal
+    }
+    auto ip = uri.substr(1, pos - 1);
+    uri.remove_prefix(pos + 1);
+    if (not ipv6(to_u8(ip))) {
+      return false;
+    }
+  }
+
+  // A URI Authority PORT section. Technically allows any number of digits
+  if (size_t pos = uri.find(':'); pos != uri.npos) {
+    if (not std::ranges::all_of(uri.substr(pos + 1),
+                                [](auto c) { return c >= '0' && c <= '9'; })) {
+      return false;
+    }
+    // Drop exactly ":port". Removing one character more ate the last character
+    // of the host, and overran the view when the port followed an IP literal.
+    uri.remove_suffix(uri.size() - pos);
+  }
+
+  // Normal URI Authority HOST section is either an IPv4 or a HOSTNAME
+  // if we had an ipv6 part, we can permit an empty string (since hostname
+  // no longer permits them).
+  return (has_ipv6 && uri.empty()) || ipv4(to_u8(uri)) || hostname(uri);
+}
+
+// Validates the URI "Query Part" or "Fragment Part" introduced by delim and
+// strips it (delimiter included) from uri. The part is stripped before it is
+// validated, so uri is shortened even when false is returned. When delim does
+// not occur, uri is left alone and the result is true.
+template <typename CharT> bool test_uri_part(std::basic_string_view<CharT> & uri, char delim) {
+  size_t const split = uri.find(delim);
+  if (split == uri.npos) {
+    return true;
+  }
+
+  auto const part = uri.substr(split + 1);
+  uri.remove_suffix(uri.size() - split);
+
+  for (size_t i = 0; i < part.size(); ++i) {
+    RETURN_UNLESS(detail::is_pchar(part, i, ":@/?"), false);
+  }
+  return true;
+}
+}
+
+namespace jvalidate::format::draft03 {
+namespace detail = jvalidate::format::detail;
+
+/**
+ * @brief Validate a draft-03 "time" value: exactly what strptime's "%T"
+ * (HH:MM:SS) consumes, at least 8 characters long, with nothing after it.
+ *
+ * @param dt The candidate text. It is copied first because strptime needs a
+ * NUL-terminated buffer, which a string_view does not guarantee.
+ */
+inline bool time(std::string_view dt) {
+  std::string const text(dt);
+  std::tm tm{};
+  char const * const begin = text.c_str();
+  char const * const end = strptime(begin, "%T", &tm);
+  if (end == nullptr || (end - begin) < 8) {
+    return false;
+  }
+  return end == begin + text.size();
+}
+
+/**
+ * @brief Validate a draft-03 "utc-millisec" value: the whole text must parse
+ * as a 64-bit integer or, failing that, as a floating point number.
+ *
+ * std::from_chars takes `const char*` bounds. string_view iterators are not
+ * guaranteed to be pointers, so the range is built from data()/size().
+ */
+inline bool utc_millisec(std::string_view utc) {
+  char const * const first = utc.data();
+  char const * const last = first + utc.size();
+  auto const parses_fully_as = [first, last](auto value) {
+    auto const [end, ec] = std::from_chars(first, last, value);
+    return ec == std::errc{} && end == last;
+  };
+  return parses_fully_as(int64_t{}) || parses_fully_as(double{});
+}
+
+/**
+ * @brief Validate a draft-03 "color" value as a CSS 2.1 color: either one of
+ * the seventeen CSS 2.1 color keywords, or '#' followed by exactly three or
+ * exactly six hexadecimal digits.
+ *
+ * Keywords are matched case-sensitively, in lower case.
+ */
+inline bool css_2_1_color(std::string_view color) {
+  if (color.empty()) {
+    return false;
+  }
+  if (color[0] == '#') {
+    // "#rgb" or "#rrggbb" only: a bare "#" or other digit counts are not colors
+    size_t const digits = color.size() - 1;
+    return (digits == 3 || digits == 6) && detail::is_hex(color.substr(1));
+  }
+  static std::unordered_set<std::string_view> const g_color_codes{
+      "maroon", "red",  "orange", "yellow", "olive", "purple", "fuchsia", "white", "lime",
+      "green",  "navy", "blue",   "aqua",   "teal",  "black",  "silver",  "gray"};
+  return g_color_codes.contains(color);
+}
+
+/**
+ * @brief Validate a draft-03 "phone" value against two E.123-style layouts:
+ * national "(ddd) ddd dddd", or international "+d{1,3} d{2,3} d{2,3} dddd".
+ *
+ * @param phone The candidate text. It is copied first because sscanf needs a
+ * NUL-terminated buffer. The "%n" conversion records how much input was
+ * consumed, so text with trailing characters after the number is rejected.
+ */
+inline bool e_123_phone(std::string_view phone) {
+  // https://support.secureauth.com/hc/en-us/articles/360036402211-Regular-Expressions-for-ITU-E-123-and-E-164-phone-number-formats
+  if (phone.empty()) {
+    return false;
+  }
+  std::string const text(phone);
+  int consumed = -1; // stays negative unless every conversion before %n matched
+  auto const fully_consumed = [&text, &consumed] {
+    return consumed >= 0 && static_cast<size_t>(consumed) == text.size();
+  };
+
+  if (text[0] != '+') {
+    constexpr int g_usa_phone_tokens = 3;
+    char area[4], head[4], tail[5];
+    return sscanf(text.c_str(), "(%3s) %3s %4s%n", area, head, tail, &consumed) ==
+               g_usa_phone_tokens &&
+           fully_consumed() && detail::is_dec(area, 3) && detail::is_dec(head, 3) &&
+           detail::is_dec(tail, 4);
+  }
+  char tok0[4], tok1[4], tok2[4], tok3[5];
+  constexpr int g_i18n_phone_tokens = 4;
+  return sscanf(text.c_str(), "+%3s %3s %3s %4s%n", tok0, tok1, tok2, tok3, &consumed) ==
+             g_i18n_phone_tokens &&
+         fully_consumed() && detail::is_dec(tok0, 1, 3) && detail::is_dec(tok1, 2, 3) &&
+         detail::is_dec(tok2, 2, 3) && detail::is_dec(tok3, 4);
+}
 }
 
 namespace jvalidate::format {
 inline bool date(std::string_view dt) {
-  auto [size, good] = detail::date(dt);
-  return good && size == dt.size();
+  auto [consumed, valid] = detail::date(dt);
+  return valid && consumed == dt.size();
 }
 
 inline bool time(std::string_view dt) {
-  struct tm tm;
+  std::tm tm;
   char const * end = strptime(dt.data(), "%T", &tm);
   if (end == nullptr || end == dt.end() || (end - dt.data()) < 8) {
     return false;
@@ -57,10 +315,10 @@ inline bool time(std::string_view dt) {
   }
 
   if (dt[0] == 'Z' || dt[0] == 'z') {
-    return dt.size() == 1;
+    return dt.size() == 1 && detail::is_leapsecond(tm);
   }
   if (std::strchr("+-", dt[0])) {
-    return strptime(dt.data() + 1, "%R", &tm) == dt.end();
+    return strptime(dt.data() + 1, "%R", &tm) == dt.end() && detail::is_leapsecond(tm);
   }
   return false;
 }
@@ -74,18 +332,94 @@ inline bool date_time(std::string_view dt) {
   return time(dt);
 }
 
+/**
+ * @brief Validate an absolute URI (or IRI, for CharT = char32_t):
+ * `scheme ":" ["//" authority] path ["?" query] ["#" fragment]`.
+ *
+ * @param uri The candidate text
+ * @returns false when there is no scheme or any component is malformed
+ *
+ * Scheme characters are classified with explicit ASCII ranges. The cctype
+ * helpers are undefined for wide code points, and narrowing to char let
+ * non-ASCII code points (and NUL) pass as scheme characters.
+ */
+template <typename CharT> inline bool uri(std::basic_string_view<CharT> uri) {
+  using delim = detail::char_delimiters<CharT>;
+  auto const is_alpha = [](CharT c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
+  auto const is_scheme_char = [&is_alpha](CharT c) {
+    return is_alpha(c) || (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';
+  };
+
+  // https://www.rfc-editor.org/rfc/rfc3986.html#appendix-A
+  // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+  size_t const colon = uri.find(':');
+  RETURN_UNLESS(colon != uri.npos, false);
+  RETURN_UNLESS(is_alpha(uri[0]), false);
+  for (size_t i = 1; i < colon; ++i) {
+    RETURN_UNLESS(is_scheme_char(uri[i]), false);
+  }
+  uri.remove_prefix(colon + 1);
+
+  // Fragment first, then query: each call validates and strips its part
+  RETURN_UNLESS(detail::test_uri_part(uri, '#'), false);
+  RETURN_UNLESS(detail::test_uri_part(uri, '?'), false);
+
+  auto path = uri;
+  if (uri.starts_with(delim::double_slash)) {
+    // "//" authority: everything up to the next '/' is the authority
+    uri.remove_prefix(2);
+    path = uri.substr(std::min(uri.size(), uri.find('/')));
+    uri.remove_suffix(path.size());
+    RETURN_UNLESS(detail::is_uri_authority(uri), false);
+  }
+
+  for (size_t i = 0; i < path.size(); ++i) {
+    RETURN_UNLESS(detail::is_pchar(path, i, "/:@"), false);
+  }
+
+  return true;
+}
+
+/**
+ * @brief Validate a URI reference: either an absolute URI, or a relative
+ * reference `["//" authority] path ["?" query] ["#" fragment]`.
+ *
+ * @param uri The candidate text
+ *
+ * For a relative reference the first path segment must not contain ':'
+ * (RFC 3986 section 4.2), otherwise it would read as a scheme. That rule is
+ * applied even when the path has no '/' at all, i.e. when the whole path is
+ * the first segment.
+ */
+template <typename CharT> inline bool uri_reference(std::basic_string_view<CharT> uri) {
+  using delim = detail::char_delimiters<CharT>;
+  if (jvalidate::format::uri(uri)) {
+    return true;
+  }
+
+  // Fragment first, then query: each call validates and strips its part
+  RETURN_UNLESS(detail::test_uri_part(uri, '#'), false);
+  RETURN_UNLESS(detail::test_uri_part(uri, '?'), false);
+
+  auto path = uri;
+  if (uri.starts_with(delim::double_slash)) {
+    uri.remove_prefix(2);
+    path = uri.substr(std::min(uri.size(), uri.find('/')));
+    uri.remove_suffix(path.size());
+    RETURN_UNLESS(detail::is_uri_authority(uri), false);
+  }
+
+  // First segment: everything before the first '/', or the whole path when
+  // there is none. It is empty for absolute paths and after an authority.
+  auto const first_segment = path.substr(0, path.find('/'));
+  for (size_t i = 0; i < first_segment.size(); ++i) {
+    RETURN_UNLESS(detail::is_pchar(first_segment, i, "@"), false);
+  }
+  path.remove_prefix(first_segment.size());
+
+  for (size_t i = 0; i < path.size(); ++i) {
+    RETURN_UNLESS(detail::is_pchar(path, i, "/:@"), false);
+  }
+
+  return true;
+}
+
+/**
+ * @brief Validate a URI Template: a sequence of literal characters and
+ * brace-delimited expressions.
+ *
+ * @returns false on an invalid literal, an unterminated '{', or an expression
+ * body that is rejected.
+ */
+inline bool uri_template(std::u32string_view uri) {
+  size_t cursor = 0;
+  while (cursor < uri.size()) {
+    if (uri[cursor] == '{') {
+      std::u32string_view const tail = uri.substr(cursor + 1);
+      size_t const close = tail.find('}');
+      RETURN_UNLESS(close != uri.npos, false);
+      RETURN_UNLESS(detail::is_uri_template_expression(tail.substr(0, close)), false);
+      // Land on the closing brace; the increment below steps past it
+      cursor += close + 1;
+    } else {
+      // May advance cursor over a percent-encoded octet
+      RETURN_UNLESS(detail::is_uri_template_literal(uri, cursor), false);
+    }
+    ++cursor;
+  }
+  return true;
+}
+
 inline bool uuid(std::string_view id) {
-  constexpr char const * g_hex_digits = "0123456789ABCDEFabcdef";
   constexpr size_t g_uuid_len = 36;
   constexpr size_t g_uuid_tokens = 5;
   char tok0[9], tok1[5], tok2[5], tok3[5], tok4[13];
 
-  auto is_hex = [](std::string_view s) {
-    return s.find_first_not_of(g_hex_digits) == std::string::npos;
-  };
   return id.size() == g_uuid_len &&
          sscanf(id.data(), "%8s-%4s-%4s-%4s-%12s", tok0, tok1, tok2, tok3, tok4) == g_uuid_tokens &&
-         is_hex(tok0) && is_hex(tok1) && is_hex(tok2) && is_hex(tok3) && is_hex(tok4);
+         detail::is_hex(tok0) && detail::is_hex(tok1) && detail::is_hex(tok2) &&
+         detail::is_hex(tok3) && detail::is_hex(tok4);
 }
 
 inline bool duration(std::string_view dur) {
@@ -99,16 +433,26 @@ inline bool duration(std::string_view dur) {
     return text.find(type);
   };
 
+  // All DURATION entities must start with the prefix 'P', and cannot be empty
+  // past that point.
   if (dur[0] != 'P' || dur.size() == 1) {
     return false;
   }
   dur.remove_prefix(1);
 
+  // Special Case: a duration measured in weeks is incompatible with other
+  // duration tokens.
+  if (eat("W") != std::string::npos) {
+    return dur.empty();
+  }
+
+  // DURATION takes the following form, because we use the same token for both
+  // Months and Minutes.
+  // "P[#Y][#M][#D][T[#H][#M][#S]]".
+  // At least one of the optional fields must be present.
   if (dur[0] != 'T') {
-    if (eat("W") != std::string::npos) {
-      return dur.empty();
-    }
     std::string_view ymd{"YMD"};
+    // Read YMD duration offsets in that order, allowing us to skip past them.
     while (not ymd.empty() && not dur.empty()) {
       if (size_t n = eat(ymd); n != std::string::npos) {
         ymd.remove_prefix(n + 1);
@@ -121,12 +465,15 @@ inline bool duration(std::string_view dur) {
     }
   }
 
+  // If we have a 'T' prefix for Hour/Minute/Second offsets, we must have at
+  // least one of them present.
   if (dur[0] != 'T' || dur.size() == 1) {
     return false;
   }
   dur.remove_prefix(1);
 
   std::string_view hms{"HMS"};
+  // Read HMS duration offsets in that order, allowing us to skip past them.
   while (not hms.empty() && not dur.empty()) {
     if (size_t n = eat(hms); n != std::string::npos) {
       hms.remove_prefix(n + 1);
@@ -137,66 +484,130 @@ inline bool duration(std::string_view dur) {
   return dur.empty();
 }
 
-// Limitation - does not inspect graphemes, so it cannot check idn-hostname
-// to fix this - we'd need to
-inline bool hostname(std::string_view name) {
-  auto hostname_part = [&name](size_t end) {
-    if (end == 0 || end >= 64 || name[0] == '-' || name[end - 1] == '-') {
-      return false;
-    }
-    for (size_t i = 0; i < end; ++i) {
-      if (name[i] != '-' && not std::isalnum(name[i])) {
-        return false;
-      }
-    }
-    return true;
-  };
+template <typename CharT>
+bool is_invalid_size_or_boundary_hostname(std::basic_string_view<CharT> name) {
+  using delim = detail::char_delimiters<CharT>;
+  return (name.empty() || name.length() >= 64 ||
+          (name.size() >= 4 && name.substr(2).starts_with(delim::illegal_dashes_ulabel)) ||
+          name[0] == '-' || name.back() == '-');
+}
+
+#if !JVALIDATE_HAS_IDNA
+inline bool hostname_part(std::string_view name) {
+  using delim = detail::char_delimiters<char>;
+
+  if (is_invalid_size_or_boundary_hostname(name)) {
+    return false;
+  }
+  return std::ranges::none_of(name, [](char c) { return c != '-' && not std::isalnum(c); });
+}
+#else
+template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT> name) {
+  using delim = detail::char_delimiters<CharT>;
+  // Punycode is a way to restructure UTF-8 strings to be ASCII compatible.
+  // All Punycode strings start with "xn--" (and would therefore fail below).
+  if (name.starts_with(delim::punycode_prefix)) {
+    std::u32string decoded = detail::to_u32(ada::idna::to_unicode(detail::to_u8(name)));
+    return (decoded != detail::to_u32(name)) && hostname_part<char32_t>(decoded);
+  }
+
+  // Unfortunately, the ada-idna library does not validate things like
+  // "is there a HEBREW character after the HEBREW COMMA".
+  if (not detail::is_special_case_ok(name)) {
+    return false;
+  }
+
+  if (name.find_first_of(delim::illegal_hostname_chars) != name.npos) {
+    return false;
+  }
+
+  // An INVALID hostname part is one of the following:
+  // - empty
+  // - more than 63 characters long
+  // - starts or ends with a '-'
+  // - matches the regular expression /^..--.*$/
+  if (is_invalid_size_or_boundary_hostname(name)) {
+    return false;
+  }
+
+  // This is a much easier check in hostname than idn-hostname, since we can
+  // just check for alphanumeric and '-'.
+  if constexpr (std::is_same_v<char, CharT>) {
+    return std::ranges::none_of(name, [](char c) { return c != '-' && not std::isalnum(c); });
+  } else {
+    return ada::idna::is_label_valid(name);
+  }
+}
+#endif
 
-  if (name.size() > (name.back() == '.' ? 254 : 253)) {
+template <typename CharT> inline bool hostname(std::basic_string_view<CharT> name) {
+  using delim = detail::char_delimiters<CharT>;
+  // In general, the maximum length of a hostname is 253 characters.
+  if (name.empty() || name.length() > 253) {
     return false;
   }
-  for (size_t n = name.find('.'); n != std::string::npos;
-       name.remove_prefix(n + 1), n = name.find('.')) {
-    if (not hostname_part(n)) {
+
+  // We validate each sub-section of the hostname in parts, delimited by '.'
+  for (size_t n = name.find_first_of(delim::hostname_part_delims); n != std::string::npos;
+       name.remove_prefix(n + 1), n = name.find_first_of(delim::hostname_part_delims)) {
+    if (not hostname_part(name.substr(0, n))) {
       return false;
     }
   }
-  return name.empty() || hostname_part(name.size());
+
+  // Previous test versions allowed for a hostname to end with '.', but this is
+  // not permitted in the latest test specification.
+  return hostname_part(name);
 }
 
 inline bool ipv4(std::string_view ip) {
   unsigned int ip0, ip1, ip2, ip3;
   char eof;
-  if (ip.find_first_not_of("0123456789.") != std::string::npos) {
+  // IPv4 address MAY only contain DIGITS and '.'
+  if (ip.find_first_not_of("0123456789.") != ip.npos) {
     return false;
   }
+
+  // Each OCTET of an IPv4 can only start with '0' if it is EXACTLY '0'
   if (ip[0] == '0' && std::isdigit(ip[1])) {
     return false;
   }
-  if (size_t n = ip.find(".0"); n != std::string::npos && std::isdigit(ip[n + 2])) {
+  if (size_t n = ip.find(".0"); n != ip.npos && std::isdigit(ip[n + 2])) {
     return false;
   }
-  if (sscanf(ip.data(), "%3u.%3u.%3u.%3u%c", &ip0, &ip1, &ip2, &ip3, &eof) != 4) {
+
+  // sscanf returns the number of tokens parsed successfully.
+  // Therefore, we can add a trailing character output to the format-string
+  // and check that we failed to parse any token into the eof-character token.
+  if (sscanf(std::string(ip).c_str(), "%3u.%3u.%3u.%3u%c", &ip0, &ip1, &ip2, &ip3, &eof) != 4) {
     return false;
   }
+  // Affirm that each OCTET fits in a single byte (i.e. is at most 255).
   return ip0 <= 0xFF && ip1 <= 0xFF && ip2 <= 0xFF && ip3 <= 0xFF;
 }
 
 inline bool ipv6(std::string_view ip) {
   int expected_spans = 8;
 
+  // There is a special rule with IPv6 to allow an IPv4 address as a suffix
   if (size_t n = ip.find('.'); n != std::string::npos) {
     if (not ipv4(ip.substr(ip.find_last_of(':') + 1))) {
       return false;
     }
-    // This is a cheat to allow e.g. ::127.0.0.1 to validate
+    // Since an IPv4 address holds 4 bytes, and each segment of an IPv6 address
+    // holds 2 bytes - we should reduce the number of expected spans to 6.
+    // Instead - we reduce it to 7 because we don't prune the first OCTET of
+    // the IPv4 section (as it can read as a valid segment).
     expected_spans = 7;
     ip = ip.substr(0, n);
   }
 
+  // IPv6 address MAY only contain HEXDIGITs and ':'
   if (ip.find_first_not_of("0123456789ABCDEFabcdef:") != std::string::npos) {
     return false;
   }
+  // IPv6 addresses can have a maximum of 39 characters (8 4-char HEXDIGIT
+  // segments with 7 dividing ':'s).
   if (ip.size() >= 40) {
     return false;
   }
@@ -208,25 +619,33 @@ inline bool ipv6(std::string_view ip) {
     has_compressed = true;
     ip.remove_prefix(2);
   }
-  while (!ip.empty()) {
+
+  while (!ip.empty() && ++groups) {
     int data;
     if (sscanf(ip.data(), "%4x", &data) != 1) {
+      // Not a HEXDIGIT segment. Not sure that it's ever possible due to the
+      // char filter above.
       return false;
     }
-    if (size_t n = ip.find(':'); std::min(n, ip.size()) > 4) {
-      return false;
+
+    if (size_t const n = ip.find(':'); std::min(n, ip.size()) > 4) {
+      return false; // Segment too wide
     } else if (n != std::string::npos) {
       ip.remove_prefix(n + 1);
     } else {
-      ip = "";
+      break; // End of String
     }
-    ++groups;
-    if (ip[0] == ':') {
-      if (std::exchange(has_compressed, true)) {
-        return false;
-      }
-      ip.remove_prefix(1);
+
+    // We removed the regular ':', so this is a check for a compression mark
+    if (ip[0] != ':') {
+      continue;
     }
+    if (std::exchange(has_compressed, true)) {
+      // The above trick allows us to ensure that there is no more than one
+      // set of "::" compression tokens in this IPv6 address.
+      return false;
+    }
+    ip.remove_prefix(1);
   }
 
   return groups == expected_spans || (has_compressed && groups < expected_spans);
@@ -237,65 +656,135 @@ inline bool ipv6(std::string_view ip) {
 // Therefore, there's no point in trying to validate things according to a
 // complex grammar - as long as it has an '@' sign with at least one character
 // on each side, we ought to call it an email.
-inline bool email(std::string_view em) {
-  size_t n = em.find_last_of('@');
+template <typename CharT> inline bool email(std::basic_string_view<CharT> em) {
+  using delim = detail::char_delimiters<CharT>;
+  size_t const n = em.find_last_of('@');
   if (n == 0 || n >= em.size() - 1) {
     return false;
   }
 
-  if (em[0] == '"' && em[n - 1] == '"') {
+  auto const who = em.substr(0, n);
+  if (who.starts_with('"') && who.ends_with('"')) {
     // No validation
-  } else if (em.substr(0, n).find("..") != std::string::npos || em[n - 1] == '.' || em[0] == '.') {
+  } else if (who.starts_with('.') || who.ends_with('.')) {
+    return false;
+  } else if (em.substr(0, n).find(delim::dotdot) != em.npos) {
+    return false;
+  } else if (who.find('@') != em.npos) {
+    // This will catch multiple emails, but will gracefully ignore quote-escaped
+    // '@' characters in the name element.
     return false;
   }
 
-  em.remove_prefix(n + 1);
-  if (em.front() == '[' && em.back() == ']') {
-    em.remove_prefix(1);
-    em.remove_suffix(1);
-    if (em.starts_with("IPv6:")) {
-      return ipv6(std::string(em.substr(5)));
-    }
-    return ipv4(std::string(em)); // Re-acquire NULL-term
+  // The DOMAIN section of an email address MAY be either a HOSTNAME, or an
+  // IP Address surrounded in brackets.
+  auto domain = em.substr(n + 1);
+  if (not(domain.starts_with('[') && domain.ends_with(']'))) {
+    return hostname(domain);
+  }
+  domain.remove_prefix(1);
+  domain.remove_suffix(1);
+
+  // When the DOMAIN is an IPv6, it must start with "IPv6:" for some
+  // weird compatibility reason.
+  if (auto ip = detail::to_u8(domain); ip.starts_with("IPv6:")) {
+    return ipv6(ip.substr(5));
+  } else {
+    return ipv4(ip);
   }
-  return hostname(em);
 }
+
+template <typename T> inline bool ctor_as_valid(std::string_view str) {
+  try {
+    [[maybe_unused]] auto _ = T(str);
+    return true;
+  } catch (std::exception const &) { return false; }
+}
+
+#if JVALIDATE_HAS_IDNA
+template <auto Predicate> bool utf32(std::string_view str) {
+  return Predicate(detail::to_u32(str));
+}
+#endif
 }
 
 namespace jvalidate {
 class FormatValidator {
 public:
-  using Predicate = bool (*)(std::string_view);
+  using StatelessPredicate = bool (*)(std::string_view);
+  using Predicate = std::function<bool(std::string_view)>;
+  using UserDefinedFormats = std::unordered_map<std::string, Predicate>;
   enum class Status { Unknown, Unimplemented, Valid, Invalid };
 
 private:
-  std::unordered_map<std::string, Predicate> supported_formats_{
+  // This isn't actually a user format, but we don't generate any special
+  // annotations for user-defined format codes, so it doesn't really matter that
+  // we're putting it here. It simply reduces the number of LoC when setting up.
+  std::unordered_map<std::string, Predicate> formats_{{"regex", nullptr}};
+
+  std::unordered_map<std::string, StatelessPredicate> builtin_formats_{
       {"date", &format::date},
       {"date-time", &format::date_time},
       {"duration", &format::duration},
       {"email", &format::email},
       {"hostname", &format::hostname},
-      {"idn-email", nullptr},
-      {"idn-hostname", nullptr},
+      {"idn-email", UTF32(email)},
+      {"idn-hostname", UTF32(hostname)},
       {"ipv4", &format::ipv4},
       {"ipv6", &format::ipv6},
-      {"iri", nullptr},
-      {"iri-reference", nullptr},
-      {"json-pointer", nullptr},
-      {"relative-json-pointer", nullptr},
-      /* {"regex", &detail::StdRegexEngine::is_valid}, */
+      {"iri", UTF32(uri)},
+      {"iri-reference", UTF32(uri_reference)},
+      {"json-pointer", CONSTRUCTS(Pointer)},
+      {"relative-json-pointer", CONSTRUCTS(RelativePointer)},
       {"time", &format::time},
-      {"uri", nullptr},
-      {"uri-reference", nullptr},
+      {"uri", &format::uri},
+      {"uri-reference", &format::uri_reference},
+#if JVALIDATE_HAS_IDNA
+      {"uri-template", &format::utf32<format::uri_template>},
+#else
       {"uri-template", nullptr},
+#endif
       {"uuid", &format::uuid},
   };
 
+  std::unordered_map<std::string, StatelessPredicate> draft03_formats_{
+      {"date", &format::date},
+      // One of the weird things about draft03 - date-time allows for timezone
+      // and fraction-of-second in the argument, but time only allows hh:mm:ss.
+      {"date-time", &format::date_time},
+      {"time", &format::draft03::time},
+      {"utc-millisec", &format::draft03::utc_millisec},
+      {"color", &format::draft03::css_2_1_color},
+      {"style", nullptr},
+      {"phone", &format::draft03::e_123_phone},
+      {"uri", &format::uri},
+      {"email", &format::email},
+      {"ip-address", &format::ipv4},
+      {"ipv6", &format::ipv6},
+      {"host-name", &format::hostname},
+  };
+
 public:
   FormatValidator() = default;
+  FormatValidator(Predicate is_regex) { formats_.insert_or_assign("regex", is_regex); }
+  FormatValidator(UserDefinedFormats const & formats, Predicate is_regex) : formats_(formats) {
+    formats_.insert_or_assign("regex", is_regex);
+  }
+
+  Status operator()(std::string const & format, schema::Version for_version,
+                    std::string_view text) const {
+    auto const & supported =
+        for_version == schema::Version::Draft03 ? draft03_formats_ : builtin_formats_;
+    if (Status rval = (*this)(supported, format, text); rval != Status::Unknown) {
+      return rval;
+    }
+    return (*this)(formats_, format, text);
+  }
 
-  Status operator()(std::string const & format, std::string_view text) const {
-    if (auto it = supported_formats_.find(format); it != supported_formats_.end() && it->second) {
+private:
+  Status operator()(auto const & supported, std::string const & format,
+                    std::string_view text) const {
+    if (auto it = supported.find(format); it != supported.end()) {
       if (not it->second) {
         return Status::Unimplemented;
       }
@@ -305,3 +794,6 @@ public:
   }
 };
 }
+
+#undef CONSTRUCTS
+#undef UTF32

+ 7 - 0
include/jvalidate/forward.h

@@ -15,6 +15,12 @@
 
 #define COMMA_NAME(X) , X
 
+namespace jvalidate::detail {
+}
+namespace jvalidate::format::detail {
+using namespace jvalidate::detail;
+}
+
 namespace jvalidate {
 class Schema;
 class Status;
@@ -174,6 +180,7 @@ concept MutableAdapter = Adapter<A> && requires(A const a) {
 template <typename R>
 concept RegexEngine = requires(R & engine) {
   { R::engine_name() } -> std::convertible_to<std::string_view>;
+  { R::is_regex("") } -> std::same_as<bool>;
   { engine.search("" /* pattern */, "" /* text */) } -> std::same_as<bool>;
 };
 

+ 5 - 3
include/jvalidate/validation_visitor.h

@@ -76,6 +76,7 @@ private:
   ValidationConfig const & cfg_;
   ExtensionVisitor extension_;
   RE & regex_;
+  FormatValidator & format_;
 
   mutable VisitedAnnotation * visited_ = nullptr;
   mutable StoreResults tracking_ = StoreResults::ForInvalid;
@@ -93,9 +94,10 @@ public:
    * receive a detailed summary of why a document is supported/unsupported.
    */
   ValidationVisitor(schema::Node const & schema, Root const & root, ValidationConfig const & cfg,
-                    RE & regex, ExtensionVisitor extension, ValidationResult * result)
+                    RE & regex, FormatValidator & format, ExtensionVisitor extension,
+                    ValidationResult * result)
       : schema_(&schema), root_(&root), result_(result), cfg_(cfg), extension_(extension),
-        regex_(regex) {}
+        regex_(regex), format_(format) {}
 
 private:
   Status visit(constraint::ExtensionConstraint const & cons, Adapter auto const & document) const {
@@ -335,7 +337,7 @@ private:
       return true; // TODO: I think this can be made into Noop
     }
 
-    switch (FormatValidator()(cons.format, document.as_string())) {
+    switch (format_(cons.format, cons.for_version, document.as_string())) {
     case FormatValidator::Status::Unimplemented:
       return result(Status::Reject, "unimplemented format '", cons.format, "'");
     case FormatValidator::Status::Invalid:

+ 55 - 5
include/jvalidate/validator.h

@@ -30,6 +30,7 @@ private:
   ValidationConfig cfg_;
   ExtensionVisitor extension_;
   RE regex_;
+  FormatValidator format_{RE::is_regex};
 
 public:
   /**
@@ -40,12 +41,61 @@ public:
    * @param cfg Any special (runtime) configuration rules being applied to the
    * validator.
    */
-  Validator(schema::Node const & schema, ExtensionVisitor extension = {},
+  Validator(schema::Node const & schema, ValidationConfig const & cfg = {})
+      : schema_(schema), cfg_(cfg) {}
+
+  /**
+   * @brief Construct a Validator
+   *
+   * @param schema The root schema being validated against. Must outlive this.
+   *
+   * @param extension An extension visitor for processing user-defined
+   * constraints
+   *
+   * @param cfg Any special (runtime) configuration rules being applied to the
+   * validator.
+   */
+  Validator(schema::Node const & schema, ExtensionVisitor extension,
             ValidationConfig const & cfg = {})
       : schema_(schema), cfg_(cfg), extension_(extension) {}
 
-  Validator(schema::Node const & schema, ValidationConfig const & cfg)
-      : schema_(schema), cfg_(cfg) {}
+  /**
+   * @brief Construct a Validator
+   *
+   * @param schema The root schema being validated against. Must outlive this.
+   *
+   * @param user_defined_formats A map of format-name to string validator for
+   * user-defined format tools.
+   *
+   * @param cfg Any special (runtime) configuration rules being applied to the
+   * validator. Because user_defined_formats is provided by this constructor,
+   * we default to validate_format:=true.
+   */
+  Validator(schema::Node const & schema,
+            FormatValidator::UserDefinedFormats const & user_defined_formats,
+            ValidationConfig const & cfg = {.validate_format = true})
+      : schema_(schema), cfg_(cfg), format_(user_defined_formats, RE::is_regex) {}
+
+  /**
+   * @brief Construct a Validator
+   *
+   * @param schema The root schema being validated against. Must outlive this.
+   *
+   * @param extension An extension visitor for processing user-defined
+   * constraints
+   *
+   * @param user_defined_formats A map of format-name to string validator for
+   * user-defined format tools.
+   *
+   * @param cfg Any special (runtime) configuration rules being applied to the
+   * validator. Because user_defined_formats is provided by this constructor,
+   * we default to validate_format:=true.
+   */
+  Validator(schema::Node const & schema, ExtensionVisitor extension,
+            FormatValidator::UserDefinedFormats const & user_defined_formats,
+            ValidationConfig const & cfg = {.validate_format = true})
+      : schema_(schema), cfg_(cfg), extension_(extension),
+        format_(user_defined_formats, RE::is_regex) {}
 
   template <typename... Args> Validator(schema::Node &&, Args &&...) = delete;
 
@@ -69,7 +119,7 @@ public:
              "Cannot perform mutations on an immutable JSON Adapter");
     detail::OnBlockExit _ = [&result, this]() { post_process(result); };
     return static_cast<bool>(
-        ValidationVisitor(schema_, json, cfg_, regex_, extension_, result).validate(json));
+        ValidationVisitor(schema_, json, cfg_, regex_, format_, extension_, result).validate(json));
   }
 
   /**
@@ -89,7 +139,7 @@ public:
   template <MutableAdapter A> bool validate(A const & json, ValidationResult * result = nullptr) {
     detail::OnBlockExit _ = [&result, this]() { post_process(result); };
     return static_cast<bool>(
-        ValidationVisitor(schema_, json, cfg_, regex_, extension_, result).validate(json));
+        ValidationVisitor(schema_, json, cfg_, regex_, format_, extension_, result).validate(json));
   }
 
   /**

+ 33 - 1
tests/CMakeLists.txt

@@ -11,6 +11,13 @@ find_package(CURL REQUIRED)
 # ICU components = data, i18n, io, le, lx, test, tu and uc.
 find_package(ICU 77.1 COMPONENTS uc i18n)
 
+FetchContent_Declare(
+  IDNA
+  GIT_REPOSITORY https://github.com/ada-url/idna.git
+  GIT_TAG        0.5.0
+)
+FetchContent_MakeAvailable(IDNA)
+
 FetchContent_Declare(
   json_schema_test_suite
   GIT_REPOSITORY https://github.com/json-schema-org/JSON-Schema-Test-Suite.git
@@ -60,6 +67,13 @@ foreach(CASE IN LISTS JVALIDATE_TESTS)
   else()
     target_compile_definitions(${CASE} PUBLIC JVALIDATE_HAS_ICU=0)
   endif()
+
+  if (idna_POPULATED)
+    target_compile_definitions(${CASE} PUBLIC JVALIDATE_HAS_IDNA=1)
+    target_link_libraries(${CASE} ada-idna)
+  else()
+    target_compile_definitions(${CASE} PUBLIC JVALIDATE_HAS_IDNA=0)
+  endif()
 endforeach()
 
 string(
@@ -68,7 +82,6 @@ string(
   "*optional_content"
   "*optional_*ecmascript_regex"
   "*optional_zeroTerminatedFloats"
-  "*optional_format*"
 )
 set(SelfValidateTest_Unsupported_Suites "")
 set(SelfValidateTest_Unsupported_Cases "*leap second")
@@ -80,6 +93,25 @@ if (NOT ICU_FOUND)
   )
 endif()
 
+if (NOT idna_POPULATED)
+  string(
+    APPEND SelfValidateTest_Unsupported
+    ":*optional_*iri*"
+    ":*optional_*idn*"
+    ":*optional_*uri_template*"
+  )
+  string(
+    APPEND SelfValidateTest_Unsupported_Suites
+    ":*punycode*"
+    ":*puny-code"
+  )
+  string(
+    APPEND SelfValidateTest_Unsupported_Cases
+    ":*punycode*"
+    ":*puny-code"
+  )
+endif()
+
 foreach(CASE IN LISTS JVALIDATE_UNIT_TESTS)
   gtest_discover_tests(${CASE})
 endforeach()

+ 16 - 3
tests/validation_visitor_test.cxx

@@ -28,7 +28,7 @@ protected:
   auto visit(jvalidate::detail::Pointer ptr, auto const & cons, JSON & json,
              jvalidate::ValidationResult * result = nullptr, bool annotate_everything = false) {
     JsonCppAdapter const adapter(json);
-    ValidationVisitor visitor(node_, adapter, cfg_, regex_, extension_, result);
+    ValidationVisitor visitor(node_, adapter, cfg_, regex_, format_, extension_, result);
     if (annotate_everything) {
       visitor.tracking_ = StoreResults::ForAnything;
     }
@@ -43,10 +43,14 @@ protected:
   }
 
   void config(jvalidate::ValidationConfig cfg) { cfg_ = cfg; }
+  void format(FormatValidator::UserDefinedFormats && fmts) {
+    format_ = {std::move(fmts), jvalidate::StdRegexEngine::is_regex};
+  }
 
 private:
   jvalidate::schema::Node node_;
   jvalidate::StdRegexEngine regex_;
+  jvalidate::FormatValidator format_{jvalidate::StdRegexEngine::is_regex};
   jvalidate::ValidationConfig cfg_;
   jvalidate::detail::StubExtensionVisitor extension_;
 };
@@ -117,13 +121,22 @@ TEST_F(ValidationVisitorTest, EnumConstraintAnnotatesMatchingIndex) {
   EXPECT_THAT(result, ErrorAt("enum", "1"));
 }
 
+TEST_F(ValidationVisitorTest, UnknownFormatIsAccept) {
+  constraint::FormatConstraint cons{"bogus", Draft2020_12, true};
+  config({.validate_format = true});
+
+  ValidationResult result;
+  EXPECT_THAT(visit("/format"_jptr, cons, "\"Hello\""_json, &result), Eq(Status::Accept));
+}
+
 TEST_F(ValidationVisitorTest, UnimplementedFormatIsError) {
-  constraint::FormatConstraint cons{"bogus", true};
+  constraint::FormatConstraint cons{"bogus", Draft2020_12, true};
   config({.validate_format = true});
+  format({{"bogus", nullptr}});
 
   ValidationResult result;
   EXPECT_THAT(visit("/format"_jptr, cons, "\"Hello\""_json, &result), Eq(Status::Reject));
-  EXPECT_THAT(result, ErrorAt("format", "bogus is unimplemented"));
+  EXPECT_THAT(result, ErrorAt("format", "unimplemented format 'bogus'"));
 }
 }