Sfoglia il codice sorgente

feat: implement uri-format (excluding IPvFuture)

Sam Jaffe 7 mesi fa
parent
commit
1c0127945e
1 file modificato con 109 aggiunte e 6 eliminazioni
  1. 109 6
      include/jvalidate/format.h

+ 109 - 6
include/jvalidate/format.h

@@ -16,6 +16,7 @@
 #include <ada/idna/validity.h>
 #endif
 
+#include <jvalidate/detail/expect.h>
 #include <jvalidate/detail/idna_special_cases.h>
 #include <jvalidate/detail/pointer.h>
 #include <jvalidate/detail/relative_pointer.h>
@@ -26,6 +27,22 @@
 
 #define UTF32(FN) JVALIDATE_IIF(JVALIDATE_HAS_IDNA, format::utf32<format::FN<char32_t>>, nullptr)
 
+// Forward declarations of the format checkers that are defined later in this
+// header. They allow helpers in jvalidate::format::detail, which appears
+// before those definitions, to call checkers such as ipv4, ipv6 and hostname.
+//
+// The default template arguments (CharT = char) are specified here, on the
+// first declaration, and are therefore not repeated on the definitions.
+namespace jvalidate::format {
+bool date(std::string_view dt);
+bool time(std::string_view dt);
+bool date_time(std::string_view dt);
+bool duration(std::string_view dur);
+
+bool uri(std::string_view uri);
+bool uuid(std::string_view id);
+template <typename CharT = char> bool hostname(std::basic_string_view<CharT> name);
+
+bool ipv4(std::string_view ip);
+bool ipv6(std::string_view ip);
+
+template <typename CharT = char> bool email(std::basic_string_view<CharT> em);
+}
+
 namespace jvalidate::format::detail {
 struct result {
   ptrdiff_t consumed;
@@ -61,12 +78,50 @@ inline bool is_leapsecond(std::tm tm) {
 #if __cpp_lib_chrono >= 201907L
   tm.tm_isdst = -1;
   std::chrono::seconds time(std::mktime(&tm));
-  auto const &leap_seconds = std::chrono::get_tzdb().leap_seconds;
+  auto const & leap_seconds = std::chrono::get_tzdb().leap_seconds;
   return std::ranges::find(leap_seconds, time) != leap_seconds.end();
 #else
   return false;
 #endif
 }
+
+// Checks whether the character at part[pos] starts a valid URI character in
+// the sense of RFC 3986: an unreserved character, a sub-delim, a
+// percent-encoded octet ("%" HEXDIG HEXDIG), or one of extra_valid_chars.
+//
+// @param part the URI component being scanned.
+// @param pos index of the character to test. When a complete percent-encoded
+// octet is matched, pos is advanced by two so that it refers to the last hex
+// digit and the caller's ++pos moves past the whole escape. pos is left
+// untouched in every other case.
+// @param extra_valid_chars additional literal characters accepted for the
+// component being validated (":@" yields the plain "pchar" production).
+//
+// @returns true if the character (or escape sequence) is acceptable; false
+// otherwise, including when pos is out of range or an escape is truncated.
+inline bool is_pchar(std::string_view part, size_t & pos,
+                     std::string_view extra_valid_chars = ":@") {
+  constexpr std::string_view g_hex_digits = "0123456789ABCDEFabcdef";
+  constexpr std::string_view g_literal_chars = "-._~!$&'()*+,;=";
+  constexpr auto npos = std::string_view::npos;
+
+  if (pos >= part.size()) {
+    return false;
+  }
+
+  char const c = part[pos];
+  // <cctype> functions require a value representable as unsigned char.
+  // string_view::find is used instead of strchr because strchr also matches
+  // the terminating NUL, which would let an embedded '\0' slip through.
+  if (std::isalnum(static_cast<unsigned char>(c)) || g_literal_chars.find(c) != npos) {
+    return true;
+  }
+
+  if (c == '%') {
+    // Both hex digits must actually be present inside the view.
+    if (part.size() - pos < 3) {
+      return false;
+    }
+    if (g_hex_digits.find(part[pos + 1]) == npos || g_hex_digits.find(part[pos + 2]) == npos) {
+      return false;
+    }
+    pos += 2;
+    return true;
+  }
+
+  return extra_valid_chars.find(c) != npos;
+}
+
+// Validates the authority component of a URI:
+//   authority = [ userinfo "@" ] host [ ":" port ]
+// as described by RFC 3986 section 3.2. The caller passes the text between
+// the leading "//" and the start of the path.
+//
+// The host may be a bracketed IPv6 literal, an IPv4 address or a hostname.
+// IPvFuture literals are not recognised: anything in brackets must satisfy
+// ipv6().
+//
+// @param uri the authority text, without the leading "//".
+// @returns true if userinfo (when present), host and port (when present) are
+// all well-formed.
+inline bool is_uri_authority(std::string_view uri) {
+  // port = *DIGIT, so an empty port is accepted.
+  auto const is_port = [](std::string_view port) {
+    return std::ranges::all_of(
+        port, [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; });
+  };
+
+  if (size_t const at = uri.find('@'); at != uri.npos && at < uri.find('/')) {
+    // Scan only the userinfo itself so that a percent-escape at its end cannot
+    // borrow characters from beyond the '@'.
+    auto const userinfo = uri.substr(0, at);
+    for (size_t i = 0; i < userinfo.size(); ++i) {
+      if (not is_pchar(userinfo, i, ":")) {
+        return false;
+      }
+    }
+    uri.remove_prefix(at + 1);
+  }
+
+  if (uri.starts_with('[')) {
+    size_t const close = uri.find(']');
+    if (close == uri.npos || not ipv6(uri.substr(1, close - 1))) {
+      return false;
+    }
+    uri.remove_prefix(close + 1);
+    // The only thing allowed after an IP literal is an optional ":port". The
+    // colons inside the literal must not be mistaken for the port separator,
+    // so this case is finished here rather than falling through.
+    if (uri.empty()) {
+      return true;
+    }
+    return uri.front() == ':' && is_port(uri.substr(1));
+  }
+
+  if (size_t const colon = uri.find(':'); colon != uri.npos) {
+    if (not is_port(uri.substr(colon + 1))) {
+      return false;
+    }
+    // Drop ":port" and keep exactly the host text in front of the colon.
+    uri = uri.substr(0, colon);
+  }
+  return ipv4(uri) || hostname(uri);
+}
 }
 
 namespace jvalidate::format {
@@ -110,6 +165,56 @@ inline bool date_time(std::string_view dt) {
   return time(dt);
 }
 
+// Checks that the input is a URI as defined by the RFC 3986 grammar:
+//   URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+// https://www.rfc-editor.org/rfc/rfc3986.html#appendix-A
+//
+// A scheme is mandatory, so relative references are rejected. Host literals
+// in brackets are validated as IPv6 only (no IPvFuture), because that is all
+// detail::is_uri_authority understands.
+//
+// @param uri the candidate string.
+// @returns true if every component is well-formed.
+inline bool uri(std::string_view uri) {
+  // Splits off everything after the first occurrence of delim, validates it as
+  // a query/fragment body, and shrinks uri to the text in front of delim.
+  // Succeeds trivially when delim does not occur.
+  auto test_uri_part = [&uri](char delim) {
+    size_t const split = uri.find(delim);
+    if (split == uri.npos) {
+      return true;
+    }
+    auto const part = uri.substr(split + 1);
+    uri = uri.substr(0, split);
+    for (size_t i = 0; i < part.size(); ++i) {
+      RETURN_UNLESS(detail::is_pchar(part, i, ":@/?"), false);
+    }
+    return true;
+  };
+
+  // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+  constexpr std::string_view g_scheme_punctuation = "+-.";
+  size_t const colon = uri.find(':');
+  RETURN_UNLESS(colon != uri.npos, false);
+  // <cctype> functions require a value representable as unsigned char.
+  RETURN_UNLESS(std::isalpha(static_cast<unsigned char>(uri[0])), false);
+  for (size_t i = 1; i < colon; ++i) {
+    bool const is_scheme_char = std::isalnum(static_cast<unsigned char>(uri[i])) ||
+                                g_scheme_punctuation.find(uri[i]) != g_scheme_punctuation.npos;
+    RETURN_UNLESS(is_scheme_char, false);
+  }
+  uri.remove_prefix(colon + 1);
+
+  // The fragment must be removed before the query: a '?' is legal inside a
+  // fragment, but a '#' always starts the fragment.
+  RETURN_UNLESS(test_uri_part('#'), false);
+  RETURN_UNLESS(test_uri_part('?'), false);
+
+  auto path = uri;
+  if (uri.starts_with("//")) {
+    uri.remove_prefix(2);
+    path = uri.substr(std::min(uri.size(), uri.find('/')));
+    uri.remove_suffix(path.size());
+    RETURN_UNLESS(detail::is_uri_authority(uri), false);
+  }
+
+  // Because a scheme is always present, the path is one of path-abempty,
+  // path-absolute, path-rootless or path-empty, and every segment may contain
+  // ':'. The "no colon in the first segment" restriction (path-noscheme) only
+  // applies to relative references, so no special first-segment check is made.
+  for (size_t i = 0; i < path.size(); ++i) {
+    RETURN_UNLESS(detail::is_pchar(path, i, "/:@"), false);
+  }
+
+  return true;
+}
+
 inline bool uuid(std::string_view id) {
   constexpr char const * g_hex_digits = "0123456789ABCDEFabcdef";
   constexpr size_t g_uuid_len = 36;
@@ -213,9 +318,7 @@ template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT
 }
 #endif
 
-// Limitation - does not inspect graphemes, so it cannot check idn-hostname
-// to fix this - we'd need to
-template <typename CharT = char> inline bool hostname(std::basic_string_view<CharT> name) {
+template <typename CharT> inline bool hostname(std::basic_string_view<CharT> name) {
   using delim = detail::char_delimiters<CharT>;
   if (name.find_first_of(delim::illegal_hostname_chars) != name.npos) {
     return false;
@@ -312,7 +415,7 @@ inline bool ipv6(std::string_view ip) {
 // Therefore, there's no point in trying to validate things according to a
 // complex grammar - as long as it has an '@' sign with at least one character
 // on each side, we ought to call it an email.
-template <typename CharT = char> inline bool email(std::basic_string_view<CharT> em) {
+template <typename CharT> inline bool email(std::basic_string_view<CharT> em) {
   using delim = detail::char_delimiters<CharT>;
   size_t n = em.find_last_of('@');
   if (n == 0 || n >= em.size() - 1) {
@@ -377,7 +480,7 @@ private:
       {"relative-json-pointer", CONSTRUCTS(RelativePointer)},
       {"regex", nullptr},
       {"time", &format::time},
-      {"uri", nullptr},
+      {"uri", &format::uri},
       {"uri-reference", nullptr},
       {"uri-template", nullptr},
       {"uuid", &format::uuid},