Explorar el Código

feat: prospective idn-email and idn-hostname support

Sam Jaffe hace 3 meses
padre
commit
53e3015fea
Se han modificado 2 ficheros con 57 adiciones y 18 borrados
  1. 20 1
      include/jvalidate/detail/string.h
  2. 37 17
      include/jvalidate/format.h

+ 20 - 1
include/jvalidate/detail/string.h

@@ -4,12 +4,14 @@
  */
 #pragma once
 
+#include <string>
 #if __has_include(<unicode/std_string.h>)
 #define JVALIDATE_HAS_ICU
 #include <unicode/brkiter.h>
 #include <unicode/unistr.h>
 #endif
-#include <iostream>
+
+#include <jvalidate/detail/expect.h>
 
 namespace jvalidate::detail {
 /**
@@ -92,4 +94,21 @@ inline std::string regex_escape(std::string_view arg) {
   return std::string(arg);
 #endif
 }
+
+/**
+ * @brief Decodes UTF-8 text into a sequence of UTF-32 code points.
+ *
+ * When ICU is available (JVALIDATE_HAS_ICU) the conversion is delegated to
+ * icu::UnicodeString. Otherwise a small built-in decoder is used, so that
+ * this header keeps compiling on systems without ICU.
+ *
+ * The built-in decoder emits U+FFFD for input that is not valid UTF-8
+ * (stray or missing continuation bytes, overlong forms, surrogates, values
+ * above U+10FFFF). NOTE(review): for malformed input the number of
+ * replacement characters may differ from what ICU produces.
+ *
+ * @param arg UTF-8 encoded text
+ * @returns one char32_t per decoded code point
+ *
+ * On the ICU path a conversion failure is reported through JVALIDATE_THROW
+ * as a std::runtime_error.
+ */
+inline std::u32string to_u32(std::string_view arg) {
+#ifdef JVALIDATE_HAS_ICU
+  icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
+
+  int32_t const capacity = ucs.countChar32();
+  std::u32string rval(static_cast<size_t>(capacity), U'\0');
+
+  UErrorCode status = U_ZERO_ERROR;
+  // toUTF32 writes UChar32 values - name ICU's type instead of assuming `int`
+  ucs.toUTF32(reinterpret_cast<UChar32 *>(rval.data()), capacity, status);
+  // This should never occur - unless there's like an alloc error
+  if (U_FAILURE(status)) {
+    JVALIDATE_THROW(std::runtime_error, "UTF-32 Translation Error");
+  }
+
+  return rval;
+#else
+  constexpr char32_t replacement = 0xFFFD;
+
+  std::u32string rval;
+  rval.reserve(arg.size());
+  for (size_t pos = 0; pos < arg.size();) {
+    auto const lead = static_cast<unsigned char>(arg[pos++]);
+    if (lead < 0x80) {
+      rval.push_back(lead);
+      continue;
+    }
+
+    // The lead byte fixes how many continuation bytes follow, and with that
+    // the smallest code point that legitimately needs a sequence this long.
+    int pending = 0;
+    char32_t code = 0;
+    char32_t minimum = 0;
+    if ((lead & 0xE0) == 0xC0) {
+      pending = 1;
+      code = lead & 0x1F;
+      minimum = 0x80;
+    } else if ((lead & 0xF0) == 0xE0) {
+      pending = 2;
+      code = lead & 0x0F;
+      minimum = 0x800;
+    } else if ((lead & 0xF8) == 0xF0) {
+      pending = 3;
+      code = lead & 0x07;
+      minimum = 0x10000;
+    } else {
+      // Stray continuation byte, or a lead byte that UTF-8 never uses
+      rval.push_back(replacement);
+      continue;
+    }
+
+    // Stop in front of the first non-continuation byte so that it gets
+    // decoded as the start of the next sequence.
+    for (; pending > 0 && pos < arg.size(); --pending, ++pos) {
+      auto const next = static_cast<unsigned char>(arg[pos]);
+      if ((next & 0xC0) != 0x80) {
+        break;
+      }
+      code = (code << 6) | (next & 0x3F);
+    }
+
+    bool const surrogate = code >= 0xD800 && code <= 0xDFFF;
+    bool const valid = pending == 0 && code >= minimum && code <= 0x10FFFF && !surrogate;
+    rval.push_back(valid ? code : replacement);
+  }
+
+  return rval;
+#endif
+}
 }

+ 37 - 17
include/jvalidate/format.h

@@ -10,9 +10,15 @@
 
 #include <jvalidate/detail/pointer.h>
 #include <jvalidate/detail/relative_pointer.h>
+#include <jvalidate/detail/string.h>
 #include <jvalidate/forward.h>
 
+// Shorthand for the entries of the format table further down in this header.
+// Both macros are #undef'd again at the end of the file.
+//
+// CONSTRUCTS(TYPE) expands to format::ctor_as_valid<detail::TYPE>.
+#define CONSTRUCTS(TYPE) format::ctor_as_valid<detail::TYPE>
+// UTF32(FN) expands to format::utf32<format::FN<char32_t>>, i.e. utf32 with
+// the char32_t instantiation of FN as its template argument.
+#define UTF32(FN) format::utf32<format::FN<char32_t>>
+
 namespace jvalidate::format::detail {
+using namespace jvalidate::detail;
+
 struct result {
   ptrdiff_t consumed;
   bool valid;
@@ -146,7 +152,7 @@ inline bool duration(std::string_view dur) {
 
 // Limitation - does not inspect graphemes, so it cannot check idn-hostname.
 // NOTE(review): the original note on what a fix would require was left unfinished.
-inline bool hostname(std::string_view name) {
+template <typename CharT = char> inline bool hostname(std::basic_string_view<CharT> name) {
   auto hostname_part = [&name](size_t end) {
     if (end == 0 || end >= 64 || name[0] == '-' || name[end - 1] == '-') {
       return false;
@@ -244,28 +250,35 @@ inline bool ipv6(std::string_view ip) {
 // Therefore, there's no point in trying to validate things according to a
 // complex grammar - as long as it has an '@' sign with at least one character
 // on each side, we ought to call it an email.
-inline bool email(std::string_view em) {
+//
+// The part after the last '@' must be a hostname, or a bracketed IP literal
+// ("[<ipv4>]" or "[IPv6:<ipv6>]"). Templated on CharT so that the same checks
+// can be instantiated for char32_t input as well as for plain char.
+template <typename CharT = char> inline bool email(std::basic_string_view<CharT> em) {
   size_t n = em.find_last_of('@');
   if (n == 0 || n >= em.size() - 1) {
     return false;
   }
 
-  if (em[0] == '"' && em[n - 1] == '"') {
+  auto const who = em.substr(0, n);
+  if (who.starts_with('"') && who.ends_with('"')) {
     // No validation
-  } else if (em.substr(0, n).find("..") != std::string::npos || em[n - 1] == '.' || em[0] == '.') {
+  } else if (who.starts_with('.') || who.ends_with('.')) {
+    return false;
+  } else if (CharT const dots[3] = {'.', '.', '\0'}; who.find(dots) != who.npos) {
     return false;
   }
 
-  em.remove_prefix(n + 1);
-  if (em.front() == '[' && em.back() == ']') {
-    em.remove_prefix(1);
-    em.remove_suffix(1);
-    if (em.starts_with("IPv6:")) {
-      return ipv6(std::string(em.substr(5)));
-    }
-    return ipv4(std::string(em)); // Re-acquire NULL-term
+  auto domain = em.substr(n + 1);
+  if (not(domain.starts_with('[') && domain.ends_with(']'))) {
+    return hostname(domain);
+  }
+  domain.remove_prefix(1);
+  domain.remove_suffix(1);
+
+  // IP literals are ASCII-only. Narrowing a wider code unit to char would let
+  // e.g. U+0131 pass for '1', so anything outside ASCII is rejected here.
+  // std::string keeps its own terminator behind the data, so no extra '\0'
+  // is stored in the string that the IP checks get to see.
+  std::string ip;
+  ip.reserve(domain.size());
+  for (CharT const unit : domain) {
+    if (static_cast<char32_t>(unit) > 0x7F) {
+      return false;
+    }
+    ip.push_back(static_cast<char>(unit));
+  }
+
+  if (ip.starts_with("IPv6:")) {
+    return ipv6(ip.substr(5));
   }
-  return hostname(em);
+  return ipv4(ip);
 }
 
 template <typename T> inline bool ctor_as_valid(std::string_view str) {
@@ -274,6 +287,10 @@ template <typename T> inline bool ctor_as_valid(std::string_view str) {
     return true;
   } catch (std::exception const &) { return false; }
 }
+
+/**
+ * @brief Adapts a predicate over UTF-32 text so it can be called with the
+ * narrow string_view signature.
+ *
+ * @tparam Predicate callable that is invoked with the decoded text
+ * @param str text handed to detail::to_u32 for decoding
+ * @returns whatever Predicate returns for the decoded text
+ */
+template <auto Predicate> bool utf32(std::string_view str) {
+  std::u32string const decoded = detail::to_u32(str);
+  return Predicate(decoded);
+}
 }
 
 namespace jvalidate {
@@ -289,14 +306,14 @@ private:
       {"duration", &format::duration},
       {"email", &format::email},
       {"hostname", &format::hostname},
-      {"idn-email", nullptr},
-      {"idn-hostname", nullptr},
+      {"idn-email", UTF32(email)},
+      {"idn-hostname", UTF32(hostname)},
       {"ipv4", &format::ipv4},
       {"ipv6", &format::ipv6},
       {"iri", nullptr},
       {"iri-reference", nullptr},
-      {"json-pointer", &format::ctor_as_valid<detail::Pointer>},
-      {"relative-json-pointer", &format::ctor_as_valid<detail::RelativePointer>},
+      {"json-pointer", CONSTRUCTS(Pointer)},
+      {"relative-json-pointer", CONSTRUCTS(RelativePointer)},
       {"regex", nullptr},
       {"time", &format::time},
       {"uri", nullptr},
@@ -320,3 +337,6 @@ public:
   }
 };
 }
+
+#undef CONSTRUCTS
+#undef UTF32