Ver Fonte

feat: implement iri format testing

Sam Jaffe há 7 meses atrás
pai
commit
da0d0a1dd8

+ 2 - 0
include/jvalidate/detail/idna_special_cases.h

@@ -100,6 +100,7 @@ template <typename CharT> struct char_delimiters;
 
 
 template <> struct char_delimiters<char> {
 template <> struct char_delimiters<char> {
   static constexpr std::string_view dotdot{".."};
   static constexpr std::string_view dotdot{".."};
+  static constexpr std::string_view double_slash{"//"};
   static constexpr std::string_view illegal_hostname_chars;
   static constexpr std::string_view illegal_hostname_chars;
   static constexpr std::string_view punycode_prefix{"xn--"};
   static constexpr std::string_view punycode_prefix{"xn--"};
   static constexpr std::string_view illegal_dashes_ulabel{"--"};
   static constexpr std::string_view illegal_dashes_ulabel{"--"};
@@ -108,6 +109,7 @@ template <> struct char_delimiters<char> {
 
 
 template <> struct char_delimiters<char32_t> {
 template <> struct char_delimiters<char32_t> {
   static constexpr std::u32string_view dotdot{U".."};
   static constexpr std::u32string_view dotdot{U".."};
+  static constexpr std::u32string_view double_slash{U"//"};
   static constexpr std::u32string_view punycode_prefix{U"xn--"};
   static constexpr std::u32string_view punycode_prefix{U"xn--"};
   static constexpr std::u32string_view illegal_dashes_ulabel{U"--"};
   static constexpr std::u32string_view illegal_dashes_ulabel{U"--"};
   static constexpr std::u32string_view illegal_hostname_chars{U"\u302E"};
   static constexpr std::u32string_view illegal_hostname_chars{U"\u302E"};

+ 25 - 9
include/jvalidate/format.h

@@ -33,7 +33,7 @@ bool time(std::string_view dt);
 bool date_time(std::string_view dt);
 bool date_time(std::string_view dt);
 bool duration(std::string_view dur);
 bool duration(std::string_view dur);
 
 
-bool uri(std::string_view uri);
+template <typename CharT = char> bool uri(std::basic_string_view<CharT> uri);
 bool uuid(std::string_view id);
 bool uuid(std::string_view id);
 template <typename CharT = char> bool hostname(std::basic_string_view<CharT> name);
 template <typename CharT = char> bool hostname(std::basic_string_view<CharT> name);
 
 
@@ -85,10 +85,25 @@ inline bool is_leapsecond(std::tm tm) {
 #endif
 #endif
 }
 }
 
 
-inline bool is_pchar(std::string_view part, size_t & pos,
/**
 * @brief Tests whether a code point falls in the `ucschar` set of RFC 3987,
 * i.e. the non-ASCII characters that an IRI may carry unescaped.
 *
 * @param c The code point to classify. Values outside every listed range
 * (including all ASCII and any negative value) yield false.
 *
 * @returns true if c lies within one of the inclusive ranges below.
 */
inline bool is_uschar(int c) {
  using Range = std::pair<int, int>;
  // Inclusive [first, second] code point ranges, in ascending order.
  constexpr std::array data{
      Range{0xA0, 0xD7FF},     Range{0xF900, 0xFDCF},   Range{0xFDF0, 0xFFEF},
      Range{0x10000, 0x1FFFD}, Range{0x20000, 0x2FFFD}, Range{0x30000, 0x3FFFD},
      Range{0x40000, 0x4FFFD}, Range{0x50000, 0x5FFFD}, Range{0x60000, 0x6FFFD},
      Range{0x70000, 0x7FFFD}, Range{0x80000, 0x8FFFD}, Range{0x90000, 0x9FFFD},
      Range{0xA0000, 0xAFFFD}, Range{0xB0000, 0xBFFFD}, Range{0xC0000, 0xCFFFD},
      Range{0xD0000, 0xDFFFD},
      // Plane 14 starts at U+E1000, not U+E0000: RFC 3987 leaves
      // U+E0000..U+E0FFF out of ucschar.
      Range{0xE1000, 0xEFFFD},
  };
  for (auto const & range : data) {
    if (c >= range.first && c <= range.second) {
      return true;
    }
  }
  return false;
}
+
+template <typename CharT>
+inline bool is_pchar(std::basic_string_view<CharT> part, size_t & pos,
                      std::string_view extra_valid_chars = ":@") {
                      std::string_view extra_valid_chars = ":@") {
   constexpr char const * g_hex_digits = "0123456789ABCDEFabcdef";
   constexpr char const * g_hex_digits = "0123456789ABCDEFabcdef";
-  if (std::isalnum(part[pos]) || std::strchr("-._~!$&'()*+,;=", part[pos])) {
+  if (std::isalnum(part[pos]) || is_uschar(part[pos]) ||
+      std::strchr("-._~!$&'()*+,;=", part[pos])) {
     return true;
     return true;
   }
   }
   if (part[pos] == '%') {
   if (part[pos] == '%') {
@@ -97,7 +112,7 @@ inline bool is_pchar(std::string_view part, size_t & pos,
   return extra_valid_chars.find(part[pos]) != part.npos;
   return extra_valid_chars.find(part[pos]) != part.npos;
 };
 };
 
 
-inline bool is_uri_authority(std::string_view uri) {
+template <typename CharT> inline bool is_uri_authority(std::basic_string_view<CharT> uri) {
   if (size_t pos = uri.find('@'); pos != uri.npos && pos < uri.find('/')) {
   if (size_t pos = uri.find('@'); pos != uri.npos && pos < uri.find('/')) {
     for (size_t i = 0; i < pos; ++i) {
     for (size_t i = 0; i < pos; ++i) {
       if (not is_pchar(uri, i, ":")) {
       if (not is_pchar(uri, i, ":")) {
@@ -110,7 +125,7 @@ inline bool is_uri_authority(std::string_view uri) {
     size_t pos = uri.find(']');
     size_t pos = uri.find(']');
     auto ip = uri.substr(1, pos - 1);
     auto ip = uri.substr(1, pos - 1);
     uri.remove_prefix(pos + 1);
     uri.remove_prefix(pos + 1);
-    if (not ipv6(ip)) {
+    if (not ipv6(to_u8(ip))) {
       return false;
       return false;
     }
     }
   }
   }
@@ -120,7 +135,7 @@ inline bool is_uri_authority(std::string_view uri) {
     }
     }
     uri.remove_suffix(uri.size() - pos + 1);
     uri.remove_suffix(uri.size() - pos + 1);
   }
   }
-  return ipv4(uri) || hostname(uri);
+  return ipv4(to_u8(uri)) || hostname(uri);
 }
 }
 }
 }
 
 
@@ -165,7 +180,8 @@ inline bool date_time(std::string_view dt) {
   return time(dt);
   return time(dt);
 }
 }
 
 
-inline bool uri(std::string_view uri) {
+template <typename CharT> inline bool uri(std::basic_string_view<CharT> uri) {
+  using delim = detail::char_delimiters<CharT>;
   auto test_uri_part = [&uri](char delim) {
   auto test_uri_part = [&uri](char delim) {
     size_t const pos = uri.find(delim);
     size_t const pos = uri.find(delim);
     if (pos == uri.npos) {
     if (pos == uri.npos) {
@@ -194,7 +210,7 @@ inline bool uri(std::string_view uri) {
   RETURN_UNLESS(test_uri_part('?'), false);
   RETURN_UNLESS(test_uri_part('?'), false);
 
 
   auto path = uri;
   auto path = uri;
-  if (uri.starts_with("//")) {
+  if (uri.starts_with(delim::double_slash)) {
     uri.remove_prefix(2);
     uri.remove_prefix(2);
     path = uri.substr(std::min(uri.size(), uri.find('/')));
     path = uri.substr(std::min(uri.size(), uri.find('/')));
     uri.remove_suffix(path.size());
     uri.remove_suffix(path.size());
@@ -474,7 +490,7 @@ private:
       {"idn-hostname", UTF32(hostname)},
       {"idn-hostname", UTF32(hostname)},
       {"ipv4", &format::ipv4},
       {"ipv4", &format::ipv4},
       {"ipv6", &format::ipv6},
       {"ipv6", &format::ipv6},
-      {"iri", nullptr},
+      {"iri", UTF32(uri)},
       {"iri-reference", nullptr},
       {"iri-reference", nullptr},
       {"json-pointer", CONSTRUCTS(Pointer)},
       {"json-pointer", CONSTRUCTS(Pointer)},
       {"relative-json-pointer", CONSTRUCTS(RelativePointer)},
       {"relative-json-pointer", CONSTRUCTS(RelativePointer)},