Browse Source

docs: add more explanation in format.h

Sam Jaffe 7 months ago
parent
commit
1c382e8a53
1 changed files with 17 additions and 3 deletions
  1. 17 3
      include/jvalidate/format.h

+ 17 - 3
include/jvalidate/format.h

@@ -201,6 +201,7 @@ template <typename CharT> bool is_uri_authority(std::basic_string_view<CharT> ur
   return ipv4(to_u8(uri)) || hostname(uri);
 }
 
+// Tests if a URI "Query Part" or "Fragment Part" is valid and removes the part
 template <typename CharT> bool test_uri_part(std::basic_string_view<CharT> & uri, char delim) {
   size_t const pos = uri.find(delim);
   if (pos == uri.npos) {
@@ -431,14 +432,24 @@ inline bool hostname_part(std::string_view name) {
 #else
 template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT> name) {
   using delim = detail::char_delimiters<CharT>;
+  // Punycode is a way to restructure UTF-8 strings to be ASCII compatible.
+  // All Punycode strings start with "xn--" (and would therefore fail below).
   if (name.starts_with(delim::punycode_prefix)) {
     std::u32string decoded = detail::to_u32(ada::idna::to_unicode(detail::to_u8(name)));
     return (decoded != detail::to_u32(name)) && hostname_part<char32_t>(decoded);
   }
 
+  // An INVALID hostname part is one of the following:
+  // - empty
+  // - more than 63 UTF-8 characters long
+  // - starts or ends with a '-'
+  // - matches the regular expression /^..--.*$/
   if (is_invalid_size_or_boundary_hostname(name)) {
     return false;
   }
+
+  // This is a much easier check in hostname than idn-hostname, since we can
+  // just check for alphanumeric and '-'.
   if constexpr (std::is_same_v<char, CharT>) {
     return std::ranges::none_of(name, [](char c) { return c != '-' && not std::isalnum(c); });
   } else {
@@ -514,8 +525,9 @@ inline bool ipv6(std::string_view ip) {
       return false;
     }
     // since ipv4 addresses contain 4 bytes of information, and each segment of
-    // an ipv6 address contains 4 bytes - we reduce the number of expected spans
-    // to 6. This can even accept IPv6 things like "::127.0.0.1" as valid IPv6.
+    // an ipv6 address contains 2 bytes - we should reduce the number of
+    // expected spans to 6. Instead - we reduce it to 7 because we don't prune
+    // the first OCTET of the IPv4 section (as it can read as a valid segment).
     expected_spans = 7;
     ip = ip.substr(0, n);
   }
@@ -541,7 +553,9 @@ inline bool ipv6(std::string_view ip) {
   while (!ip.empty() && ++groups) {
     int data;
     if (sscanf(ip.data(), "%4x", &data) != 1) {
-      return false; // Not a 4-byte HEXDIGIT
+      // Not a group of up to four hex digits. Not sure that it's ever
+      // possible due to the char filter above.
+      return false;
     }
 
     if (size_t const n = ip.find(':'); std::min(n, ip.size()) > 4) {