Selaa lähdekoodia

fix: update format code to account for new tests

This contains the following changes:
* hostname: forbid empty
* hostname: forbid '.' as the final character of a hostname
* hostname: allow idn-hostnames and punycode to use other dot characters
  (U+3002, U+FF0E, U+FF61) as label separators
* hostname: relax size rules to measure length in units of the string's
  native character type instead of its UTF-8 encoding
* hostname: check for illegal special cases on each hostname part, after
  any punycode decoding, instead of on the whole hostname up front
* email: forbid multiple emails
Sam Jaffe 2 viikkoa sitten
vanhempi
commit
09bdb0a972
3 muutettua tiedostoa jossa 30 lisäystä ja 22 poistoa
  1. 7 0
      include/jvalidate/detail/idna_special_cases.h
  2. 23 21
      include/jvalidate/format.h
  3. 0 1
      tests/CMakeLists.txt

+ 7 - 0
include/jvalidate/detail/idna_special_cases.h

@@ -99,6 +99,7 @@ using std::string_view_literals::operator""sv;
 template <typename CharT> struct char_delimiters;
 
 template <> struct char_delimiters<char> {
+  static constexpr std::string_view hostname_part_delims{"."};
   static constexpr std::string_view dotdot{".."};
   static constexpr std::string_view double_slash{"//"};
   static constexpr std::string_view illegal_hostname_chars;
@@ -108,6 +109,7 @@ template <> struct char_delimiters<char> {
 };
 
 template <> struct char_delimiters<char32_t> {
+  static constexpr std::u32string_view hostname_part_delims{U".\u3002\uff0e\uff61"};
   static constexpr std::u32string_view dotdot{U".."};
   static constexpr std::u32string_view double_slash{U"//"};
   static constexpr std::u32string_view punycode_prefix{U"xn--"};
@@ -122,4 +124,9 @@ template <> struct char_delimiters<char32_t> {
       special_case{g_exception_chars, before_or_after<is_not_disallowed_exception>},
   };
 };
+
+template <typename CharT> bool is_special_case_ok(std::basic_string_view<CharT> name) {
+  return std::ranges::all_of(detail::char_delimiters<CharT>::special_cases,
+                             [name](auto & sc) { return sc.accepts(name); });
+}
 }

+ 23 - 21
include/jvalidate/format.h

@@ -484,7 +484,7 @@ inline bool duration(std::string_view dur) {
 template <typename CharT>
 bool is_invalid_size_or_boundary_hostname(std::basic_string_view<CharT> name) {
   using delim = detail::char_delimiters<CharT>;
-  return (name.empty() || detail::length_u8(name) >= 64 ||
+  return (name.empty() || name.length() >= 64 ||
           (name.size() >= 4 && name.substr(2).starts_with(delim::illegal_dashes_ulabel)) ||
           name[0] == '-' || name.back() == '-');
 }
@@ -508,9 +508,19 @@ template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT
     return (decoded != detail::to_u32(name)) && hostname_part<char32_t>(decoded);
   }
 
+  // Unfortunately, the ada-idna library does not validate things like
+  // "is there a HEBREW character after the HEBREW COMMA".
+  if (not detail::is_special_case_ok(name)) {
+    return false;
+  }
+
+  if (name.find_first_of(delim::illegal_hostname_chars) != name.npos) {
+    return false;
+  }
+
   // An INVALID hostname part is one of the following:
   // - empty
-  // - more than 63 UTF-8 characters long
+  // - more than 63 characters long
   // - starts or ends with a '-'
   // - matches the regular expression /^..--.*$/
   if (is_invalid_size_or_boundary_hostname(name)) {
@@ -529,34 +539,22 @@ template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT
 
 template <typename CharT> inline bool hostname(std::basic_string_view<CharT> name) {
   using delim = detail::char_delimiters<CharT>;
-  if (name.find_first_of(delim::illegal_hostname_chars) != name.npos) {
-    return false;
-  }
-
-  // In general, the maximum length of a hostname is 253 UTF-8 characters.
-  if (detail::to_u8(name).size() > (name.back() == '.' ? 254 : 253)) {
-    return false;
-  }
-
-  // Unfortunately, the ada-idna library does not validate things like
-  // "is there a HEBREW character after the HEBREW COMMA".
-  if (not std::ranges::all_of(delim::special_cases,
-                              [name](auto & sc) { return sc.accepts(name); })) {
+  // In general, the maximum length of a hostname is 253 characters.
+  if (name.empty() || name.length() > 253) {
     return false;
   }
 
   // We validate each sub-section of the hostname in parts, delimited by '.'
-  for (size_t n = name.find('.'); n != std::string::npos;
-       name.remove_prefix(n + 1), n = name.find('.')) {
+  for (size_t n = name.find_first_of(delim::hostname_part_delims); n != std::string::npos;
+       name.remove_prefix(n + 1), n = name.find_first_of(delim::hostname_part_delims)) {
     if (not hostname_part(name.substr(0, n))) {
       return false;
     }
   }
 
-  // name.empty() would be true only if the final character in the input name
-  // was '.', this is the only empty hostname part that we allow. Otherwise, we
-  // have a trailing hostname_part.
-  return name.empty() || hostname_part(name);
+  // Previous test versions allowed for a hostname to end with '.', but this is
+  // not permitted in the latest test specification.
+  return hostname_part(name);
 }
 
 inline bool ipv4(std::string_view ip) {
@@ -669,6 +667,10 @@ template <typename CharT> inline bool email(std::basic_string_view<CharT> em) {
     return false;
   } else if (em.substr(0, n).find(delim::dotdot) != em.npos) {
     return false;
+  } else if (who.find('@') != em.npos) {
+    // This will catch multiple emails, but will gracefully ignore quote-escaped
+    // '@' characters in the name element.
+    return false;
   }
 
   // The DOMAIN section of an email address MAY be either a HOSTNAME, or an

+ 0 - 1
tests/CMakeLists.txt

@@ -57,7 +57,6 @@ string(
   "*optional_content"
   "*optional_*ecmascript_regex"
   "*optional_zeroTerminatedFloats"
-  "*optional_format*"
 )
 if (NOT ${ICU_FOUND})
   string(