|
|
@@ -201,6 +201,7 @@ template <typename CharT> bool is_uri_authority(std::basic_string_view<CharT> ur
|
|
|
return ipv4(to_u8(uri)) || hostname(uri);
|
|
|
}
|
|
|
|
|
|
+// Tests if a URI "Query Part" or "Fragment Part" is valid and removes the part
|
|
|
template <typename CharT> bool test_uri_part(std::basic_string_view<CharT> & uri, char delim) {
|
|
|
size_t const pos = uri.find(delim);
|
|
|
if (pos == uri.npos) {
|
|
|
@@ -431,14 +432,24 @@ inline bool hostname_part(std::string_view name) {
|
|
|
#else
|
|
|
template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT> name) {
|
|
|
using delim = detail::char_delimiters<CharT>;
|
|
|
+ // Punycode is a way to restructure UTF-8 strings to be ASCII-compatible.
|
|
|
+ // All Punycode strings start with "xn--" (and would therefore fail below).
|
|
|
if (name.starts_with(delim::punycode_prefix)) {
|
|
|
std::u32string decoded = detail::to_u32(ada::idna::to_unicode(detail::to_u8(name)));
|
|
|
return (decoded != detail::to_u32(name)) && hostname_part<char32_t>(decoded);
|
|
|
}
|
|
|
|
|
|
+ // An INVALID hostname part is one of the following:
|
|
|
+ // - empty
|
|
|
+ // - more than 63 UTF-8 characters long
|
|
|
+ // - starts or ends with a '-'
|
|
|
+ // - matches the regular expression /^..--.*$/
|
|
|
if (is_invalid_size_or_boundary_hostname(name)) {
|
|
|
return false;
|
|
|
}
|
|
|
+
|
|
|
+ // This is a much easier check in hostname than idn-hostname, since we can
|
|
|
+ // just check for alphanumeric and '-'.
|
|
|
if constexpr (std::is_same_v<char, CharT>) {
|
|
|
return std::ranges::none_of(name, [](char c) { return c != '-' && not std::isalnum(c); });
|
|
|
} else {
|
|
|
@@ -514,8 +525,9 @@ inline bool ipv6(std::string_view ip) {
|
|
|
return false;
|
|
|
}
|
|
|
// since ipv4 addresses contain 8 bytes of information, and each segment of
|
|
|
- // an ipv6 address contains 4 bytes - we reduce the number of expected spans
|
|
|
- // to 6. This can even accept IPv6 things like "::127.0.0.1" as valid IPv6.
|
|
|
+ // an ipv6 address contains 4 bytes - we should reduce the number of
|
|
|
+ // expected spans to 6. Instead - we reduce it to 7 because we don't prune
|
|
|
+ // the first OCTET of the IPv4 section (as it can read as a valid segment).
|
|
|
expected_spans = 7;
|
|
|
ip = ip.substr(0, n);
|
|
|
}
|
|
|
@@ -541,7 +553,9 @@ inline bool ipv6(std::string_view ip) {
|
|
|
while (!ip.empty() && ++groups) {
|
|
|
int data;
|
|
|
if (sscanf(ip.data(), "%4x", &data) != 1) {
|
|
|
- return false; // Not a 4-byte HEXDIGIT
|
|
|
+ // Not a 4-byte HEXDIGIT. Not sure that it's ever possible due to the
|
|
|
+ // char filter above.
|
|
|
+ return false;
|
|
|
}
|
|
|
|
|
|
if (size_t const n = ip.find(':'); std::min(n, ip.size()) > 4) {
|