#pragma once

#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>

namespace jvalidate::format::detail {
/// True when @p c is an ASCII decimal digit (locale-independent).
inline bool is_digit(char c) { return c >= '0' && c <= '9'; }

/// True when @p c is an ASCII hexadecimal digit of either case.
inline bool is_hex_digit(char c) {
  return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

/**
 * @brief Reads exactly @p count decimal digits of @p text starting at @p pos.
 * @return The parsed value, or -1 if the range is out of bounds or contains
 * anything other than a digit. Never reads outside of @p text.
 */
inline int fixed_number(std::string_view text, size_t pos, size_t count) {
  if (pos > text.size() || text.size() - pos < count) {
    return -1;
  }
  int value = 0;
  for (size_t i = pos; i < pos + count; ++i) {
    if (not is_digit(text[i])) {
      return -1;
    }
    value = value * 10 + (text[i] - '0');
  }
  return value;
}

/// True when @p text begins with a strict "HH:MM" (00-23, 00-59).
inline bool hour_minute(std::string_view text) {
  int const hour = fixed_number(text, 0, 2);
  int const minute = fixed_number(text, 3, 2);
  // minute >= 0 guarantees text.size() >= 5, so text[2] is in range.
  return hour >= 0 && hour <= 23 && minute >= 0 && minute <= 59 &&
         text[2] == ':';
}

/// Gregorian leap-year rule.
inline bool is_leapyear(int y) {
  return (y % 400) == 0 || ((y % 4) == 0 && (y % 100) != 0);
}

/**
 * @brief Checks whether a day-of-month does not exist in the given month.
 * @param y Full year (e.g. 2024).
 * @param m Zero-based month (0 = January ... 11 = December).
 * @param d One-based day of month.
 * @return true if the date is NOT a real calendar date.
 */
inline bool illegal_date(int y, int m, int d) {
  static constexpr int days[] = {31, 28, 31, 30, 31, 30,
                                 31, 31, 30, 31, 30, 31};
  if (m < 0 || m > 11 || d < 1) {
    return true;
  }
  if (is_leapyear(y) && m == 1) {
    --d; // February has one extra day in a leap year
  }
  return d > days[m];
}

/**
 * @brief Parses a strict "YYYY-MM-DD" prefix of @p dt.
 * @return {number of characters consumed (always 10), true} on success,
 * {0, false} otherwise. Trailing characters are left for the caller.
 *
 * Parsed by hand rather than with strptime(): a string_view is not
 * guaranteed to be NUL-terminated, and strptime() tolerates padding
 * whitespace and short fields that RFC 3339 does not allow.
 */
inline std::pair<long, bool> date(std::string_view dt) {
  constexpr long date_length = 10;
  int const year = fixed_number(dt, 0, 4);
  int const month = fixed_number(dt, 5, 2);
  int const day = fixed_number(dt, 8, 2);
  // day >= 0 guarantees dt.size() >= 10, so dt[4] and dt[7] are in range.
  bool const well_formed = year >= 0 && month >= 1 && month <= 12 &&
                           day >= 1 && day <= 31 && dt[4] == '-' &&
                           dt[7] == '-';
  if (not well_formed || illegal_date(year, month - 1, day)) {
    return {0L, false};
  }
  return {date_length, true};
}
}

namespace jvalidate::format {
/// Validates an RFC 3339 full-date ("YYYY-MM-DD") with nothing trailing.
inline bool date(std::string_view dt) {
  auto [size, good] = detail::date(dt);
  return good && static_cast<size_t>(size) == dt.size();
}

/**
 * @brief Validates an RFC 3339 full-time: "HH:MM:SS[.fraction](Z|+HH:MM|-HH:MM)".
 *
 * The offset is mandatory. Seconds may be 60 to allow a leap second; whether
 * that leap second is plausible for the given hour/minute is not checked.
 */
inline bool time(std::string_view dt) {
  if (not detail::hour_minute(dt)) {
    return false;
  }
  int const second = detail::fixed_number(dt, 6, 2);
  // second >= 0 guarantees dt.size() >= 8, so dt[5] is in range.
  if (second < 0 || second > 60 || dt[5] != ':') {
    return false;
  }
  dt.remove_prefix(8);

  if (not dt.empty() && dt[0] == '.') {
    dt.remove_prefix(1);
    size_t digits = 0;
    while (digits < dt.size() && detail::is_digit(dt[digits])) {
      ++digits;
    }
    if (digits == 0) {
      return false; // a '.' must be followed by at least one digit
    }
    dt.remove_prefix(digits);
  }

  if (dt.empty()) {
    return false; // time-offset is required
  }
  if (dt[0] == 'Z' || dt[0] == 'z') {
    return dt.size() == 1;
  }
  if (dt[0] == '+' || dt[0] == '-') {
    dt.remove_prefix(1);
    return dt.size() == 5 && detail::hour_minute(dt);
  }
  return false;
}

/// Validates an RFC 3339 date-time: full-date, 'T' or 't', then full-time.
inline bool date_time(std::string_view dt) {
  auto [size, good] = detail::date(dt);
  if (not good) {
    return false;
  }
  size_t const separator = static_cast<size_t>(size);
  if (separator >= dt.size() ||
      (dt[separator] != 'T' && dt[separator] != 't')) {
    return false;
  }
  dt.remove_prefix(separator + 1);
  return format::time(dt);
}

/**
 * @brief Validates the canonical 8-4-4-4-12 hexadecimal UUID layout.
 *
 * Every position is checked explicitly, so whitespace or other filler can
 * never stand in for a hex digit.
 */
inline bool uuid(std::string_view id) {
  constexpr size_t uuid_length = 36;
  if (id.size() != uuid_length) {
    return false;
  }
  for (size_t i = 0; i < uuid_length; ++i) {
    bool const is_dash_slot = (i == 8 || i == 13 || i == 18 || i == 23);
    if (is_dash_slot ? id[i] != '-' : not detail::is_hex_digit(id[i])) {
      return false;
    }
  }
  return true;
}

/**
 * @brief Validates an ISO 8601 duration as profiled by RFC 3339 Appendix A.
 *
 * Accepted shapes: "PnW", or "P[nY][nM][nD][T[nH][nM][nS]]" with components
 * in that order, at least one component overall, and at least one component
 * after a 'T'. Signs, whitespace and fractional values are rejected.
 */
inline bool duration(std::string_view dur) {
  constexpr size_t npos = std::string_view::npos;

  // Consumes "<digits><designator>" from the front of dur if the designator
  // is one of `allowed`. Returns the designator's index within `allowed`, or
  // npos (leaving dur untouched) when nothing matched.
  auto eat = [&dur](std::string_view allowed) -> size_t {
    size_t digits = 0;
    while (digits < dur.size() && detail::is_digit(dur[digits])) {
      ++digits;
    }
    if (digits == 0 || digits == dur.size()) {
      return std::string_view::npos;
    }
    size_t const which = allowed.find(dur[digits]);
    if (which != std::string_view::npos) {
      dur.remove_prefix(digits + 1);
    }
    return which;
  };

  if (dur.size() < 2 || dur[0] != 'P') {
    return false;
  }
  dur.remove_prefix(1);

  if (dur[0] != 'T') {
    // Weeks cannot be combined with any other component.
    if (eat("W") != npos) {
      return dur.empty();
    }

    // Date components, in order; stop at 'T' so that any prefix of Y/M/D may
    // be followed by a time part (e.g. "P1YT1H").
    std::string_view ymd{"YMD"};
    while (not ymd.empty() && not dur.empty() && dur[0] != 'T') {
      size_t const n = eat(ymd);
      if (n == npos) {
        return false;
      }
      ymd.remove_prefix(n + 1);
    }

    if (dur.empty()) {
      return true;
    }
  }

  // Anything left must be a non-empty time part.
  if (dur[0] != 'T' || dur.size() == 1) {
    return false;
  }
  dur.remove_prefix(1);

  std::string_view hms{"HMS"};
  while (not hms.empty() && not dur.empty()) {
    size_t const n = eat(hms);
    if (n == npos) {
      return false;
    }
    hms.remove_prefix(n + 1);
  }

  return dur.empty();
}

// Limitation - labels are inspected byte-by-byte, not by grapheme, so this
// cannot check idn-hostname.
// TODO: lifting that limitation presumably needs Unicode-aware label handling.
/**
 * @brief Validates an ASCII hostname: dot-separated labels of 1-63
 * alphanumeric-or-hyphen characters that neither start nor end with a hyphen.
 *
 * The total length may be at most 253 characters, or 254 when the name ends
 * with the root dot. The empty string is not a hostname.
 */
inline bool hostname(std::string_view name) {
  // Checks the label occupying name[0, end).
  auto hostname_part = [&name](size_t end) {
    if (end == 0 || end >= 64 || name[0] == '-' || name[end - 1] == '-') {
      return false;
    }
    for (size_t i = 0; i < end; ++i) {
      // Cast first: passing a negative char to std::isalnum is undefined.
      if (name[i] != '-' &&
          not std::isalnum(static_cast<unsigned char>(name[i]))) {
        return false;
      }
    }
    return true;
  };

  if (name.empty()) {
    return false;
  }
  if (name.size() > (name.back() == '.' ? 254u : 253u)) {
    return false;
  }

  for (size_t n = name.find('.'); n != std::string_view::npos;
       name.remove_prefix(n + 1), n = name.find('.')) {
    if (not hostname_part(n)) {
      return false;
    }
  }

  // An empty remainder means the name ended with the (permitted) root dot.
  return name.empty() || hostname_part(name.size());
}

/**
 * @brief Validates a dotted-quad IPv4 address.
 *
 * Exactly four decimal octets in 0-255, separated by single dots, with no
 * leading zeros (which some parsers would read as octal) and nothing trailing.
 */
inline bool ipv4(std::string_view ip) {
  constexpr int octet_count = 4;
  for (int octet = 0; octet < octet_count; ++octet) {
    size_t length = 0;
    unsigned int value = 0;
    while (length < ip.size() && detail::is_digit(ip[length])) {
      if (++length > 3) {
        return false;
      }
      value = value * 10 + static_cast<unsigned int>(ip[length - 1] - '0');
    }
    if (length == 0 || value > 0xFF || (length > 1 && ip[0] == '0')) {
      return false;
    }
    ip.remove_prefix(length);

    if (octet + 1 < octet_count) {
      if (ip.empty() || ip[0] != '.') {
        return false;
      }
      ip.remove_prefix(1);
    }
  }
  return ip.empty();
}

/**
 * @brief Validates an IPv6 address in full, compressed ("::") or mixed
 * (trailing dotted-quad IPv4) notation.
 *
 * Zone ids and prefix lengths are not part of the address and are rejected.
 */
inline bool ipv6(std::string_view ip) {
  int expected_spans = 8;

  if (size_t const dot = ip.find('.'); dot != std::string_view::npos) {
    size_t const colon = ip.find_last_of(':');
    size_t const tail = (colon == std::string_view::npos) ? 0 : colon + 1;
    // The dotted-quad must be the final element: no '.' before the last ':'.
    if (dot < tail || not ipv4(ip.substr(tail))) {
      return false;
    }
    // This is a cheat to allow e.g. ::127.0.0.1 to validate: keep only the
    // first IPv4 octet and let it pose as a single group. The IPv4 tail
    // really occupies two groups, hence 7 spans instead of 8.
    expected_spans = 7;
    ip = ip.substr(0, dot);
  }

  bool has_compressed = false;
  int groups = 0;
  if (ip.substr(0, 2) == "::") {
    has_compressed = true;
    ip.remove_prefix(2);
  }

  while (not ip.empty()) {
    size_t length = 0;
    while (length < ip.size() && detail::is_hex_digit(ip[length])) {
      ++length;
    }
    if (length == 0 || length > 4) {
      return false;
    }
    ip.remove_prefix(length);
    ++groups;

    if (ip.empty()) {
      break;
    }
    if (ip[0] != ':') {
      return false;
    }
    ip.remove_prefix(1);
    if (ip.empty()) {
      return false; // a single trailing ':' has no group after it
    }
    if (ip[0] == ':') {
      if (std::exchange(has_compressed, true)) {
        return false; // "::" may appear only once
      }
      ip.remove_prefix(1);
    }
  }

  // "::" stands for at least one group, so it cannot accompany a full count.
  return has_compressed ? groups < expected_spans : groups == expected_spans;
}

// Let's be honest - no matter what RFC 5321 §4.1.2 or RFC 6531 say, the only
// way to know if an email address is valid is to try and send a message to it.
// Therefore, there's no point in trying to validate things according to a
// complex grammar - as long as it has an '@' sign with at least one character
// on each side, we ought to call it an email.
/**
 * @brief Loosely validates an email address.
 *
 * The local part (before the last '@') must be non-empty and, unless it is
 * wrapped in double quotes, must not start or end with '.' or contain "..".
 * The domain must be a hostname, "[<ipv4>]" or "[IPv6:<ipv6>]".
 */
inline bool email(std::string_view em) {
  size_t const n = em.find_last_of('@');
  // Also covers "no '@' at all" (npos) and the empty string.
  if (n == 0 || n >= em.size() - 1) {
    return false;
  }

  if (em[0] == '"' && em[n - 1] == '"') {
    // Quoted local part - no validation
  } else if (em.substr(0, n).find("..") != std::string_view::npos ||
             em[n - 1] == '.' || em[0] == '.') {
    return false;
  }

  em.remove_prefix(n + 1);
  if (em.front() == '[' && em.back() == ']') {
    em.remove_prefix(1);
    em.remove_suffix(1);
    if (em.substr(0, 5) == "IPv6:") {
      return ipv6(em.substr(5));
    }
    return ipv4(em);
  }
  return hostname(em);
}
}

namespace jvalidate {
/**
 * @brief Dispatches a JSON Schema "format" name to its validation predicate.
 */
class FormatValidator {
public:
  using Predicate = bool (*)(std::string_view);

  /// Outcome of checking a string against a named format.
  enum class Status {
    Unknown,       ///< The format name is not in the table at all.
    Unimplemented, ///< The format name is known but has no predicate.
    Valid,         ///< The predicate accepted the text.
    Invalid        ///< The predicate rejected the text.
  };

private:
  // A nullptr predicate marks a recognised format that is not implemented.
  std::unordered_map<std::string, Predicate> supported_formats_{
      {"date", &format::date},
      {"date-time", &format::date_time},
      {"duration", &format::duration},
      {"email", &format::email},
      {"hostname", &format::hostname},
      {"idn-email", nullptr},
      {"idn-hostname", nullptr},
      {"ipv4", &format::ipv4},
      {"ipv6", &format::ipv6},
      {"iri", nullptr},
      {"iri-reference", nullptr},
      {"json-pointer", nullptr},
      {"relative-json-pointer", nullptr},
      /* {"regex", &detail::StdRegexEngine::is_valid}, */
      {"time", &format::time},
      {"uri", nullptr},
      {"uri-reference", nullptr},
      {"uri-template", nullptr},
      {"uuid", &format::uuid},
  };

public:
  FormatValidator() = default;

  /**
   * @brief Checks @p text against the format called @p format.
   * @return Unknown for an unrecognised name, Unimplemented for a recognised
   * name without a predicate, otherwise Valid or Invalid.
   */
  Status operator()(std::string const & format, std::string_view text) const {
    auto const it = supported_formats_.find(format);
    if (it == supported_formats_.end()) {
      return Status::Unknown;
    }
    if (it->second == nullptr) {
      return Status::Unimplemented;
    }
    return it->second(text) ? Status::Valid : Status::Invalid;
  }
};
}