| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343 |
- #pragma once
- #include <cctype>
- #include <cstddef>
- #include <cstring>
- #include <ctime>
- #include <string>
- #include <string_view>
- #include <unordered_map>
- #include <utility>
- #include <jvalidate/detail/pointer.h>
- #include <jvalidate/detail/relative_pointer.h>
- #include <jvalidate/detail/string.h>
- #include <jvalidate/forward.h>
// Shorthand for entries of the FormatValidator table later in this header.
// Both macros are removed again with #undef at the end of the file.
//
// CONSTRUCTS(TYPE) names format::ctor_as_valid instantiated with detail::TYPE.
- #define CONSTRUCTS(TYPE) format::ctor_as_valid<detail::TYPE>
// UTF32(FN) names format::utf32 instantiated with the char32_t version of
// the format::FN template.
- #define UTF32(FN) format::utf32<format::FN<char32_t>>
- namespace jvalidate::format::detail {
- using namespace jvalidate::detail;
// Outcome of parsing the front of a string.
struct result {
  // Number of characters the parser consumed; date() reports 0 on failure.
  std::ptrdiff_t consumed;
  // True when the consumed characters were well-formed.
  bool valid;
};
// Gregorian leap-year rule: every fourth year, except century years that are
// not divisible by 400.
inline bool is_leapyear(int y) {
  if (y % 4 != 0) {
    return false;
  }
  if (y % 100 != 0) {
    return true;
  }
  return y % 400 == 0;
}
- inline bool illegal_date(int y, int m, int d) {
- static constexpr int days[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
- if (is_leapyear(y) && m == 1) {
- --d;
- }
- return d > days[m];
- }
- inline result date(std::string_view dt) {
- struct tm tm;
- if (auto end = strptime(dt.data(), "%Y-%m-%d", &tm); end) {
- if ((end - dt.data()) != 10 || illegal_date(tm.tm_year + 1900, tm.tm_mon, tm.tm_mday)) {
- return {.consumed = 0, .valid = false};
- }
- return {.consumed = end - dt.data(), .valid = true};
- }
- return {.consumed = 0L, .valid = false};
- }
- }
- namespace jvalidate::format {
- inline bool date(std::string_view dt) {
- auto [consumed, valid] = detail::date(dt);
- return valid && consumed == dt.size();
- }
// Validates an RFC 3339 full-time: "HH:MM:SS[.fraction](Z|z|+HH:MM|-HH:MM)".
//
// The view is inspected strictly within its bounds, so it does not need to be
// NUL-terminated. Every numeric field must be exactly two decimal digits.
// Seconds may be 60 to allow for a leap second; no check is made that the leap
// second falls at the end of a UTC day.
//
// @return true when all of `dt` matches the grammar above
inline bool time(std::string_view dt) {
  auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
  // Value of the two digits at s[at], s[at + 1]; -1 if they are not digits.
  auto two_digits = [&is_digit](std::string_view s, size_t at) -> int {
    if (s.size() < at + 2 || not is_digit(s[at]) || not is_digit(s[at + 1])) {
      return -1;
    }
    return (s[at] - '0') * 10 + (s[at + 1] - '0');
  };

  constexpr size_t hms_len = 8; // strlen("HH:MM:SS")
  // A time offset is mandatory, so something must follow the seconds.
  if (dt.size() <= hms_len || dt[2] != ':' || dt[5] != ':') {
    return false;
  }
  int const hour = two_digits(dt, 0);
  int const minute = two_digits(dt, 3);
  int const second = two_digits(dt, 6);
  if (hour < 0 || hour > 23 || minute < 0 || minute > 59 || second < 0 || second > 60) {
    return false;
  }
  dt.remove_prefix(hms_len);

  if (dt.front() == '.') {
    dt.remove_prefix(1);
    size_t digits = 0;
    while (digits < dt.size() && is_digit(dt[digits])) {
      ++digits;
    }
    if (digits == 0) {
      return false;
    }
    dt.remove_prefix(digits);
  }

  // The fraction may have consumed everything; the offset is still required.
  if (dt.empty()) {
    return false;
  }
  if (dt.front() == 'Z' || dt.front() == 'z') {
    return dt.size() == 1;
  }
  if (dt.front() != '+' && dt.front() != '-') {
    return false;
  }
  dt.remove_prefix(1);

  constexpr size_t offset_len = 5; // strlen("HH:MM")
  if (dt.size() != offset_len || dt[2] != ':') {
    return false;
  }
  int const offset_hour = two_digits(dt, 0);
  int const offset_minute = two_digits(dt, 3);
  return offset_hour >= 0 && offset_hour <= 23 && offset_minute >= 0 && offset_minute <= 59;
}
- inline bool date_time(std::string_view dt) {
- auto [size, good] = detail::date(dt);
- if (not good || std::strchr("Tt", dt[size]) == nullptr) {
- return false;
- }
- dt.remove_prefix(size + 1);
- return time(dt);
- }
// Validates the textual UUID layout 8-4-4-4-12: exactly 36 characters,
// hyphens at offsets 8, 13, 18 and 23, and hexadecimal digits (either case)
// everywhere else.
//
// The view is checked character by character, so it does not need to be
// NUL-terminated and embedded whitespace can never be skipped.
inline bool uuid(std::string_view id) {
  constexpr size_t uuid_len = 36;
  if (id.size() != uuid_len) {
    return false;
  }
  for (size_t i = 0; i < uuid_len; ++i) {
    char const c = id[i];
    bool const hyphen_slot = (i == 8 || i == 13 || i == 18 || i == 23);
    if (hyphen_slot) {
      if (c != '-') {
        return false;
      }
      continue;
    }
    bool const is_hex =
        (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    if (not is_hex) {
      return false;
    }
  }
  return true;
}
// Validates an ISO 8601 duration as used by the JSON Schema "duration" format:
//   P<n>W                      weeks on their own, or
//   P[<n>Y][<n>M][<n>D][T[<n>H][<n>M][<n>S]]
// Designators must appear in that order, each at most once, at least one
// element must be present, and a 'T' must be followed by a time element.
// Counts are plain decimal digits: no sign, no whitespace, no fraction.
//
// The view is inspected within its bounds only, so it does not need to be
// NUL-terminated.
inline bool duration(std::string_view dur) {
  constexpr size_t npos = std::string_view::npos;

  // Consumes "<digits><designator>" from the front of `dur` when the
  // designator is one of `allowed`. Returns the designator's index within
  // `allowed`, or npos (leaving `dur` untouched) when nothing matched.
  auto eat = [&dur](std::string_view allowed) -> size_t {
    size_t digits = 0;
    while (digits < dur.size() && dur[digits] >= '0' && dur[digits] <= '9') {
      ++digits;
    }
    if (digits == 0 || digits == dur.size()) {
      return npos;
    }
    size_t const which = allowed.find(dur[digits]);
    if (which != npos) {
      dur.remove_prefix(digits + 1);
    }
    return which;
  };

  // Consumes elements from `dur` while they follow the order in `order`.
  // Returns false as soon as an element is malformed or out of order.
  auto eat_in_order = [&dur, &eat](std::string_view order) {
    while (not order.empty() && not dur.empty()) {
      size_t const which = eat(order);
      if (which == npos) {
        return false;
      }
      order.remove_prefix(which + 1);
    }
    return true;
  };

  if (dur.size() < 2 || dur[0] != 'P') {
    return false;
  }
  dur.remove_prefix(1);

  if (dur[0] != 'T') {
    // Weeks cannot be combined with any other element.
    if (eat("W") != npos) {
      return dur.empty();
    }
    if (not eat_in_order("YMD")) {
      return false;
    }
    if (dur.empty()) {
      return true;
    }
  }

  // Whatever remains must be a 'T' followed by at least one time element.
  if (dur[0] != 'T' || dur.size() == 1) {
    return false;
  }
  dur.remove_prefix(1);
  if (not eat_in_order("HMS")) {
    return false;
  }
  return dur.empty();
}
// Validates a host name: dot-separated labels of 1 to 63 characters that do
// not start or end with '-', with a total length of at most 253 characters
// (254 when a trailing root dot is present). An empty name is rejected.
//
// Limitation: no Unicode tables are consulted and graphemes are not
// inspected, so this cannot fully check idn-hostname. For character types
// wider than one byte, code points above U+007F are accepted as label
// characters without further classification. For plain `char` input only
// ASCII letters, digits and '-' are accepted.
template <typename CharT = char> inline bool hostname(std::basic_string_view<CharT> name) {
  constexpr size_t max_label_len = 63;
  constexpr size_t max_name_len = 253;
  constexpr size_t npos = std::basic_string_view<CharT>::npos;

  // Classify by value range instead of std::isalnum, which is undefined for
  // arguments that do not fit in unsigned char (char32_t, negative char).
  auto is_label_char = [](CharT c) {
    if (c == CharT('-')) {
      return true;
    }
    bool const ascii_alnum = (c >= CharT('0') && c <= CharT('9')) ||
                             (c >= CharT('a') && c <= CharT('z')) ||
                             (c >= CharT('A') && c <= CharT('Z'));
    if (ascii_alnum) {
      return true;
    }
    // Negative char values sign-extend to large numbers, i.e. non-ASCII.
    bool const non_ascii = static_cast<unsigned long>(c) > 0x7F;
    return sizeof(CharT) > 1 && non_ascii;
  };

  auto is_label = [&is_label_char, max_label_len](std::basic_string_view<CharT> label) {
    if (label.empty() || label.size() > max_label_len) {
      return false;
    }
    if (label.front() == CharT('-') || label.back() == CharT('-')) {
      return false;
    }
    for (CharT const c : label) {
      if (not is_label_char(c)) {
        return false;
      }
    }
    return true;
  };

  if (name.empty()) {
    return false;
  }
  size_t const limit = (name.back() == CharT('.')) ? max_name_len + 1 : max_name_len;
  if (name.size() > limit) {
    return false;
  }

  for (size_t dot = name.find(CharT('.')); dot != npos; dot = name.find(CharT('.'))) {
    if (not is_label(name.substr(0, dot))) {
      return false;
    }
    name.remove_prefix(dot + 1);
  }
  // Nothing left means the name ended in a single trailing dot.
  return name.empty() || is_label(name);
}
// Validates a dotted-quad IPv4 address: exactly four decimal octets separated
// by '.', each in the range 0-255 and written without leading zeros.
//
// The view is inspected within its bounds only, so it does not need to be
// NUL-terminated.
inline bool ipv4(std::string_view ip) {
  constexpr int octet_count = 4;
  constexpr size_t npos = std::string_view::npos;

  for (int i = 0; i < octet_count; ++i) {
    bool const is_last = (i == octet_count - 1);
    size_t const dot = ip.find('.');
    // The first three octets must be followed by a dot, the last must not.
    if ((dot == npos) != is_last) {
      return false;
    }
    std::string_view const octet = ip.substr(0, dot);
    if (octet.empty() || octet.size() > 3) {
      return false;
    }
    // "0" is fine; "00" or "01" is not.
    if (octet.size() > 1 && octet.front() == '0') {
      return false;
    }
    unsigned int value = 0;
    for (char const c : octet) {
      if (c < '0' || c > '9') {
        return false;
      }
      value = value * 10 + static_cast<unsigned int>(c - '0');
    }
    if (value > 0xFF) {
      return false;
    }
    if (not is_last) {
      ip.remove_prefix(dot + 1);
    }
  }
  return true;
}
- inline bool ipv6(std::string_view ip) {
- int expected_spans = 8;
- if (size_t n = ip.find('.'); n != std::string::npos) {
- if (not ipv4(ip.substr(ip.find_last_of(':') + 1))) {
- return false;
- }
- // This is a cheat to allow e.g. ::127.0.0.1 to validate
- expected_spans = 7;
- ip = ip.substr(0, n);
- }
- if (ip.find_first_not_of("0123456789ABCDEFabcdef:") != std::string::npos) {
- return false;
- }
- if (ip.size() >= 40) {
- return false;
- }
- bool has_compressed = false;
- int groups = 0;
- if (ip.starts_with("::")) {
- has_compressed = true;
- ip.remove_prefix(2);
- }
- while (!ip.empty()) {
- int data;
- if (sscanf(ip.data(), "%4x", &data) != 1) {
- return false;
- }
- if (size_t n = ip.find(':'); std::min(n, ip.size()) > 4) {
- return false;
- } else if (n != std::string::npos) {
- ip.remove_prefix(n + 1);
- } else {
- ip = "";
- }
- ++groups;
- if (ip[0] == ':') {
- if (std::exchange(has_compressed, true)) {
- return false;
- }
- ip.remove_prefix(1);
- }
- }
- return groups == expected_spans || (has_compressed && groups < expected_spans);
- }
- // Let's be honest - no matter what RFC 5321 §4.1.2 or RFC 6531 say, the only
- // way to know if an email address is valid is to try and send a message to it.
- // Therefore, there's no point in trying to validate things according to a
- // complex grammar - as long as it has an '@' sign with at least one character
- // on each side, we ought to call it an email.
- template <typename CharT = char> inline bool email(std::basic_string_view<CharT> em) {
- size_t n = em.find_last_of('@');
- if (n == 0 || n >= em.size() - 1) {
- return false;
- }
- auto const who = em.substr(0, n);
- if (who.starts_with('"') && who.ends_with('"')) {
- // No validation
- } else if (who.starts_with('.') || who.ends_with('.')) {
- return false;
- } else if (CharT const dots[3] = {'.', '.', '\0'}; em.substr(0, n).find(dots) != em.npos) {
- return false;
- }
- auto domain = em.substr(n + 1);
- if (not(domain.starts_with('[') && domain.ends_with(']'))) {
- return hostname(domain);
- }
- domain.remove_prefix(1);
- domain.remove_suffix(1);
- std::string ip(domain.size(), '\0'); // Re-acquiring the NULL terminator
- std::ranges::copy(domain, ip.begin());
- if (ip.starts_with("IPv6:")) {
- return ipv6(ip.substr(5));
- }
- return ipv4(ip);
- }
// Adapts a type whose constructor validates its input into a predicate:
// returns true when T can be constructed from `str`, and false when that
// construction throws anything derived from std::exception.
template <typename T> inline bool ctor_as_valid(std::string_view str) {
  bool constructed = false;
  try {
    // The object itself is not needed, only whether building it succeeds.
    static_cast<void>(T(str));
    constructed = true;
  } catch (std::exception const &) {
    constructed = false;
  }
  return constructed;
}
- template <auto Predicate> bool utf32(std::string_view str) {
- return Predicate(detail::to_u32(str));
- }
- }
- namespace jvalidate {
- class FormatValidator {
- public:
- using Predicate = bool (*)(std::string_view);
- enum class Status { Unknown, Unimplemented, Valid, Invalid };
- private:
- std::unordered_map<std::string, Predicate> supported_formats_{
- {"date", &format::date},
- {"date-time", &format::date_time},
- {"duration", &format::duration},
- {"email", &format::email},
- {"hostname", &format::hostname},
- {"idn-email", UTF32(email)},
- {"idn-hostname", UTF32(hostname)},
- {"ipv4", &format::ipv4},
- {"ipv6", &format::ipv6},
- {"iri", nullptr},
- {"iri-reference", nullptr},
- {"json-pointer", CONSTRUCTS(Pointer)},
- {"relative-json-pointer", CONSTRUCTS(RelativePointer)},
- {"regex", nullptr},
- {"time", &format::time},
- {"uri", nullptr},
- {"uri-reference", nullptr},
- {"uri-template", nullptr},
- {"uuid", &format::uuid},
- };
- public:
- FormatValidator() = default;
- FormatValidator(Predicate is_regex) { supported_formats_.insert_or_assign("regex", is_regex); }
- Status operator()(std::string const & format, std::string_view text) const {
- if (auto it = supported_formats_.find(format); it != supported_formats_.end() && it->second) {
- if (not it->second) {
- return Status::Unimplemented;
- }
- return it->second(text) ? Status::Valid : Status::Invalid;
- }
- return Status::Unknown;
- }
- };
- }
- #undef CONSTRUCTS
- #undef UTF32
|