| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307 |
- #pragma once
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>

#include <jvalidate/forward.h>
namespace jvalidate::format::detail {
// Gregorian leap-year rule: every fourth year, except century years that are
// not divisible by 400.
inline bool is_leapyear(int y) { return (y % 400) == 0 || ((y % 4) == 0 && (y % 100) != 0); }

// Reports whether day `d` cannot exist in month `m` of year `y`.
//   y - full year; only used to decide whether February has 29 days
//   m - zero-based month index (0 = January ... 11 = December), the same
//       convention as tm::tm_mon
//   d - one-based day of the month
// A month outside [0, 11] or a day below 1 is reported as illegal rather than
// being used to index outside the table.
inline bool illegal_date(int y, int m, int d) {
  static constexpr int days[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
  constexpr int february = 1;
  if (m < 0 || m > 11 || d < 1) {
    return true;
  }
  int const last_day = days[m] + ((m == february && is_leapyear(y)) ? 1 : 0);
  return d > last_day;
}

// Parses a strict "YYYY-MM-DD" date from the front of `dt`.
//
// Returns {10, true} when the first ten characters are exactly four digits,
// '-', two digits, '-', two digits and name a real calendar day; returns
// {0, false} otherwise. Characters after the tenth are not inspected, so
// callers decide whether trailing text is acceptable.
//
// The view is scanned by hand instead of via strptime(): a string_view is not
// guaranteed to be NUL-terminated, and strptime() tolerates whitespace and
// short numeric fields (e.g. "2024- 2-29").
inline auto date(std::string_view dt) {
  constexpr std::size_t length = 10; // strlen("YYYY-MM-DD")
  auto const failure = std::make_pair(0L, false);

  if (dt.size() < length) {
    return failure;
  }
  for (std::size_t i = 0; i < length; ++i) {
    bool const separator = (i == 4 || i == 7);
    bool const ok = separator ? dt[i] == '-' : (dt[i] >= '0' && dt[i] <= '9');
    if (not ok) {
      return failure;
    }
  }

  // Every position read below has already been verified to hold a digit.
  auto const number = [dt](std::size_t pos, std::size_t count) {
    int value = 0;
    for (std::size_t i = 0; i < count; ++i) {
      value = value * 10 + (dt[pos + i] - '0');
    }
    return value;
  };
  int const year = number(0, 4);
  int const month = number(5, 2);
  int const day = number(8, 2);

  // illegal_date() expects a zero-based month; month "00" becomes -1 and is rejected there.
  if (illegal_date(year, month - 1, day)) {
    return failure;
  }
  return std::make_pair(static_cast<long>(length), true);
}
}
- namespace jvalidate::format {
- inline bool date(std::string_view dt) {
- auto [size, good] = detail::date(dt);
- return good && size == dt.size();
- }
// Validates an RFC 3339 "full-time": HH:MM:SS, an optional ".digits" fraction,
// and a mandatory offset that is either "Z"/"z" or "+HH:MM"/"-HH:MM".
//
// Ranges enforced: hour 00-23, minute 00-59, second 00-60, offset hour 00-23,
// offset minute 00-59. A seconds value of 60 (leap second) is accepted only
// when the time converted to UTC is 23:59, which is when leap seconds occur.
//
// The view is scanned by hand rather than with strptime(): a string_view need
// not be NUL-terminated, and strptime() tolerates whitespace, one-digit fields
// and out-of-range seconds.
inline bool time(std::string_view dt) {
  auto const is_digit = [](char c) { return c >= '0' && c <= '9'; };
  // Reads exactly two digits at `pos`; returns -1 if they are missing.
  auto const two_digits = [&is_digit](std::string_view s, std::size_t pos) -> int {
    if (pos + 2 > s.size() || not is_digit(s[pos]) || not is_digit(s[pos + 1])) {
      return -1;
    }
    return (s[pos] - '0') * 10 + (s[pos + 1] - '0');
  };

  // partial-time: HH:MM:SS. A non-negative `second` implies size() >= 8, so
  // the separator reads at the end of the condition are in bounds.
  int const hour = two_digits(dt, 0);
  int const minute = two_digits(dt, 3);
  int const second = two_digits(dt, 6);
  if (hour < 0 || minute < 0 || second < 0 || dt[2] != ':' || dt[5] != ':') {
    return false;
  }
  if (hour > 23 || minute > 59 || second > 60) {
    return false;
  }
  dt.remove_prefix(8);

  // Optional fraction: '.' followed by at least one digit.
  if (not dt.empty() && dt.front() == '.') {
    dt.remove_prefix(1);
    std::size_t digits = 0;
    while (digits < dt.size() && is_digit(dt[digits])) {
      ++digits;
    }
    if (digits == 0) {
      return false;
    }
    dt.remove_prefix(digits);
  }

  // Mandatory offset; it must consume the remainder of the input.
  if (dt.empty()) {
    return false;
  }
  int offset_minutes = 0; // minutes east of UTC
  if (dt != "Z" && dt != "z") {
    bool const negative = dt.front() == '-';
    if ((not negative && dt.front() != '+') || dt.size() != 6 || dt[3] != ':') {
      return false;
    }
    int const offset_hour = two_digits(dt, 1);
    int const offset_minute = two_digits(dt, 4);
    if (offset_hour < 0 || offset_hour > 23 || offset_minute < 0 || offset_minute > 59) {
      return false;
    }
    offset_minutes = offset_hour * 60 + offset_minute;
    if (negative) {
      offset_minutes = -offset_minutes;
    }
  }

  if (second == 60) {
    constexpr int minutes_per_day = 24 * 60;
    // UTC = local - offset; normalise into [0, minutes_per_day).
    int const utc =
        (((hour * 60 + minute - offset_minutes) % minutes_per_day) + minutes_per_day) %
        minutes_per_day;
    return utc == minutes_per_day - 1;
  }
  return true;
}
- inline bool date_time(std::string_view dt) {
- auto [size, good] = detail::date(dt);
- if (not good || std::strchr("Tt", dt[size]) == nullptr) {
- return false;
- }
- dt.remove_prefix(size + 1);
- return time(dt);
- }
// Validates the canonical 8-4-4-4-12 textual UUID form: exactly 36 characters,
// '-' at offsets 8, 13, 18 and 23, and a hexadecimal digit (either case)
// everywhere else.
//
// Each position is checked directly. The previous sscanf("%8s-...") approach
// skipped leading whitespace, so a 36-character string with a leading space
// and a short last group was accepted, and it required NUL-terminated input.
inline bool uuid(std::string_view id) {
  constexpr std::size_t uuid_length = 36;
  auto const is_hex = [](char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  };
  auto const is_dash_position = [](std::size_t i) {
    return i == 8 || i == 13 || i == 18 || i == 23;
  };

  if (id.size() != uuid_length) {
    return false;
  }
  for (std::size_t i = 0; i < uuid_length; ++i) {
    bool const ok = is_dash_position(i) ? id[i] == '-' : is_hex(id[i]);
    if (not ok) {
      return false;
    }
  }
  return true;
}
// Validates an ISO 8601 / RFC 3339 (Appendix A) style duration:
//   "P" <n>"W"
//   "P" [<n>"Y"] [<n>"M"] [<n>"D"] ["T" [<n>"H"] [<n>"M"] [<n>"S"]]
// At least one element must follow "P", at least one element must follow "T",
// elements must appear in the order shown, and each <n> is one or more ASCII
// digits (no sign, whitespace or fraction). Weeks cannot be combined with
// other units.
//
// Elements are scanned by hand: sscanf("%u") accepted signs and leading
// whitespace and needed NUL-terminated input.
inline bool duration(std::string_view dur) {
  constexpr auto npos = std::string_view::npos;

  // Consumes one "<digits><designator>" element from the front of `dur` when
  // the designator is one of `allowed`. Returns the designator's index within
  // `allowed`, or npos (leaving `dur` untouched) if no such element is present.
  auto const eat = [&dur](std::string_view allowed) -> std::size_t {
    std::size_t digits = 0;
    while (digits < dur.size() && dur[digits] >= '0' && dur[digits] <= '9') {
      ++digits;
    }
    if (digits == 0 || digits == dur.size()) {
      return std::string_view::npos;
    }
    std::size_t const which = allowed.find(dur[digits]);
    if (which != std::string_view::npos) {
      dur.remove_prefix(digits + 1);
    }
    return which;
  };

  // Consumes elements whose designators appear in `designators` order. Stops
  // at the end of input, at a 'T' (start of the time section), or once every
  // designator has been used. Returns false on a malformed element.
  auto const eat_ordered = [&dur, &eat, npos](std::string_view designators) {
    while (not designators.empty() && not dur.empty() && dur.front() != 'T') {
      std::size_t const n = eat(designators);
      if (n == npos) {
        return false;
      }
      designators.remove_prefix(n + 1);
    }
    return true;
  };

  // "P" alone is not a duration, and neither is the empty string.
  if (dur.size() < 2 || dur.front() != 'P') {
    return false;
  }
  dur.remove_prefix(1);

  if (dur.front() != 'T') {
    if (eat("W") != npos) {
      return dur.empty();
    }
    if (not eat_ordered("YMD")) {
      return false;
    }
    if (dur.empty()) {
      return true;
    }
  }

  // Whatever remains must be a non-empty time section.
  if (dur.front() != 'T' || dur.size() == 1) {
    return false;
  }
  dur.remove_prefix(1);
  if (not eat_ordered("HMS")) {
    return false;
  }
  return dur.empty();
}
// Limitation: this check does not inspect graphemes, so it cannot validate
// idn-hostname. Fixing that would require Unicode-aware (grapheme-level) processing.
// Validates an ASCII host name made of dot-separated labels.
//
// Rules enforced:
//   - the name is not empty and is at most 253 characters, not counting a
//     single optional trailing dot;
//   - every label is 1 to 63 characters long;
//   - labels contain only ASCII letters, digits and '-';
//   - no label starts or ends with '-'.
//
// Characters are classified with explicit ASCII ranges so the result does not
// depend on the C locale and bytes with the high bit set are handled safely.
inline bool hostname(std::string_view name) {
  constexpr std::size_t max_label_length = 63;
  constexpr std::size_t max_name_length = 253;

  auto const is_label_char = [](char c) {
    return c == '-' || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
           (c >= 'A' && c <= 'Z');
  };
  auto const is_label = [&is_label_char](std::string_view label) {
    if (label.empty() || label.size() > max_label_length) {
      return false;
    }
    if (label.front() == '-' || label.back() == '-') {
      return false;
    }
    for (char const c : label) {
      if (not is_label_char(c)) {
        return false;
      }
    }
    return true;
  };

  if (name.empty()) {
    return false;
  }
  // Tolerate exactly one trailing dot (fully-qualified form).
  if (name.back() == '.') {
    name.remove_suffix(1);
  }
  if (name.empty() || name.size() > max_name_length) {
    return false;
  }

  while (true) {
    std::size_t const dot = name.find('.');
    if (not is_label(name.substr(0, dot))) {
      return false;
    }
    if (dot == std::string_view::npos) {
      return true;
    }
    name.remove_prefix(dot + 1);
  }
}
// Validates a dotted-quad IPv4 address: exactly four decimal octets separated
// by single dots, each 0-255, one to three digits, with no leading zero unless
// the octet is exactly "0" (leading zeros are commonly read as octal).
//
// The view is scanned by hand: the input need not be NUL-terminated, and every
// octet gets the leading-zero check, not just the first one that follows ".0".
inline bool ipv4(std::string_view ip) {
  constexpr int octet_count = 4;
  constexpr std::size_t max_octet_digits = 3;
  constexpr unsigned max_octet_value = 0xFF;

  for (int octet = 0; octet < octet_count; ++octet) {
    std::size_t digits = 0;
    unsigned value = 0;
    while (digits < ip.size() && ip[digits] >= '0' && ip[digits] <= '9') {
      if (digits == max_octet_digits) {
        return false;
      }
      value = value * 10 + static_cast<unsigned>(ip[digits] - '0');
      ++digits;
    }
    if (digits == 0 || value > max_octet_value) {
      return false;
    }
    if (digits > 1 && ip.front() == '0') {
      return false;
    }
    ip.remove_prefix(digits);

    // Every octet but the last must be followed by a dot.
    if (octet + 1 < octet_count) {
      if (ip.empty() || ip.front() != '.') {
        return false;
      }
      ip.remove_prefix(1);
    }
  }
  return ip.empty();
}
- inline bool ipv6(std::string_view ip) {
- int expected_spans = 8;
- if (size_t n = ip.find('.'); n != std::string::npos) {
- if (not ipv4(ip.substr(ip.find_last_of(':') + 1))) {
- return false;
- }
- // This is a cheat to allow e.g. ::127.0.0.1 to validate
- expected_spans = 7;
- ip = ip.substr(0, n);
- }
- if (ip.find_first_not_of("0123456789ABCDEFabcdef:") != std::string::npos) {
- return false;
- }
- if (ip.size() >= 40) {
- return false;
- }
- bool has_compressed = false;
- int groups = 0;
- if (ip.starts_with("::")) {
- has_compressed = true;
- ip.remove_prefix(2);
- }
- while (!ip.empty()) {
- int data;
- if (sscanf(ip.data(), "%4x", &data) != 1) {
- return false;
- }
- if (size_t n = ip.find(':'); std::min(n, ip.size()) > 4) {
- return false;
- } else if (n != std::string::npos) {
- ip.remove_prefix(n + 1);
- } else {
- ip = "";
- }
- ++groups;
- if (ip[0] == ':') {
- if (std::exchange(has_compressed, true)) {
- return false;
- }
- ip.remove_prefix(1);
- }
- }
- return groups == expected_spans || (has_compressed && groups < expected_spans);
- }
- // Let's be honest - no matter what RFC 5321 §4.1.2 or RFC 6531 say, the only
- // way to know if an email address is valid is to try and send a message to it.
- // Therefore, there's no point in trying to validate things according to a
- // complex grammar - as long as it has an '@' sign with at least one character
- // on each side, we ought to call it an email.
- inline bool email(std::string_view em) {
- size_t n = em.find_last_of('@');
- if (n == 0 || n >= em.size() - 1) {
- return false;
- }
- if (em[0] == '"' && em[n - 1] == '"') {
- // No validation
- } else if (em.substr(0, n).find("..") != std::string::npos || em[n - 1] == '.' || em[0] == '.') {
- return false;
- }
- em.remove_prefix(n + 1);
- if (em.front() == '[' && em.back() == ']') {
- em.remove_prefix(1);
- em.remove_suffix(1);
- if (em.starts_with("IPv6:")) {
- return ipv6(std::string(em.substr(5)));
- }
- return ipv4(std::string(em)); // Re-acquire NULL-term
- }
- return hostname(em);
- }
- }
- namespace jvalidate {
- class FormatValidator {
- public:
- using Predicate = bool (*)(std::string_view);
- enum class Status { Unknown, Unimplemented, Valid, Invalid };
- private:
- std::unordered_map<std::string, Predicate> supported_formats_{
- {"date", &format::date},
- {"date-time", &format::date_time},
- {"duration", &format::duration},
- {"email", &format::email},
- {"hostname", &format::hostname},
- {"idn-email", nullptr},
- {"idn-hostname", nullptr},
- {"ipv4", &format::ipv4},
- {"ipv6", &format::ipv6},
- {"iri", nullptr},
- {"iri-reference", nullptr},
- {"json-pointer", nullptr},
- {"relative-json-pointer", nullptr},
- /* {"regex", &detail::StdRegexEngine::is_valid}, */
- {"time", &format::time},
- {"uri", nullptr},
- {"uri-reference", nullptr},
- {"uri-template", nullptr},
- {"uuid", &format::uuid},
- };
- public:
- FormatValidator() = default;
- Status operator()(std::string const & format, std::string_view text) const {
- if (auto it = supported_formats_.find(format); it != supported_formats_.end() && it->second) {
- if (not it->second) {
- return Status::Unimplemented;
- }
- return it->second(text) ? Status::Valid : Status::Invalid;
- }
- return Status::Unknown;
- }
- };
- }
|