|
|
@@ -0,0 +1,307 @@
|
|
|
+#pragma once
|
|
|
+
|
|
|
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>

#include <jvalidate/forward.h>
|
|
|
+
|
|
|
+namespace jvalidate::format::detail {
|
|
|
// Gregorian leap-year rule: every fourth year is a leap year, except century
// years, which are leap years only when divisible by 400.
inline bool is_leapyear(int y) {
  if (y % 400 == 0) {
    return true;
  }
  if (y % 100 == 0) {
    return false;
  }
  return y % 4 == 0;
}
|
|
|
+
|
|
|
+inline bool illegal_date(int y, int m, int d) {
|
|
|
+ static constexpr int days[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
|
|
|
+ if (is_leapyear(y) && m == 1) {
|
|
|
+ --d;
|
|
|
+ }
|
|
|
+ return d > days[m];
|
|
|
+}
|
|
|
+
|
|
|
+inline auto date(std::string_view dt) {
|
|
|
+ struct tm tm;
|
|
|
+ if (auto end = strptime(dt.data(), "%Y-%m-%d", &tm); end) {
|
|
|
+ if ((end - dt.data()) != 10 || illegal_date(tm.tm_year + 1900, tm.tm_mon, tm.tm_mday)) {
|
|
|
+ return std::make_pair(0L, false);
|
|
|
+ }
|
|
|
+ return std::make_pair(end - dt.data(), true);
|
|
|
+ }
|
|
|
+ return std::make_pair(0L, false);
|
|
|
+}
|
|
|
+}
|
|
|
+
|
|
|
+namespace jvalidate::format {
|
|
|
+inline bool date(std::string_view dt) {
|
|
|
+ auto [size, good] = detail::date(dt);
|
|
|
+ return good && size == dt.size();
|
|
|
+}
|
|
|
+
|
|
|
// Validates the "time" format (RFC 3339 full-time):
//
//   HH:MM:SS[.fraction](Z|z|(+|-)HH:MM)
//
// The time-zone designator is mandatory. Seconds may be 60 to allow for a leap
// second; whether a leap second is plausible at that instant is not checked.
//
// Parsing is done by hand on the view itself, so the input does not need to be
// NUL-terminated, every field must be exactly two digits, and no character is
// read beyond dt.size().
inline bool time(std::string_view dt) {
  auto const is_digit = [](char c) { return c >= '0' && c <= '9'; };
  // Reads exactly two digits starting at pos; -1 if they are not both present.
  auto const two_digits = [&is_digit](std::string_view s, std::size_t pos) {
    if (pos + 2 > s.size() || not is_digit(s[pos]) || not is_digit(s[pos + 1])) {
      return -1;
    }
    return (s[pos] - '0') * 10 + (s[pos + 1] - '0');
  };

  // "HH:MM:SS" plus at least one character for the zone designator.
  constexpr std::size_t partial_time_length = 8;
  if (dt.size() <= partial_time_length || dt[2] != ':' || dt[5] != ':') {
    return false;
  }
  int const hour = two_digits(dt, 0);
  int const minute = two_digits(dt, 3);
  int const second = two_digits(dt, 6);
  if (hour < 0 || hour > 23 || minute < 0 || minute > 59 || second < 0 || second > 60) {
    return false;
  }
  dt.remove_prefix(partial_time_length);

  // Optional fractional seconds: '.' followed by one or more digits.
  if (dt.front() == '.') {
    dt.remove_prefix(1);
    std::size_t digits = 0;
    while (digits < dt.size() && is_digit(dt[digits])) {
      ++digits;
    }
    if (digits == 0) {
      return false;
    }
    dt.remove_prefix(digits);
  }

  // The fraction may have consumed everything; a zone is still required.
  if (dt.empty()) {
    return false;
  }
  if (dt.front() == 'Z' || dt.front() == 'z') {
    return dt.size() == 1;
  }
  if (dt.front() != '+' && dt.front() != '-') {
    return false;
  }

  // Numeric offset: sign, "HH", ':', "MM" and nothing else.
  constexpr std::size_t offset_length = 6;
  if (dt.size() != offset_length || dt[3] != ':') {
    return false;
  }
  int const offset_hour = two_digits(dt, 1);
  int const offset_minute = two_digits(dt, 4);
  return offset_hour >= 0 && offset_hour <= 23 && offset_minute >= 0 && offset_minute <= 59;
}
|
|
|
+
|
|
|
+inline bool date_time(std::string_view dt) {
|
|
|
+ auto [size, good] = detail::date(dt);
|
|
|
+ if (not good || std::strchr("Tt", dt[size]) == nullptr) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ dt.remove_prefix(size + 1);
|
|
|
+ return time(dt);
|
|
|
+}
|
|
|
+
|
|
|
// Validates the "uuid" format: 36 characters laid out as 8-4-4-4-12
// hexadecimal digits (either case) separated by hyphens.
//
// Every character is checked in place. Unlike a scanf("%s")-based split this
// never skips whitespace (so a blank inside a group is rejected) and does not
// depend on the view being NUL-terminated.
inline bool uuid(std::string_view id) {
  constexpr std::size_t uuid_length = 36;
  auto const is_hex = [](char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  };
  // Offsets of the four separators in the 8-4-4-4-12 layout.
  auto const is_hyphen_slot = [](std::size_t i) {
    return i == 8 || i == 13 || i == 18 || i == 23;
  };

  if (id.size() != uuid_length) {
    return false;
  }
  for (std::size_t i = 0; i < uuid_length; ++i) {
    bool const ok = is_hyphen_slot(i) ? id[i] == '-' : is_hex(id[i]);
    if (not ok) {
      return false;
    }
  }
  return true;
}
|
|
|
+
|
|
|
// Validates the "duration" format (ISO 8601 duration as profiled by RFC 3339
// appendix A):
//
//   P<n>W
//   P[<n>Y][<n>M][<n>D][T[<n>H][<n>M][<n>S]]
//
// At least one component is required, a 'T' must be followed by at least one
// time component, designators must appear in order, and every count is one or
// more ASCII digits (no sign, no blanks).
//
// Parsing is done directly on the view so that the input does not need to be
// NUL-terminated and empty input is rejected without being indexed.
inline bool duration(std::string_view dur) {
  constexpr auto npos = std::string_view::npos;

  // Consumes "<digits><designator>" from the front of dur when the designator
  // is one of `designators`; returns its index in that list, or npos (leaving
  // dur untouched) when the front of dur does not have that shape.
  auto eat = [&dur, npos](std::string_view designators) -> std::size_t {
    std::size_t digits = 0;
    while (digits < dur.size() && dur[digits] >= '0' && dur[digits] <= '9') {
      ++digits;
    }
    if (digits == 0 || digits == dur.size()) {
      return npos;
    }
    std::size_t const which = designators.find(dur[digits]);
    if (which != npos) {
      dur.remove_prefix(digits + 1);
    }
    return which;
  };

  if (dur.size() < 2 || dur.front() != 'P') {
    return false;
  }
  dur.remove_prefix(1);

  if (dur.front() != 'T') {
    // The week form stands alone and cannot be combined with anything else.
    if (eat("W") != npos) {
      return dur.empty();
    }
    // Date components, in order. Shrinking `ymd` past each match enforces the
    // ordering; stopping at 'T' lets any date prefix be followed by a time.
    std::string_view ymd{"YMD"};
    while (not ymd.empty() && not dur.empty() && dur.front() != 'T') {
      std::size_t const n = eat(ymd);
      if (n == npos) {
        return false;
      }
      ymd.remove_prefix(n + 1);
    }
    if (dur.empty()) {
      return true;
    }
  }

  // Whatever remains must be a time part with at least one component.
  if (dur.size() < 2 || dur.front() != 'T') {
    return false;
  }
  dur.remove_prefix(1);

  std::string_view hms{"HMS"};
  while (not hms.empty() && not dur.empty()) {
    std::size_t const n = eat(hms);
    if (n == npos) {
      return false;
    }
    hms.remove_prefix(n + 1);
  }
  return dur.empty();
}
|
|
|
+
|
|
|
// Limitation: this check works on the raw characters and does not inspect
// graphemes, so it cannot be used to validate idn-hostname.
// TODO: the original note describing how to fix this was left unfinished.
|
|
|
// Validates the "hostname" format: dot-separated labels of 1 to 63 ASCII
// letters, digits or hyphens, where no label starts or ends with a hyphen.
// The whole name may be at most 253 characters, plus one optional trailing
// dot (the fully-qualified form).
//
// The empty string is rejected. Characters are classified with explicit ASCII
// ranges so the result does not depend on the C locale and bytes above 0x7F
// are handled safely.
inline bool hostname(std::string_view name) {
  constexpr auto npos = std::string_view::npos;
  constexpr std::size_t max_label_length = 63;
  constexpr std::size_t max_name_length = 253;

  auto const is_label_char = [](char c) {
    return c == '-' || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
           (c >= 'A' && c <= 'Z');
  };
  auto const valid_label = [&is_label_char, max_label_length](std::string_view label) {
    if (label.empty() || label.size() > max_label_length) {
      return false;
    }
    if (label.front() == '-' || label.back() == '-') {
      return false;
    }
    for (char const c : label) {
      if (not is_label_char(c)) {
        return false;
      }
    }
    return true;
  };

  if (name.empty()) {
    return false;
  }
  // A trailing root dot does not count towards the length limit.
  std::size_t const limit = name.back() == '.' ? max_name_length + 1 : max_name_length;
  if (name.size() > limit) {
    return false;
  }

  // Check every label that is terminated by a dot.
  for (std::size_t dot = name.find('.'); dot != npos; dot = name.find('.')) {
    if (not valid_label(name.substr(0, dot))) {
      return false;
    }
    name.remove_prefix(dot + 1);
  }
  // Nothing left means the name ended with the root dot; otherwise the
  // remainder is the final label.
  return name.empty() || valid_label(name);
}
|
|
|
+
|
|
|
// Validates the "ipv4" format: exactly four dot-separated decimal octets, each
// in the range 0-255 and written without leading zeros ("0" itself is fine).
//
// Each octet is parsed in place. This checks the leading-zero rule for every
// octet, rejects empty input without indexing it, and does not require the
// view to be NUL-terminated.
inline bool ipv4(std::string_view ip) {
  constexpr int octet_count = 4;
  constexpr std::size_t max_octet_digits = 3;
  constexpr unsigned int max_octet_value = 0xFF;

  for (int octet = 0; octet < octet_count; ++octet) {
    std::size_t digits = 0;
    unsigned int value = 0;
    while (digits < ip.size() && ip[digits] >= '0' && ip[digits] <= '9') {
      value = value * 10 + static_cast<unsigned int>(ip[digits] - '0');
      if (++digits > max_octet_digits) {
        return false;
      }
    }
    bool const leading_zero = digits > 1 && ip.front() == '0';
    if (digits == 0 || leading_zero || value > max_octet_value) {
      return false;
    }
    ip.remove_prefix(digits);

    // Every octet except the last must be followed by a dot.
    if (octet + 1 < octet_count) {
      if (ip.empty() || ip.front() != '.') {
        return false;
      }
      ip.remove_prefix(1);
    }
  }
  // Anything left over (extra octets, trailing dot, junk) is invalid.
  return ip.empty();
}
|
|
|
+
|
|
|
+inline bool ipv6(std::string_view ip) {
|
|
|
+ int expected_spans = 8;
|
|
|
+
|
|
|
+ if (size_t n = ip.find('.'); n != std::string::npos) {
|
|
|
+ if (not ipv4(ip.substr(ip.find_last_of(':') + 1))) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ // This is a cheat to allow e.g. ::127.0.0.1 to validate
|
|
|
+ expected_spans = 7;
|
|
|
+ ip = ip.substr(0, n);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ip.find_first_not_of("0123456789ABCDEFabcdef:") != std::string::npos) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if (ip.size() >= 40) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ bool has_compressed = false;
|
|
|
+ int groups = 0;
|
|
|
+
|
|
|
+ if (ip.starts_with("::")) {
|
|
|
+ has_compressed = true;
|
|
|
+ ip.remove_prefix(2);
|
|
|
+ }
|
|
|
+ while (!ip.empty()) {
|
|
|
+ int data;
|
|
|
+ if (sscanf(ip.data(), "%4x", &data) != 1) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ if (size_t n = ip.find(':'); std::min(n, ip.size()) > 4) {
|
|
|
+ return false;
|
|
|
+ } else if (n != std::string::npos) {
|
|
|
+ ip.remove_prefix(n + 1);
|
|
|
+ } else {
|
|
|
+ ip = "";
|
|
|
+ }
|
|
|
+ ++groups;
|
|
|
+ if (ip[0] == ':') {
|
|
|
+ if (std::exchange(has_compressed, true)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ ip.remove_prefix(1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return groups == expected_spans || (has_compressed && groups < expected_spans);
|
|
|
+}
|
|
|
+
|
|
|
+// Let's be honest - no matter what RFC 5321 §4.1.2 or RFC 6531 say, the only
|
|
|
+// way to know if an email address is valid is to try and send a message to it.
|
|
|
+// Therefore, there's no point in trying to validate things according to a
|
|
|
+// complex grammar - as long as it has an '@' sign with at least one character
|
|
|
+// on each side, we ought to call it an email.
|
|
|
+inline bool email(std::string_view em) {
|
|
|
+ size_t n = em.find_last_of('@');
|
|
|
+ if (n == 0 || n >= em.size() - 1) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (em[0] == '"' && em[n - 1] == '"') {
|
|
|
+ // No validation
|
|
|
+ } else if (em.substr(0, n).find("..") != std::string::npos || em[n - 1] == '.' || em[0] == '.') {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ em.remove_prefix(n + 1);
|
|
|
+ if (em.front() == '[' && em.back() == ']') {
|
|
|
+ em.remove_prefix(1);
|
|
|
+ em.remove_suffix(1);
|
|
|
+ if (em.starts_with("IPv6:")) {
|
|
|
+ return ipv6(std::string(em.substr(5)));
|
|
|
+ }
|
|
|
+ return ipv4(std::string(em)); // Re-acquire NULL-term
|
|
|
+ }
|
|
|
+ return hostname(em);
|
|
|
+}
|
|
|
+}
|
|
|
+
|
|
|
+namespace jvalidate {
|
|
|
+class FormatValidator {
|
|
|
+public:
|
|
|
+ using Predicate = bool (*)(std::string_view);
|
|
|
+ enum class Status { Unknown, Unimplemented, Valid, Invalid };
|
|
|
+
|
|
|
+private:
|
|
|
+ std::unordered_map<std::string, Predicate> supported_formats_{
|
|
|
+ {"date", &format::date},
|
|
|
+ {"date-time", &format::date_time},
|
|
|
+ {"duration", &format::duration},
|
|
|
+ {"email", &format::email},
|
|
|
+ {"hostname", &format::hostname},
|
|
|
+ {"idn-email", nullptr},
|
|
|
+ {"idn-hostname", nullptr},
|
|
|
+ {"ipv4", &format::ipv4},
|
|
|
+ {"ipv6", &format::ipv6},
|
|
|
+ {"iri", nullptr},
|
|
|
+ {"iri-reference", nullptr},
|
|
|
+ {"json-pointer", nullptr},
|
|
|
+ {"relative-json-pointer", nullptr},
|
|
|
+ /* {"regex", &detail::StdRegexEngine::is_valid}, */
|
|
|
+ {"time", &format::time},
|
|
|
+ {"uri", nullptr},
|
|
|
+ {"uri-reference", nullptr},
|
|
|
+ {"uri-template", nullptr},
|
|
|
+ {"uuid", &format::uuid},
|
|
|
+ };
|
|
|
+
|
|
|
+public:
|
|
|
+ FormatValidator() = default;
|
|
|
+
|
|
|
+ Status operator()(std::string const & format, std::string_view text) const {
|
|
|
+ if (auto it = supported_formats_.find(format); it != supported_formats_.end() && it->second) {
|
|
|
+ if (not it->second) {
|
|
|
+ return Status::Unimplemented;
|
|
|
+ }
|
|
|
+ return it->second(text) ? Status::Valid : Status::Invalid;
|
|
|
+ }
|
|
|
+ return Status::Unknown;
|
|
|
+ }
|
|
|
+};
|
|
|
+}
|