format.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. #pragma once
  2. #include <jvalidate/_macro.h>
  3. #include <cctype>
  4. #include <chrono>
  5. #include <cstddef>
  6. #include <cstring>
  7. #include <ctime>
  8. #include <string>
  9. #include <string_view>
  10. #include <unordered_map>
  11. #include <utility>
  12. #ifdef JVALIDATE_HAS_IDNA
  13. #include <ada/idna/to_unicode.h>
  14. #include <ada/idna/validity.h>
  15. #endif
  16. #include <jvalidate/detail/expect.h>
  17. #include <jvalidate/detail/idna_special_cases.h>
  18. #include <jvalidate/detail/pointer.h>
  19. #include <jvalidate/detail/relative_pointer.h>
  20. #include <jvalidate/detail/string.h>
  21. #include <jvalidate/forward.h>
  // CONSTRUCTS(TYPE) names the predicate format::ctor_as_valid<detail::TYPE>, i.e. "the text is
  // valid if a detail::TYPE can be constructed from it without throwing".
  #define CONSTRUCTS(TYPE) format::ctor_as_valid<detail::TYPE>
  // UTF32(FN) hands JVALIDATE_IIF the choice between the UTF-32 adapter
  // format::utf32<format::FN<char32_t>> and nullptr, keyed on JVALIDATE_HAS_IDNA.
  // NOTE(review): JVALIDATE_IIF is defined elsewhere; presumably it picks the first alternative
  // when JVALIDATE_HAS_IDNA is 1 and the second when it is 0 - confirm in _macro.h.
  #define UTF32(FN) JVALIDATE_IIF(JVALIDATE_HAS_IDNA, format::utf32<format::FN<char32_t>>, nullptr)
  // Forward declarations of the format predicates defined later in this header. Each one
  // returns true when the whole input is acceptable for the named format.
  namespace jvalidate::format {
  // Calendar date in the form "YYYY-MM-DD".
  bool date(std::string_view dt);
  // Time of day with an optional fraction and a mandatory "Z" or "+hh:mm"/"-hh:mm" suffix.
  bool time(std::string_view dt);
  // A date, a 'T' or 't' separator, then a time.
  bool date_time(std::string_view dt);
  // Duration starting with 'P', e.g. "P1Y2M3DT4H5M6S" or "P2W".
  bool duration(std::string_view dur);
  // Absolute URI (a scheme is required); CharT selects the character width of the input.
  template <typename CharT = char> bool uri(std::basic_string_view<CharT> uri);
  // Absolute URI or relative reference.
  template <typename CharT = char> bool uri_reference(std::basic_string_view<CharT> uri);
  // URI template: literal characters mixed with "{...}" expressions.
  bool uri_template(std::u32string_view uri);
  // Textual UUID, 36 characters long.
  bool uuid(std::string_view id);
  // Host name made of '.'-separated labels.
  template <typename CharT = char> bool hostname(std::basic_string_view<CharT> name);
  // Dotted-decimal IPv4 address.
  bool ipv4(std::string_view ip);
  // IPv6 address, optionally ending in a dotted-decimal IPv4 tail.
  bool ipv6(std::string_view ip);
  // E-mail address: a local part, '@', then a host name or a bracketed IP address.
  template <typename CharT = char> bool email(std::basic_string_view<CharT> em);
  }
  38. namespace jvalidate::format::detail {
  // Outcome of a partial parse that may be followed by more input.
  struct result {
  // Number of characters of the input that belong to the parsed item (0 when invalid).
  ptrdiff_t consumed;
  // Whether the parsed item was well-formed.
  bool valid;
  };
  // Gregorian leap-year rule: every fourth year, except centuries that are not a multiple of 400.
  inline bool is_leapyear(int y) {
    if ((y % 400) == 0) {
      return true;
    }
    return (y % 4) == 0 && (y % 100) != 0;
  }

  // Returns true when day `d` does not exist in month `m` of year `y`.
  //   y - full year (e.g. 2024)
  //   m - ZERO-based month index (0 = January ... 11 = December), as in std::tm::tm_mon
  //   d - one-based day of the month
  // A month index outside 0..11 or a day below 1 is reported as illegal instead of indexing
  // outside the day table.
  inline bool illegal_date(int y, int m, int d) {
    static constexpr int days[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
    if (m < 0 || m > 11 || d < 1) {
      return true;
    }
    // February gains one day in leap years.
    int const last_day = days[m] + ((m == 1 && is_leapyear(y)) ? 1 : 0);
    return d > last_day;
  }
  51. inline result date(std::string_view dt) {
  52. struct tm tm;
  53. if (auto end = strptime(dt.data(), "%Y-%m-%d", &tm); end) {
  54. if ((end - dt.data()) != 10 || illegal_date(tm.tm_year + 1900, tm.tm_mon, tm.tm_mday)) {
  55. return {.consumed = 0, .valid = false};
  56. }
  57. return {.consumed = end - dt.data(), .valid = true};
  58. }
  59. return {.consumed = 0L, .valid = false};
  60. }
  // Decides whether the seconds field of `tm` is acceptable.
  // Despite the name, any time whose seconds field is not 60 is accepted immediately; only a
  // value of exactly 60 triggers the leap-second lookup.
  inline bool is_leapsecond(std::tm tm) {
  if (tm.tm_sec != 60) {
  return true;
  }
  #if __cpp_lib_chrono >= 201907L
  // Convert the broken-down time with mktime() (tm_isdst = -1 lets the library decide about
  // DST) and look for an equal entry in the time-zone database's leap-second list.
  // NOTE(review): the lookup uses every field of `tm`, including the date - callers that only
  // filled in the time-of-day fields should be checked.
  tm.tm_isdst = -1;
  std::chrono::seconds time(std::mktime(&tm));
  auto const & leap_seconds = std::chrono::get_tzdb().leap_seconds;
  return std::ranges::find(leap_seconds, time) != leap_seconds.end();
  #else
  // Without time-zone database support a seconds value of 60 is always rejected.
  return false;
  #endif
  }
  // https://www.rfc-editor.org/rfc/rfc6570.html#section-1.5
  // Returns true when code point `c` falls in the "ucschar" production (the non-ASCII
  // characters permitted in IRIs). Note that the last plane starts at U+E1000, not U+E0000.
  inline bool is_uschar(int c) {
    using P = std::pair<int, int>;
    static constexpr P ranges[] = {
        P{0xA0, 0xD7FF},     P{0xF900, 0xFDCF},   P{0xFDF0, 0xFFEF},   P{0x10000, 0x1FFFD},
        P{0x20000, 0x2FFFD}, P{0x30000, 0x3FFFD}, P{0x40000, 0x4FFFD}, P{0x50000, 0x5FFFD},
        P{0x60000, 0x6FFFD}, P{0x70000, 0x7FFFD}, P{0x80000, 0x8FFFD}, P{0x90000, 0x9FFFD},
        P{0xA0000, 0xAFFFD}, P{0xB0000, 0xBFFFD}, P{0xC0000, 0xCFFFD}, P{0xD0000, 0xDFFFD},
        P{0xE1000, 0xEFFFD},
    };
    for (auto const & [low, high] : ranges) {
      if (c >= low && c <= high) {
        return true;
      }
    }
    return false;
  }
  // Tests whether the character at part[pos] is acceptable inside a URI component.
  //   part              - the component being scanned; pos must be a valid index into it
  //   pos               - in/out: on a successful percent-escape ("%XX") it is advanced by two
  //                       so that it rests on the last hex digit; otherwise it is unchanged
  //   extra_valid_chars - additional ASCII characters allowed for this particular component
  // Accepted are ASCII letters and digits, the IRI "ucschar" range, the unreserved marks and
  // sub-delimiters "-._~!$&'()*+,;=", percent-escapes, and anything in extra_valid_chars.
  //
  // Narrow-character lookups are only made for 7-bit values: <cctype> functions are undefined
  // for other values, and converting a wide code point to char would let e.g. U+012D pass as
  // '-'. A NUL character never matches.
  template <typename CharT>
  inline bool is_pchar(std::basic_string_view<CharT> part, size_t & pos,
                       std::string_view extra_valid_chars = ":@") {
    constexpr std::string_view hex_digits = "0123456789ABCDEFabcdef";
    constexpr std::string_view marks_and_sub_delims = "-._~!$&'()*+,;=";

    // A negative (signed) char converts to a huge value and is therefore not ASCII either.
    auto const is_ascii = [](CharT c) { return static_cast<char32_t>(c) < 0x80; };
    auto const one_of = [&is_ascii](CharT c, std::string_view set) {
      return is_ascii(c) && set.find(static_cast<char>(c)) != set.npos;
    };

    CharT const ch = part[pos];
    if ((is_ascii(ch) && std::isalnum(static_cast<unsigned char>(ch))) || is_uschar(ch) ||
        one_of(ch, marks_and_sub_delims)) {
      return true;
    }
    if (ch == '%') {
      // A percent-escape needs exactly two hex digits after the '%'.
      if (pos + 2 >= part.size() || not one_of(part[pos + 1], hex_digits) ||
          not one_of(part[pos + 2], hex_digits)) {
        return false;
      }
      pos += 2;
      return true;
    }
    return one_of(ch, extra_valid_chars);
  }
  // Tests whether part[pos] may appear as a literal (outside "{...}") in a URI template.
  //   part - the template text; pos must be a valid index into it
  //   pos  - in/out: advanced by two when a percent-escape "%XX" is accepted, so that it rests
  //          on the last hex digit; otherwise unchanged
  // Rejected are control characters (<= 0x1F), DEL (0x7F) and the characters
  //   space " ' % < > \ ^ ` { | }
  // except that '%' is accepted when it introduces a two-digit hex escape.
  // All comparisons are made on full code points, so a non-ASCII character is never confused
  // with an ASCII character that happens to share its low byte.
  inline bool is_uri_template_literal(std::u32string_view part, size_t & pos) {
    constexpr std::u32string_view hex_digits = U"0123456789ABCDEFabcdef";
    constexpr std::u32string_view forbidden = U" \"'%<>\\^`{|}";

    char32_t const ch = part[pos];
    if (ch == U'%') {
      if (pos + 2 >= part.size() || hex_digits.find(part[pos + 1]) == hex_digits.npos ||
          hex_digits.find(part[pos + 2]) == hex_digits.npos) {
        return false;
      }
      pos += 2;
      return true;
    }
    return ch > 0x1F && ch != 0x7F && forbidden.find(ch) == forbidden.npos;
  }
  // Tests whether part[pos] may appear in a URI-template variable name.
  //   part - the variable name being scanned; pos must be a valid index into it
  //   pos  - in/out: advanced by two when a percent-escape "%XX" is accepted, so that it rests
  //          on the last hex digit; otherwise unchanged
  // Accepted are ASCII letters, ASCII digits, '_' and percent-escapes. The checks are made on
  // full code points: <cctype> functions are undefined for values that do not fit in an
  // unsigned char, so they are not used on char32_t input.
  inline bool is_uri_template_varchar(std::u32string_view part, size_t & pos) {
    constexpr std::u32string_view hex_digits = U"0123456789ABCDEFabcdef";

    char32_t const ch = part[pos];
    if (ch == U'%') {
      if (pos + 2 >= part.size() || hex_digits.find(part[pos + 1]) == hex_digits.npos ||
          hex_digits.find(part[pos + 2]) == hex_digits.npos) {
        return false;
      }
      pos += 2;
      return true;
    }
    bool const letter = (ch >= U'a' && ch <= U'z') || (ch >= U'A' && ch <= U'Z');
    bool const digit = ch >= U'0' && ch <= U'9';
    return letter || digit || ch == U'_';
  }
  117. inline bool is_uri_template_expression(std::u32string_view part) {
  118. if (part.empty()) {
  119. return false;
  120. }
  121. if (std::strchr("+#./;?&=,!@|", part[0])) {
  122. part.remove_prefix(1);
  123. }
  124. for (size_t pos = part.find(','); !part.empty();
  125. part.remove_prefix(std::min(part.size(), pos)), pos = part.find(',')) {
  126. std::u32string_view varspec = part.substr(0, pos);
  127. std::u32string_view expand;
  128. if (size_t const mod = varspec.find_first_of(U":*"); mod != varspec.npos) {
  129. expand = varspec.substr(mod + 1);
  130. varspec.remove_suffix(expand.size() + 1);
  131. }
  132. if (expand.empty() || expand == U"*") {
  133. // No Modifier, or Explode
  134. } else if (expand.size() > 4 || expand[0] == '0' ||
  135. not std::ranges::all_of(expand, [](char c) { return std::isdigit(c); })) {
  136. return false;
  137. }
  138. for (size_t i = 0; i < varspec.size(); ++i) {
  139. RETURN_UNLESS(is_uri_template_varchar(varspec, i) || (i > 0 && varspec[i] == '.'), false);
  140. }
  141. }
  142. return true;
  143. }
  144. template <typename CharT> bool is_uri_authority(std::basic_string_view<CharT> uri) {
  145. // A URI Authority section MAY contain user info, which is every character up
  146. // to the first "@" character, as long as that character is not part of the path
  147. if (size_t pos = uri.find('@'); pos != uri.npos) {
  148. for (size_t i = 0; i < pos; ++i) {
  149. if (not is_pchar(uri, i, ":")) {
  150. return false;
  151. }
  152. }
  153. uri.remove_prefix(pos + 1);
  154. }
  155. // A URI Authority HOST section
  156. // If the URI starts with '[', then it MUST BE an IPv6 or an "IPvFuture"
  157. if (uri[0] == '[') {
  158. size_t pos = uri.find(']');
  159. auto ip = uri.substr(1, pos - 1);
  160. uri.remove_prefix(pos + 1);
  161. if (not ipv6(to_u8(ip))) {
  162. return false;
  163. }
  164. }
  165. // A URI Authority PORT section. Technically allows any number of digits
  166. if (size_t pos = uri.find(':'); pos != uri.npos) {
  167. if (not std::ranges::all_of(uri.substr(pos + 1), [](auto c) { return std::isdigit(c); })) {
  168. return false;
  169. }
  170. uri.remove_suffix(uri.size() - pos + 1);
  171. }
  172. // Normal URI Authority HOST section is either an IPv4 or a HOSTNAME
  173. return ipv4(to_u8(uri)) || hostname(uri);
  174. }
  175. // Tests if a URI "Query Part" or "Fragment Part" is valid and remove the part
  176. template <typename CharT> bool test_uri_part(std::basic_string_view<CharT> & uri, char delim) {
  177. size_t const pos = uri.find(delim);
  178. if (pos == uri.npos) {
  179. return true;
  180. }
  181. auto part = uri.substr(pos + 1);
  182. uri = uri.substr(0, pos);
  183. for (size_t pos = 0; pos < part.size(); ++pos) {
  184. RETURN_UNLESS(detail::is_pchar(part, pos, ":@/?"), false);
  185. }
  186. return true;
  187. };
  188. }
  189. namespace jvalidate::format {
  190. inline bool date(std::string_view dt) {
  191. auto [consumed, valid] = detail::date(dt);
  192. return valid && consumed == dt.size();
  193. }
  194. inline bool time(std::string_view dt) {
  195. std::tm tm;
  196. char const * end = strptime(dt.data(), "%T", &tm);
  197. if (end == nullptr || end == dt.end() || (end - dt.data()) < 8) {
  198. return false;
  199. }
  200. dt.remove_prefix(end - dt.begin());
  201. if (dt[0] == '.') {
  202. dt.remove_prefix(1);
  203. if (dt.empty() || not std::isdigit(dt[0])) {
  204. return false;
  205. }
  206. while (std::isdigit(dt[0])) {
  207. dt.remove_prefix(1);
  208. }
  209. }
  210. if (dt[0] == 'Z' || dt[0] == 'z') {
  211. return dt.size() == 1 && detail::is_leapsecond(tm);
  212. }
  213. if (std::strchr("+-", dt[0])) {
  214. return strptime(dt.data() + 1, "%R", &tm) == dt.end() && detail::is_leapsecond(tm);
  215. }
  216. return false;
  217. }
  218. inline bool date_time(std::string_view dt) {
  219. auto [size, good] = detail::date(dt);
  220. if (not good || std::strchr("Tt", dt[size]) == nullptr) {
  221. return false;
  222. }
  223. dt.remove_prefix(size + 1);
  224. return time(dt);
  225. }
  226. template <typename CharT> inline bool uri(std::basic_string_view<CharT> uri) {
  227. using delim = detail::char_delimiters<CharT>;
  228. // https://www.rfc-editor.org/rfc/rfc3986.html#appendix-A
  229. if (size_t const pos = uri.find(':'); pos != uri.npos) {
  230. RETURN_UNLESS(std::isalpha(uri[0]), false);
  231. for (size_t i = 1; i < pos; ++i) {
  232. RETURN_UNLESS(std::isalnum(uri[i]) || std::strchr("+-.", uri[i]), false);
  233. }
  234. uri.remove_prefix(pos + 1);
  235. } else {
  236. return false;
  237. }
  238. RETURN_UNLESS(detail::test_uri_part(uri, '#'), false);
  239. RETURN_UNLESS(detail::test_uri_part(uri, '?'), false);
  240. auto path = uri;
  241. if (uri.starts_with(delim::double_slash)) {
  242. uri.remove_prefix(2);
  243. path = uri.substr(std::min(uri.size(), uri.find('/')));
  244. uri.remove_suffix(path.size());
  245. RETURN_UNLESS(detail::is_uri_authority(uri), false);
  246. }
  247. for (size_t i = 0; i < path.size(); ++i) {
  248. RETURN_UNLESS(detail::is_pchar(path, i, "/:@"), false);
  249. }
  250. return true;
  251. }
  252. template <typename CharT> inline bool uri_reference(std::basic_string_view<CharT> uri) {
  253. using delim = detail::char_delimiters<CharT>;
  254. if (jvalidate::format::uri(uri)) {
  255. return true;
  256. }
  257. RETURN_UNLESS(detail::test_uri_part(uri, '#'), false);
  258. RETURN_UNLESS(detail::test_uri_part(uri, '?'), false);
  259. auto path = uri;
  260. if (uri.starts_with(delim::double_slash)) {
  261. uri.remove_prefix(2);
  262. path = uri.substr(std::min(uri.size(), uri.find('/')));
  263. uri.remove_suffix(path.size());
  264. RETURN_UNLESS(detail::is_uri_authority(uri), false);
  265. }
  266. if (size_t const pos = path.find('/'); pos != path.npos) {
  267. for (size_t i = 0; i < pos; ++i) {
  268. RETURN_UNLESS(detail::is_pchar(path, i, "@"), false);
  269. }
  270. path.remove_prefix(pos);
  271. }
  272. for (size_t i = 0; i < path.size(); ++i) {
  273. RETURN_UNLESS(detail::is_pchar(path, i, "/:@"), false);
  274. }
  275. return true;
  276. }
  277. inline bool uri_template(std::u32string_view uri) {
  278. for (size_t i = 0; i < uri.size(); ++i) {
  279. if (uri[i] != '{') {
  280. RETURN_UNLESS(detail::is_uri_template_literal(uri, i), false);
  281. continue;
  282. }
  283. std::u32string_view expr = uri.substr(i + 1);
  284. size_t const pos = expr.find('}');
  285. RETURN_UNLESS(pos != uri.npos, false);
  286. RETURN_UNLESS(detail::is_uri_template_expression(expr.substr(0, pos)), false);
  287. i += pos + 1;
  288. }
  289. return true;
  290. }
  // "uuid" format: exactly 36 characters laid out as 8-4-4-4-12 hexadecimal digits separated
  // by '-' (upper or lower case). Every position is checked directly, so the input need not
  // be NUL-terminated and no whitespace is skipped.
  inline bool uuid(std::string_view id) {
    constexpr std::string_view hex_digits = "0123456789ABCDEFabcdef";
    constexpr size_t uuid_len = 36;

    if (id.size() != uuid_len) {
      return false;
    }
    for (size_t i = 0; i < uuid_len; ++i) {
      bool const dash_slot = (i == 8 || i == 13 || i == 18 || i == 23);
      bool const ok = dash_slot ? id[i] == '-' : hex_digits.find(id[i]) != hex_digits.npos;
      if (not ok) {
        return false;
      }
    }
    return true;
  }
  // "duration" format. Accepted shapes:
  //   "P<n>W"                              - weeks, which cannot be combined with anything else
  //   "P[<n>Y][<n>M][<n>D][T[<n>H][<n>M][<n>S]]"
  // where <n> is one or more ASCII digits, the fields appear in the order shown, at least one
  // field is present, and a 'T' must be followed by at least one time field.
  // The text is scanned by hand: no sign or whitespace is accepted in front of a number, and
  // the input need not be NUL-terminated.
  inline bool duration(std::string_view dur) {
    // Consumes "<digits><designator>" from the front of `dur` when the designator is one of
    // `allowed`. Returns the designator's index within `allowed`, or npos (leaving `dur`
    // untouched) when the front of `dur` is not such a field.
    auto const eat = [&dur](std::string_view allowed) -> size_t {
      size_t digits = 0;
      while (digits < dur.size() && dur[digits] >= '0' && dur[digits] <= '9') {
        ++digits;
      }
      if (digits == 0 || digits == dur.size()) {
        return std::string::npos;
      }
      size_t const which = allowed.find(dur[digits]);
      if (which != std::string::npos) {
        dur.remove_prefix(digits + 1);
      }
      return which;
    };

    // All DURATION entities must start with the prefix 'P', and cannot be empty
    // past that point.
    if (dur.size() < 2 || dur[0] != 'P') {
      return false;
    }
    dur.remove_prefix(1);

    // Special Case: a duration measured in weeks is incompatible with other
    // duration tokens.
    if (eat("W") != std::string::npos) {
      return dur.empty();
    }

    if (dur[0] != 'T') {
      std::string_view ymd{"YMD"};
      // Read Y/M/D fields in that order. Each accepted field removes itself and every earlier
      // designator from `ymd`, so fields can be skipped but never repeated or reordered.
      // A 'T' ends the date section and hands over to the time section below.
      while (not ymd.empty() && not dur.empty() && dur[0] != 'T') {
        size_t const n = eat(ymd);
        if (n == std::string::npos) {
          return false;
        }
        ymd.remove_prefix(n + 1);
      }
      if (dur.empty()) {
        return true;
      }
    }

    // Whatever is left must be the time section: 'T' plus at least one H/M/S field.
    if (dur[0] != 'T' || dur.size() == 1) {
      return false;
    }
    dur.remove_prefix(1);

    std::string_view hms{"HMS"};
    while (not hms.empty() && not dur.empty()) {
      size_t const n = eat(hms);
      if (n == std::string::npos) {
        return false;
      }
      hms.remove_prefix(n + 1);
    }
    return dur.empty();
  }
  359. template <typename CharT>
  360. bool is_invalid_size_or_boundary_hostname(std::basic_string_view<CharT> name) {
  361. using delim = detail::char_delimiters<CharT>;
  362. return (name.empty() || detail::length_u8(name) >= 64 ||
  363. (name.size() >= 4 && name.substr(2).starts_with(delim::illegal_dashes_ulabel)) ||
  364. name[0] == '-' || name.back() == '-');
  365. }
  366. #if !JVALIDATE_HAS_IDNA
  367. inline bool hostname_part(std::string_view name) {
  368. using delim = detail::char_delimiters<char>;
  369. if (is_invalid_size_or_boundary_hostname(name)) {
  370. return false;
  371. }
  372. return std::ranges::none_of(name, [](char c) { return c != '-' && not std::isalnum(c); });
  373. }
  374. #else
  375. template <typename CharT> inline bool hostname_part(std::basic_string_view<CharT> name) {
  376. using delim = detail::char_delimiters<CharT>;
  377. // Punycode is a way to restructure UTF-8 strings to be ASCII compatibly
  378. // All Punycode string start with "xn--" (and would therefore fail below).
  379. if (name.starts_with(delim::punycode_prefix)) {
  380. std::u32string decoded = detail::to_u32(ada::idna::to_unicode(detail::to_u8(name)));
  381. return (decoded != detail::to_u32(name)) && hostname_part<char32_t>(decoded);
  382. }
  383. // An INVALID hostname part is one of the following:
  384. // - empty
  385. // - more than 63 UTF-8 characters long
  386. // - starts or ends with a '-'
  387. // - matches the regular expression /^..--.*$/
  388. if (is_invalid_size_or_boundary_hostname(name)) {
  389. return false;
  390. }
  391. // This is a much easier check in hostname than idn-hostname, since we can
  392. // just check for alphanumeric and '-'.
  393. if constexpr (std::is_same_v<char, CharT>) {
  394. return std::ranges::none_of(name, [](char c) { return c != '-' && not std::isalnum(c); });
  395. } else {
  396. return ada::idna::is_label_valid(name);
  397. }
  398. }
  399. #endif
  400. template <typename CharT> inline bool hostname(std::basic_string_view<CharT> name) {
  401. using delim = detail::char_delimiters<CharT>;
  402. if (name.find_first_of(delim::illegal_hostname_chars) != name.npos) {
  403. return false;
  404. }
  405. // In general, the maximum length of a hostname is 253 UTF-8 characters.
  406. if (detail::to_u8(name).size() > (name.back() == '.' ? 254 : 253)) {
  407. return false;
  408. }
  409. // Unfortunately, the ada-idna library does not validate things like
  410. // "is there a HEBREW character after the HEBREW COMMA".
  411. if (not std::ranges::all_of(delim::special_cases,
  412. [name](auto & sc) { return sc.accepts(name); })) {
  413. return false;
  414. }
  415. // We validate each sub-section of the hostname in parts, delimited by '.'
  416. for (size_t n = name.find('.'); n != std::string::npos;
  417. name.remove_prefix(n + 1), n = name.find('.')) {
  418. if (not hostname_part(name.substr(0, n))) {
  419. return false;
  420. }
  421. }
  422. // name.empty() would be true only if the final character in the input name
  423. // was '.', this is the only empty hostname part that we allow. Otherwise, we
  424. // have a trailing hostname_part.
  425. return name.empty() || hostname_part(name);
  426. }
  // "ipv4" format: exactly four '.'-separated decimal octets.
  // Each octet is one to three ASCII digits, has a value of at most 255, and may only start
  // with '0' if it is exactly "0". Every octet is checked individually, so a leading zero is
  // caught in any position and empty input is simply rejected.
  inline bool ipv4(std::string_view ip) {
    constexpr int octet_count = 4;
    int octets = 0;
    while (true) {
      size_t const dot = ip.find('.');
      std::string_view const octet = ip.substr(0, dot);
      if (octet.empty() || octet.size() > 3 || ++octets > octet_count) {
        return false;
      }
      if (octet.size() > 1 && octet[0] == '0') {
        return false;
      }
      unsigned int value = 0;
      for (char const c : octet) {
        if (c < '0' || c > '9') {
          return false;
        }
        value = value * 10 + static_cast<unsigned int>(c - '0');
      }
      // Each OCTET is one byte wide.
      if (value > 0xFF) {
        return false;
      }
      if (dot == ip.npos) {
        return octets == octet_count;
      }
      ip.remove_prefix(dot + 1);
    }
  }
  450. inline bool ipv6(std::string_view ip) {
  451. int expected_spans = 8;
  452. // There is a special rule with IPv6 to allow an IPv4 address as a suffix
  453. if (size_t n = ip.find('.'); n != std::string::npos) {
  454. if (not ipv4(ip.substr(ip.find_last_of(':') + 1))) {
  455. return false;
  456. }
  457. // since ipv4 addresses contain 8 bytes of information, and each segment of
  458. // an ipv6 address contains 4 bytes - we should reduce the number of
  459. // expected spans to 6. Instead - we reduce it to 7 because we don't prune
  460. // the first OCTET of the IPv4 section (as it can read as a valid segment).
  461. expected_spans = 7;
  462. ip = ip.substr(0, n);
  463. }
  464. // IPv6 address MAY only contain HEXDIGITs and ':'
  465. if (ip.find_first_not_of("0123456789ABCDEFabcdef:") != std::string::npos) {
  466. return false;
  467. }
  468. // IPv6 addresses can have a maximum of 39 characters (8 4-char HEXDIGIT
  469. // segments with 7 dividing ':'s).
  470. if (ip.size() >= 40) {
  471. return false;
  472. }
  473. bool has_compressed = false;
  474. int groups = 0;
  475. if (ip.starts_with("::")) {
  476. has_compressed = true;
  477. ip.remove_prefix(2);
  478. }
  479. while (!ip.empty() && ++groups) {
  480. int data;
  481. if (sscanf(ip.data(), "%4x", &data) != 1) {
  482. // Not a 4-byte HEXDIGIT. Not sure that it's ever possible due to the
  483. // char filter above.
  484. return false;
  485. }
  486. if (size_t const n = ip.find(':'); std::min(n, ip.size()) > 4) {
  487. return false; // Segment too wide
  488. } else if (n != std::string::npos) {
  489. ip.remove_prefix(n + 1);
  490. } else {
  491. break; // End of String
  492. }
  493. // We removed the regular ':', so this is a check for a compression mark
  494. if (ip[0] != ':') {
  495. continue;
  496. }
  497. if (std::exchange(has_compressed, true)) {
  498. // The above trick allows us to ensure that there is no more than one
  499. // set of "::" compression tokens in this IPv6 adfress.
  500. return false;
  501. }
  502. ip.remove_prefix(1);
  503. }
  504. return groups == expected_spans || (has_compressed && groups < expected_spans);
  505. }
  506. // Let's be honest - no matter what RFC 5321 §4.1.2 or RFC 6531 say, the only
  507. // way to know if an email address is valid is to try and send a message to it.
  508. // Therefore, there's no point in trying to validate things according to a
  509. // complex grammar - as long as it has an '@' sign with at least one character
  510. // on each side, we ought to call it an email.
  511. template <typename CharT> inline bool email(std::basic_string_view<CharT> em) {
  512. using delim = detail::char_delimiters<CharT>;
  513. size_t const n = em.find_last_of('@');
  514. if (n == 0 || n >= em.size() - 1) {
  515. return false;
  516. }
  517. auto const who = em.substr(0, n);
  518. if (who.starts_with('"') && who.ends_with('"')) {
  519. // No validation
  520. } else if (who.starts_with('.') || who.ends_with('.')) {
  521. return false;
  522. } else if (em.substr(0, n).find(delim::dotdot) != em.npos) {
  523. return false;
  524. }
  525. // The DOMAIN section of an email address MAY be either a HOSTNAME, or an
  526. // IP Address surrounded in brackets.
  527. auto domain = em.substr(n + 1);
  528. if (not(domain.starts_with('[') && domain.ends_with(']'))) {
  529. return hostname(domain);
  530. }
  531. domain.remove_prefix(1);
  532. domain.remove_suffix(1);
  533. // When the DOMAIN is an IPv6, it must start with "IPv6:" for some
  534. // weird compatibility reason.
  535. if (auto ip = detail::to_u8(domain); ip.starts_with("IPv6:")) {
  536. return ipv6(ip.substr(5));
  537. } else {
  538. return ipv4(ip);
  539. }
  540. }
  // Adapts a throwing constructor into a predicate: `str` is valid exactly when a T can be
  // constructed from it without a std::exception being thrown. Exceptions of other types
  // are not caught.
  template <typename T> inline bool ctor_as_valid(std::string_view str) {
    bool constructed = false;
    try {
      [[maybe_unused]] T const probe(str);
      constructed = true;
    } catch (std::exception const &) {
      constructed = false;
    }
    return constructed;
  }
  547. template <auto Predicate> bool utf32(std::string_view str) {
  548. return Predicate(detail::to_u32(str));
  549. }
  550. }
  551. namespace jvalidate {
  552. class FormatValidator {
  553. public:
  554. using Predicate = bool (*)(std::string_view);
  555. enum class Status { Unknown, Unimplemented, Valid, Invalid };
  556. private:
  557. std::unordered_map<std::string, Predicate> supported_formats_{
  558. {"date", &format::date},
  559. {"date-time", &format::date_time},
  560. {"duration", &format::duration},
  561. {"email", &format::email},
  562. {"hostname", &format::hostname},
  563. {"idn-email", UTF32(email)},
  564. {"idn-hostname", UTF32(hostname)},
  565. {"ipv4", &format::ipv4},
  566. {"ipv6", &format::ipv6},
  567. {"iri", UTF32(uri)},
  568. {"iri-reference", UTF32(uri_reference)},
  569. {"json-pointer", CONSTRUCTS(Pointer)},
  570. {"relative-json-pointer", CONSTRUCTS(RelativePointer)},
  571. {"regex", nullptr},
  572. {"time", &format::time},
  573. {"uri", &format::uri},
  574. {"uri-reference", &format::uri_reference},
  575. {"uri-template", &format::utf32<format::uri_template>},
  576. {"uuid", &format::uuid},
  577. };
  578. public:
  579. FormatValidator() = default;
  580. FormatValidator(Predicate is_regex) { supported_formats_.insert_or_assign("regex", is_regex); }
  581. Status operator()(std::string const & format, std::string_view text) const {
  582. if (auto it = supported_formats_.find(format); it != supported_formats_.end() && it->second) {
  583. if (not it->second) {
  584. return Status::Unimplemented;
  585. }
  586. return it->second(text) ? Status::Valid : Status::Invalid;
  587. }
  588. return Status::Unknown;
  589. }
  590. };
  591. }
  592. #undef CONSTRUCTS
  593. #undef UTF32