// validation_visitor.h
  1. #pragma once
  2. #include <tuple>
  3. #include <unordered_map>
  4. #include <jvalidate/constraint/array_constraint.h>
  5. #include <jvalidate/constraint/general_constraint.h>
  6. #include <jvalidate/constraint/number_constraint.h>
  7. #include <jvalidate/constraint/object_constraint.h>
  8. #include <jvalidate/constraint/string_constraint.h>
  9. #include <jvalidate/constraint/visitor.h>
  10. #include <jvalidate/detail/enumerate.h>
  11. #include <jvalidate/detail/expect.h>
  12. #include <jvalidate/detail/iostream.h>
  13. #include <jvalidate/detail/number.h>
  14. #include <jvalidate/detail/pointer.h>
  15. #include <jvalidate/forward.h>
  16. #include <jvalidate/schema.h>
  17. #include <jvalidate/status.h>
  18. #include <jvalidate/validation_config.h>
  19. #include <jvalidate/validation_result.h>
  // Selects, from the tuple that visited_ points to, the unordered_set whose element type is
  // `key_type`. visited_ is dereferenced unconditionally, so callers must check it first.
  #define VISITED(key_type) std::get<std::unordered_set<key_type>>(*visited_)
  // Makes the enclosing function return Status::Noop unless document_ currently holds the
  // adapter::Type named by `expected` (delegates to the project's RETURN_UNLESS macro).
  #define NOOP_UNLESS_TYPE(expected) \
    RETURN_UNLESS(document_.type() == adapter::Type::expected, Status::Noop)
  // Leaves the ENCLOSING loop once `rval` is already Status::Reject and neither an error tree
  // (result_) nor an annotation set (visited_) is being collected, i.e. when further iterations
  // could not change anything observable.
  //
  // The expansion must not wrap the `break` in `do { ... } while (false)`: a `break` placed
  // there only terminates that inner do-while and never reaches the caller's loop. The
  // `if (...) {} else break` shape keeps the macro usable as a single statement and avoids
  // capturing a following `else`.
  #define BREAK_EARLY_IF_NO_RESULT_TREE() \
    if (not(rval == Status::Reject and not result_ and not visited_)) { \
    } else \
      break
  29. namespace jvalidate {
  30. template <Adapter A, RegexEngine RE>
  31. class ValidationVisitor : public constraint::ConstraintVisitor {
  32. private:
  33. using VisitedAnnotation = std::tuple<std::unordered_set<size_t>, std::unordered_set<std::string>>;
  34. private:
  35. A document_;
  36. detail::Pointer where_;
  37. detail::Pointer schema_path_;
  38. schema::Node const * schema_;
  39. ValidationResult * result_;
  40. ValidationConfig const & cfg_;
  41. std::unordered_map<std::string, RE> & regex_cache_;
  42. mutable VisitedAnnotation * visited_ = nullptr;
  43. public:
  44. ValidationVisitor(A const & json, schema::Node const & schema, ValidationConfig const & cfg,
  45. std::unordered_map<std::string, RE> & regex_cache, ValidationResult * result)
  46. : document_(json), schema_(&schema), result_(result), cfg_(cfg), regex_cache_(regex_cache) {}
  47. Status visit(constraint::TypeConstraint const & cons) const {
  48. adapter::Type const type = document_.type();
  49. for (adapter::Type const accept : cons.types) {
  50. if (type == accept) {
  51. return Status::Accept;
  52. }
  53. if (accept == adapter::Type::Number && type == adapter::Type::Integer) {
  54. return Status::Accept;
  55. }
  56. if (accept == adapter::Type::Integer && type == adapter::Type::Number &&
  57. detail::is_json_integer(document_.as_number())) {
  58. return Status::Accept;
  59. }
  60. }
  61. add_error("type ", type, " is not one of {", cons.types, "}");
  62. return Status::Reject;
  63. }
  64. Status visit(constraint::ExtensionConstraint const & cons) const {
  65. return cons.validate(document_, where_, result_);
  66. }
  67. Status visit(constraint::EnumConstraint const & cons) const {
  68. auto is_equal = [this](auto const & frozen) {
  69. return document_.equals(frozen, cfg_.strict_equality);
  70. };
  71. for (auto const & option : cons.enumeration) {
  72. if (option->apply(is_equal)) {
  73. return Status::Accept;
  74. }
  75. }
  76. add_error("equals none of the values");
  77. return Status::Reject;
  78. }
  79. Status visit(constraint::AllOfConstraint const & cons) const {
  80. Status rval = Status::Accept;
  81. std::set<size_t> unmatched;
  82. for (auto const & [index, subschema] : detail::enumerate(cons.children)) {
  83. if (auto stat = validate_subschema(subschema, index); stat == Status::Reject) {
  84. rval = Status::Reject;
  85. unmatched.insert(index);
  86. }
  87. BREAK_EARLY_IF_NO_RESULT_TREE();
  88. }
  89. if (rval == Status::Reject) {
  90. add_error("does not validate subschemas ", unmatched);
  91. }
  92. return rval;
  93. }
  94. Status visit(constraint::AnyOfConstraint const & cons) const {
  95. Status rval = Status::Reject;
  96. for (auto const & [index, subschema] : detail::enumerate(cons.children)) {
  97. if (validate_subschema(subschema, index)) {
  98. rval = Status::Accept;
  99. }
  100. if (not visited_ && rval == Status::Accept) {
  101. break;
  102. }
  103. }
  104. if (rval == Status::Reject) {
  105. add_error("validates none of the subschemas");
  106. }
  107. return rval;
  108. }
  109. Status visit(constraint::OneOfConstraint const & cons) const {
  110. std::set<size_t> matches;
  111. for (auto const & [index, subschema] : detail::enumerate(cons.children)) {
  112. if (validate_subschema(subschema, index)) {
  113. matches.insert(index);
  114. }
  115. }
  116. if (matches.size() == 1) {
  117. return Status::Accept;
  118. }
  119. add_error("validates subschemas ", matches);
  120. return Status::Reject;
  121. }
  122. Status visit(constraint::NotConstraint const & cons) const {
  123. VisitedAnnotation * suppress = nullptr;
  124. std::swap(suppress, visited_);
  125. bool const rval = validate_subschema(cons.child) == Status::Reject;
  126. std::swap(suppress, visited_);
  127. if (not rval) {
  128. add_error("actually validates subschema");
  129. }
  130. return rval;
  131. }
  132. Status visit(constraint::ConditionalConstraint const & cons) const {
  133. VisitedAnnotation * suppress = nullptr;
  134. std::swap(suppress, visited_);
  135. bool const if_result(validate_subschema(cons.if_constraint));
  136. std::swap(suppress, visited_);
  137. if (if_result) {
  138. return validate_subschema(cons.then_constraint, detail::parent, "then");
  139. }
  140. return validate_subschema(cons.else_constraint, detail::parent, "else");
  141. }
  142. Status visit(constraint::MaximumConstraint const & cons) const {
  143. switch (document_.type()) {
  144. case adapter::Type::Integer:
  145. if (int64_t value = document_.as_integer(); not cons(value)) {
  146. add_error("integer ", value, " exceeds ", cons.exclusive ? "exclusive " : "", "maximum of ",
  147. cons.value);
  148. return false;
  149. }
  150. return true;
  151. case adapter::Type::Number:
  152. if (double value = document_.as_number(); not cons(value)) {
  153. add_error("number ", value, " exceeds ", cons.exclusive ? "exclusive " : "", "maximum of ",
  154. cons.value);
  155. return false;
  156. }
  157. return true;
  158. default:
  159. return Status::Noop;
  160. }
  161. }
  162. Status visit(constraint::MinimumConstraint const & cons) const {
  163. switch (document_.type()) {
  164. case adapter::Type::Integer:
  165. if (int64_t value = document_.as_integer(); not cons(value)) {
  166. add_error("integer ", value, " fails ", cons.exclusive ? "exclusive " : "", "minimum of ",
  167. cons.value);
  168. return false;
  169. }
  170. return true;
  171. case adapter::Type::Number:
  172. if (double value = document_.as_number(); not cons(value)) {
  173. add_error("number ", value, " fails ", cons.exclusive ? "exclusive " : "", "minimum of ",
  174. cons.value);
  175. return false;
  176. }
  177. return true;
  178. default:
  179. return Status::Noop;
  180. }
  181. }
  182. Status visit(constraint::MultipleOfConstraint const & cons) const {
  183. adapter::Type const type = document_.type();
  184. RETURN_UNLESS(type == adapter::Type::Number || type == adapter::Type::Integer, Status::Noop);
  185. if (double value = document_.as_number(); not cons(value)) {
  186. add_error("number ", value, " is not a multiple of ", cons.value);
  187. return false;
  188. }
  189. return true;
  190. }
  191. Status visit(constraint::MaxLengthConstraint const & cons) const {
  192. NOOP_UNLESS_TYPE(String);
  193. if (auto str = document_.as_string(); detail::length(str) > cons.value) {
  194. add_error("string '", str, "' is greater than the maximum length of ", cons.value);
  195. return false;
  196. }
  197. return true;
  198. }
  199. Status visit(constraint::MinLengthConstraint const & cons) const {
  200. NOOP_UNLESS_TYPE(String);
  201. if (auto str = document_.as_string(); detail::length(str) < cons.value) {
  202. add_error("string '", str, "' is less than the minimum length of ", cons.value);
  203. return false;
  204. }
  205. return true;
  206. }
  207. Status visit(constraint::PatternConstraint const & cons) const {
  208. NOOP_UNLESS_TYPE(String);
  209. RE const & regex = regex_cache_.try_emplace(cons.regex, cons.regex).first->second;
  210. if (auto str = document_.as_string(); not regex.search(str)) {
  211. add_error("string '", str, "' does not match pattern /", cons.regex, "/");
  212. return false;
  213. }
  214. return true;
  215. }
  216. Status visit(constraint::FormatConstraint const & cons) const {
  217. // https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-validation-01#name-defined-formats
  218. NOOP_UNLESS_TYPE(String);
  219. if (not cfg_.validate_format && not cons.is_assertion) {
  220. return true;
  221. }
  222. add_error("unimplemented format assertion: '", cons.format, "'");
  223. return false;
  224. }
  225. Status visit(constraint::AdditionalItemsConstraint const & cons) const {
  226. NOOP_UNLESS_TYPE(Array);
  227. auto array = document_.as_array();
  228. Status rval = Status::Accept;
  229. for (size_t i = cons.applies_after_nth; i < array.size(); ++i) {
  230. rval &= validate_subschema_on(cons.subschema, array[i], i);
  231. BREAK_EARLY_IF_NO_RESULT_TREE();
  232. }
  233. return rval;
  234. }
  235. Status visit(constraint::ContainsConstraint const & cons) const {
  236. NOOP_UNLESS_TYPE(Array);
  237. auto array = document_.as_array();
  238. size_t const minimum = cons.minimum.value_or(1);
  239. size_t const maximum = cons.maximum.value_or(array.size());
  240. size_t matches = 0;
  241. for (size_t i = 0; i < array.size(); ++i) {
  242. if (validate_subschema_on(cons.subschema, array[i], i)) {
  243. ++matches;
  244. }
  245. }
  246. if (matches < minimum) {
  247. add_error("array does not contain at least ", minimum, " matching elements");
  248. return Status::Reject;
  249. }
  250. if (matches > maximum) {
  251. add_error("array contains more than ", maximum, " matching elements");
  252. return Status::Reject;
  253. }
  254. return Status::Accept;
  255. }
  256. Status visit(constraint::MaxItemsConstraint const & cons) const {
  257. NOOP_UNLESS_TYPE(Array);
  258. if (auto size = document_.array_size(); size > cons.value) {
  259. add_error("array with ", size, " items is greater than the maximum of ", cons.value);
  260. return false;
  261. }
  262. return true;
  263. }
  264. Status visit(constraint::MinItemsConstraint const & cons) const {
  265. NOOP_UNLESS_TYPE(Array);
  266. if (auto size = document_.array_size(); size < cons.value) {
  267. add_error("array with ", size, " items is less than the minimum of ", cons.value);
  268. return false;
  269. }
  270. return true;
  271. }
  272. Status visit(constraint::TupleConstraint const & cons) const {
  273. NOOP_UNLESS_TYPE(Array);
  274. Status rval = Status::Accept;
  275. auto array = document_.as_array();
  276. size_t const n = std::min(cons.items.size(), array.size());
  277. for (size_t i = 0; i < n; ++i) {
  278. rval &= validate_subschema_on(cons.items[i], array[i], i);
  279. BREAK_EARLY_IF_NO_RESULT_TREE();
  280. }
  281. return rval;
  282. }
  283. Status visit(constraint::UniqueItemsConstraint const & cons) const {
  284. NOOP_UNLESS_TYPE(Array);
  285. if constexpr (std::totally_ordered<A>) {
  286. std::set<A> cache;
  287. for (A const & elem : document_.as_array()) {
  288. if (not cache.insert(elem).second) {
  289. add_error("array contains duplicate elements");
  290. return Status::Reject;
  291. }
  292. }
  293. } else {
  294. auto array = document_.as_array();
  295. for (size_t i = 0; i < array.size(); ++i) {
  296. for (size_t j = i + 1; j < array.size(); ++j) {
  297. if (array[i].equals(array[j], true)) {
  298. add_error("array elements ", i, " and ", j, " are equal");
  299. return Status::Reject;
  300. }
  301. }
  302. }
  303. }
  304. return Status::Accept;
  305. }
  306. Status visit(constraint::AdditionalPropertiesConstraint const & cons) const {
  307. NOOP_UNLESS_TYPE(Object);
  308. auto matches_any_pattern = [this, &cons](std::string const & key) {
  309. for (auto & pattern : cons.patterns) {
  310. RE const & regex = regex_cache_.try_emplace(pattern, pattern).first->second;
  311. if (regex.search(key)) {
  312. return true;
  313. }
  314. }
  315. return false;
  316. };
  317. Status rval = Status::Accept;
  318. for (auto const & [key, elem] : document_.as_object()) {
  319. if (not cons.properties.contains(key) && not matches_any_pattern(key)) {
  320. rval &= validate_subschema_on(cons.subschema, elem, key);
  321. }
  322. BREAK_EARLY_IF_NO_RESULT_TREE();
  323. }
  324. return rval;
  325. }
  326. Status visit(constraint::DependenciesConstraint const & cons) const {
  327. NOOP_UNLESS_TYPE(Object);
  328. auto object = document_.as_object();
  329. Status rval = Status::Accept;
  330. for (auto const & [key, subschema] : cons.subschemas) {
  331. if (not object.contains(key)) {
  332. continue;
  333. }
  334. rval &= validate_subschema(subschema, key);
  335. BREAK_EARLY_IF_NO_RESULT_TREE();
  336. }
  337. for (auto [key, required] : cons.required) {
  338. if (not object.contains(key)) {
  339. continue;
  340. }
  341. for (auto const & [key, _] : object) {
  342. required.erase(key);
  343. }
  344. rval &= required.empty();
  345. BREAK_EARLY_IF_NO_RESULT_TREE();
  346. }
  347. return rval;
  348. }
  349. Status visit(constraint::MaxPropertiesConstraint const & cons) const {
  350. NOOP_UNLESS_TYPE(Object);
  351. if (auto size = document_.object_size(); size > cons.value) {
  352. add_error("object with ", size, " properties is greater than the maximum of ", cons.value);
  353. return false;
  354. }
  355. return true;
  356. }
  357. Status visit(constraint::MinPropertiesConstraint const & cons) const {
  358. NOOP_UNLESS_TYPE(Object);
  359. if (auto size = document_.object_size(); size < cons.value) {
  360. add_error("object with ", size, " properties is less than the minimum of ", cons.value);
  361. return false;
  362. }
  363. return true;
  364. }
  365. Status visit(constraint::PatternPropertiesConstraint const & cons) const {
  366. NOOP_UNLESS_TYPE(Object);
  367. Status rval = Status::Accept;
  368. for (auto const & [pattern, subschema] : cons.properties) {
  369. RE const & regex = regex_cache_.try_emplace(pattern, pattern).first->second;
  370. for (auto const & [key, elem] : document_.as_object()) {
  371. if (regex.search(key)) {
  372. rval &= validate_subschema_on(subschema, elem, key);
  373. }
  374. BREAK_EARLY_IF_NO_RESULT_TREE();
  375. }
  376. }
  377. return rval;
  378. }
  379. Status visit(constraint::PropertiesConstraint const & cons) const {
  380. NOOP_UNLESS_TYPE(Object);
  381. Status rval = Status::Accept;
  382. auto object = document_.as_object();
  383. if constexpr (MutableAdapter<A>) {
  384. for (auto const & [key, subschema] : cons.properties) {
  385. auto const * default_value = subschema->default_value();
  386. if (default_value && not object.contains(key)) {
  387. object.assign(key, *default_value);
  388. }
  389. }
  390. }
  391. for (auto const & [key, elem] : object) {
  392. if (auto it = cons.properties.find(key); it != cons.properties.end()) {
  393. rval &= validate_subschema_on(it->second, elem, key);
  394. }
  395. BREAK_EARLY_IF_NO_RESULT_TREE();
  396. }
  397. return rval;
  398. }
  399. Status visit(constraint::PropertyNamesConstraint const & cons) const {
  400. NOOP_UNLESS_TYPE(Object);
  401. Status rval = Status::Accept;
  402. for (auto const & [key, _] : document_.as_object()) {
  403. // TODO(samjaffe): Should we prefer a std::string adapter like valijson?
  404. typename A::value_type key_json{key};
  405. rval &= validate_subschema_on(cons.key_schema, A(key_json), std::string("$$key"));
  406. }
  407. return rval;
  408. }
  409. Status visit(constraint::RequiredConstraint const & cons) const {
  410. NOOP_UNLESS_TYPE(Object);
  411. auto required = cons.properties;
  412. for (auto const & [key, _] : document_.as_object()) {
  413. required.erase(key);
  414. }
  415. if (required.empty()) {
  416. return Status::Accept;
  417. }
  418. add_error("missing required properties ", required);
  419. return Status::Reject;
  420. }
  421. Status visit(constraint::UnevaluatedItemsConstraint const & cons) const {
  422. NOOP_UNLESS_TYPE(Array);
  423. if (not visited_) {
  424. return Status::Reject;
  425. }
  426. Status rval = Status::Accept;
  427. auto array = document_.as_array();
  428. for (size_t i = 0; i < array.size(); ++i) {
  429. if (not VISITED(size_t).contains(i)) {
  430. rval &= validate_subschema_on(cons.subschema, array[i], i);
  431. }
  432. BREAK_EARLY_IF_NO_RESULT_TREE();
  433. }
  434. return rval;
  435. }
  436. Status visit(constraint::UnevaluatedPropertiesConstraint const & cons) const {
  437. NOOP_UNLESS_TYPE(Object);
  438. if (not visited_) {
  439. return Status::Reject;
  440. }
  441. Status rval = Status::Accept;
  442. for (auto const & [key, elem] : document_.as_object()) {
  443. if (not VISITED(std::string).contains(key)) {
  444. rval &= validate_subschema_on(cons.subschema, elem, key);
  445. }
  446. BREAK_EARLY_IF_NO_RESULT_TREE();
  447. }
  448. return rval;
  449. }
  450. Status validate() {
  451. if (auto const & reject = schema_->rejects_all()) {
  452. add_error(*reject);
  453. return Status::Reject;
  454. }
  455. if (schema_->accepts_all()) {
  456. // An accept-all schema is not No-Op for the purpose of unevaluated*
  457. return Status::Accept;
  458. }
  459. VisitedAnnotation annotate;
  460. if (schema_->requires_result_context() and not visited_) {
  461. visited_ = &annotate;
  462. }
  463. Status rval = Status::Noop;
  464. if (auto ref = schema_->reference_schema()) {
  465. rval = validate_subschema(*ref, "$ref");
  466. }
  467. detail::Pointer const current_schema = schema_path_;
  468. for (auto const & [key, p_constraint] : schema_->constraints()) {
  469. BREAK_EARLY_IF_NO_RESULT_TREE();
  470. schema_path_ = current_schema / key;
  471. rval &= p_constraint->accept(*this);
  472. }
  473. for (auto const & [key, p_constraint] : schema_->post_constraints()) {
  474. BREAK_EARLY_IF_NO_RESULT_TREE();
  475. schema_path_ = current_schema / key;
  476. rval &= p_constraint->accept(*this);
  477. }
  478. return rval;
  479. }
  480. private:
  481. template <typename... Args> void add_error(Args &&... args) const {
  482. if (not result_) {
  483. return;
  484. }
  485. std::stringstream ss;
  486. using ::jvalidate::operator<<;
  487. [[maybe_unused]] int _[] = {(ss << std::forward<Args>(args), 0)...};
  488. result_->add_error(where_, schema_path_, ss.str());
  489. }
  // Adds every element of `from` to `to`; `from` is left untouched.
  template <typename C> static void merge_visited(C & to, C const & from) {
    for (auto const & entry : from) {
      to.insert(entry);
    }
  }
  493. template <typename... K>
  494. Status validate_subschema(constraint::Constraint::SubConstraint const & subschema,
  495. K const &... keys) const {
  496. if (schema::Node const * const * ppschema = std::get_if<0>(&subschema)) {
  497. return validate_subschema(*ppschema, keys...);
  498. } else {
  499. return std::get<1>(subschema)->accept(*this);
  500. }
  501. }
  502. template <typename... K>
  503. Status validate_subschema(schema::Node const * subschema, K const &... keys) const {
  504. VisitedAnnotation annotate;
  505. ValidationVisitor next = *this;
  506. ((next.schema_path_ /= keys), ...);
  507. std::tie(next.schema_, next.visited_) =
  508. std::forward_as_tuple(subschema, visited_ ? &annotate : nullptr);
  509. Status rval = next.validate();
  510. if (rval == Status::Accept and visited_) {
  511. merge_visited(std::get<0>(*visited_), std::get<0>(annotate));
  512. merge_visited(std::get<1>(*visited_), std::get<1>(annotate));
  513. }
  514. return rval;
  515. }
  516. template <typename K>
  517. Status validate_subschema_on(schema::Node const * subschema, A const & document,
  518. K const & key) const {
  519. ValidationResult result;
  520. ValidationVisitor next = *this;
  521. next.where_ /= key;
  522. std::tie(next.document_, next.schema_, next.result_, next.visited_) =
  523. std::forward_as_tuple(document, subschema, result_ ? &result : nullptr, nullptr);
  524. auto status = next.validate();
  525. if (status == Status::Accept and visited_) {
  526. VISITED(K).insert(key);
  527. }
  528. if (status == Status::Reject and result_) {
  529. result_->add_error(std::move(result));
  530. }
  531. return status;
  532. }
  533. };
  534. }