validation_visitor.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. #pragma once
#include <algorithm>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>

#include <jvalidate/constraint/array_constraint.h>
#include <jvalidate/constraint/general_constraint.h>
#include <jvalidate/constraint/number_constraint.h>
#include <jvalidate/constraint/object_constraint.h>
#include <jvalidate/constraint/string_constraint.h>
#include <jvalidate/constraint/visitor.h>
#include <jvalidate/detail/expect.h>
#include <jvalidate/detail/iostream.h>
#include <jvalidate/detail/number.h>
#include <jvalidate/detail/pointer.h>
#include <jvalidate/forward.h>
#include <jvalidate/schema.h>
#include <jvalidate/status.h>
#include <jvalidate/validation_config.h>
#include <jvalidate/validation_result.h>
  19. #define VISITED(type) std::get<std::unordered_set<type>>(*visited_)
  20. #define NOOP_UNLESS_TYPE(etype) \
  21. RETURN_UNLESS(adapter::Type::etype == document_.type(), Status::Noop)
  22. #define BREAK_EARLY_IF_NO_RESULT_TREE() \
  23. do { \
  24. if (rval == Status::Reject and not result_ and not visited_) { \
  25. break; \
  26. } \
  27. } while (false)
  28. namespace jvalidate {
  29. template <Adapter A, RegexEngine RE>
  30. class ValidationVisitor : public constraint::ConstraintVisitor {
  31. private:
  32. using VisitedAnnotation = std::tuple<std::unordered_set<size_t>, std::unordered_set<std::string>>;
  33. private:
  34. A document_;
  35. detail::Pointer where_;
  36. detail::Pointer schema_path_;
  37. schema::Node const * schema_;
  38. ValidationResult * result_;
  39. ValidationConfig const & cfg_;
  40. std::unordered_map<std::string, RE> & regex_cache_;
  41. mutable VisitedAnnotation * visited_ = nullptr;
  42. public:
  43. ValidationVisitor(A const & json, schema::Node const & schema, ValidationConfig const & cfg,
  44. std::unordered_map<std::string, RE> & regex_cache, ValidationResult * result)
  45. : document_(json), schema_(&schema), result_(result), cfg_(cfg), regex_cache_(regex_cache) {}
  46. Status visit(constraint::TypeConstraint const & cons) const {
  47. adapter::Type const type = document_.type();
  48. for (adapter::Type const accept : cons.types) {
  49. if (type == accept) {
  50. return Status::Accept;
  51. }
  52. if (accept == adapter::Type::Number && type == adapter::Type::Integer) {
  53. return Status::Accept;
  54. }
  55. if (accept == adapter::Type::Integer && type == adapter::Type::Number &&
  56. detail::is_json_integer(document_.as_number())) {
  57. return Status::Accept;
  58. }
  59. }
  60. add_error("type ", type, " is not one of {", cons.types, '}');
  61. return Status::Reject;
  62. }
  63. Status visit(constraint::ExtensionConstraint const & cons) const {
  64. return cons.validate(document_, where_, result_);
  65. }
  66. Status visit(constraint::EnumConstraint const & cons) const {
  67. auto is_equal = [this](auto const & frozen) {
  68. return document_.equals(frozen, cfg_.strict_equality);
  69. };
  70. for (auto const & option : cons.enumeration) {
  71. if (option->apply(is_equal)) {
  72. return Status::Accept;
  73. }
  74. }
  75. add_error("equals none of the values");
  76. return Status::Reject;
  77. }
  78. Status visit(constraint::AllOfConstraint const & cons) const {
  79. Status rval = Status::Accept;
  80. size_t i = 0;
  81. for (schema::Node const * subschema : cons.children) {
  82. rval &= validate_subschema(subschema, i);
  83. ++i;
  84. BREAK_EARLY_IF_NO_RESULT_TREE();
  85. }
  86. return rval;
  87. }
  88. Status visit(constraint::AnyOfConstraint const & cons) const {
  89. size_t i = 0;
  90. Status rval = Status::Reject;
  91. for (schema::Node const * subschema : cons.children) {
  92. if (validate_subschema(subschema, i)) {
  93. rval = Status::Accept;
  94. }
  95. if (not visited_ && rval == Status::Accept) {
  96. break;
  97. }
  98. ++i;
  99. }
  100. return rval;
  101. }
  102. Status visit(constraint::OneOfConstraint const & cons) const {
  103. size_t matches = 0;
  104. size_t i = 0;
  105. for (schema::Node const * subschema : cons.children) {
  106. if (validate_subschema(subschema, i)) {
  107. ++matches;
  108. }
  109. ++i;
  110. }
  111. return matches == 1 ? Status::Accept : Status::Reject;
  112. }
  113. Status visit(constraint::NotConstraint const & cons) const {
  114. VisitedAnnotation * suppress = nullptr;
  115. std::swap(suppress, visited_);
  116. auto rval = validate_subschema(cons.child, detail::Pointer()) == Status::Reject;
  117. std::swap(suppress, visited_);
  118. return rval;
  119. }
  120. Status visit(constraint::ConditionalConstraint const & cons) const {
  121. if (validate_subschema(cons.if_constraint, detail::Pointer())) {
  122. return validate_subschema(cons.then_constraint, "then");
  123. }
  124. return validate_subschema(cons.else_constraint, "else");
  125. }
  126. Status visit(constraint::MaximumConstraint const & cons) const {
  127. switch (document_.type()) {
  128. case adapter::Type::Integer:
  129. if (int64_t value = document_.as_integer(); not cons(value)) {
  130. add_error("integer ", value, " exceeds ", cons.exclusive ? "exclusive " : "", "maximum of ",
  131. cons.value);
  132. return false;
  133. }
  134. return true;
  135. case adapter::Type::Number:
  136. if (double value = document_.as_number(); not cons(value)) {
  137. add_error("number ", value, " exceeds ", cons.exclusive ? "exclusive " : "", "maximum of ",
  138. cons.value);
  139. return false;
  140. }
  141. return true;
  142. default:
  143. return Status::Noop;
  144. }
  145. }
  146. Status visit(constraint::MinimumConstraint const & cons) const {
  147. switch (document_.type()) {
  148. case adapter::Type::Integer:
  149. if (int64_t value = document_.as_integer(); not cons(value)) {
  150. add_error("integer ", value, " fails ", cons.exclusive ? "exclusive " : "", "minimum of ",
  151. cons.value);
  152. return false;
  153. }
  154. return true;
  155. case adapter::Type::Number:
  156. if (double value = document_.as_number(); not cons(value)) {
  157. add_error("number ", value, " fails ", cons.exclusive ? "exclusive " : "", "minimum of ",
  158. cons.value);
  159. return false;
  160. }
  161. return true;
  162. default:
  163. return Status::Noop;
  164. }
  165. }
  166. Status visit(constraint::MultipleOfConstraint const & cons) const {
  167. adapter::Type const type = document_.type();
  168. RETURN_UNLESS(type == adapter::Type::Number || type == adapter::Type::Integer, Status::Noop);
  169. if (double value = document_.as_number(); not cons(value)) {
  170. add_error("number ", value, " is not a multiple of ", cons.value);
  171. return false;
  172. }
  173. return true;
  174. }
  175. Status visit(constraint::MaxLengthConstraint const & cons) const {
  176. NOOP_UNLESS_TYPE(String);
  177. if (auto str = document_.as_string(); detail::length(str) > cons.value) {
  178. add_error("string '", str, "' is greater than the maximum length of ", cons.value);
  179. return false;
  180. }
  181. return true;
  182. }
  183. Status visit(constraint::MinLengthConstraint const & cons) const {
  184. NOOP_UNLESS_TYPE(String);
  185. if (auto str = document_.as_string(); detail::length(str) < cons.value) {
  186. add_error("string '", str, "' is less than the minimum length of ", cons.value);
  187. return false;
  188. }
  189. return true;
  190. }
  191. Status visit(constraint::PatternConstraint const & cons) const {
  192. NOOP_UNLESS_TYPE(String);
  193. RE const & regex = regex_cache_.try_emplace(cons.regex, cons.regex).first->second;
  194. if (auto str = document_.as_string(); not regex.search(str)) {
  195. add_error("string '", str, "' does not match pattern /", cons.regex, "/");
  196. return false;
  197. }
  198. return true;
  199. }
  200. Status visit(constraint::FormatConstraint const & cons) const {
  201. // https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-validation-01#name-defined-formats
  202. NOOP_UNLESS_TYPE(String);
  203. if (not cfg_.validate_format && not cons.is_assertion) {
  204. return true;
  205. }
  206. std::cerr << "Unimplemented constraint format(" << cons.format << ")"
  207. << "\n";
  208. return false;
  209. }
  210. Status visit(constraint::AdditionalItemsConstraint const & cons) const {
  211. NOOP_UNLESS_TYPE(Array);
  212. auto array = document_.as_array();
  213. Status rval = Status::Accept;
  214. for (size_t i = cons.applies_after_nth; i < array.size(); ++i) {
  215. rval &= validate_subschema_on(cons.subschema, array[i], i);
  216. BREAK_EARLY_IF_NO_RESULT_TREE();
  217. }
  218. return rval;
  219. }
  220. Status visit(constraint::ContainsConstraint const & cons) const {
  221. NOOP_UNLESS_TYPE(Array);
  222. auto array = document_.as_array();
  223. size_t const minimum = cons.minimum.value_or(1);
  224. size_t const maximum = cons.maximum.value_or(array.size());
  225. size_t matches = 0;
  226. for (size_t i = 0; i < array.size(); ++i) {
  227. if (validate_subschema_on(cons.subschema, array[i], i)) {
  228. ++matches;
  229. }
  230. }
  231. if (matches < minimum) {
  232. add_error("array does not contain at least ", minimum, " matching elements");
  233. return Status::Reject;
  234. }
  235. if (matches > maximum) {
  236. add_error("array contains more than ", maximum, " matching elements");
  237. return Status::Reject;
  238. }
  239. return Status::Accept;
  240. }
  241. Status visit(constraint::MaxItemsConstraint const & cons) const {
  242. NOOP_UNLESS_TYPE(Array);
  243. if (auto size = document_.array_size(); size > cons.value) {
  244. add_error("array with ", size, " items is greater than the maximum of ", cons.value);
  245. return false;
  246. }
  247. return true;
  248. }
  249. Status visit(constraint::MinItemsConstraint const & cons) const {
  250. NOOP_UNLESS_TYPE(Array);
  251. if (auto size = document_.array_size(); size < cons.value) {
  252. add_error("array with ", size, " items is less than the minimum of ", cons.value);
  253. return false;
  254. }
  255. return true;
  256. }
  257. Status visit(constraint::TupleConstraint const & cons) const {
  258. NOOP_UNLESS_TYPE(Array);
  259. Status rval = Status::Accept;
  260. auto array = document_.as_array();
  261. size_t const n = std::min(cons.items.size(), array.size());
  262. for (size_t i = 0; i < n; ++i) {
  263. rval &= validate_subschema_on(cons.items[i], array[i], i);
  264. BREAK_EARLY_IF_NO_RESULT_TREE();
  265. }
  266. return rval;
  267. }
  268. Status visit(constraint::UniqueItemsConstraint const & cons) const {
  269. NOOP_UNLESS_TYPE(Array);
  270. if constexpr (std::totally_ordered<A>) {
  271. std::set<A> cache;
  272. for (A const & elem : document_.as_array()) {
  273. if (not cache.insert(elem).second) {
  274. add_error("array contains duplicate elements");
  275. return Status::Reject;
  276. }
  277. }
  278. } else {
  279. auto array = document_.as_array();
  280. for (size_t i = 0; i < array.size(); ++i) {
  281. for (size_t j = i + 1; j < array.size(); ++j) {
  282. if (array[i].equals(array[j], true)) {
  283. add_error("array elements ", i, " and ", j, " are equal");
  284. return Status::Reject;
  285. }
  286. }
  287. }
  288. }
  289. return Status::Accept;
  290. }
  291. Status visit(constraint::AdditionalPropertiesConstraint const & cons) const {
  292. NOOP_UNLESS_TYPE(Object);
  293. auto matches_any_pattern = [this, &cons](std::string const & key) {
  294. for (auto & pattern : cons.patterns) {
  295. RE const & regex = regex_cache_.try_emplace(pattern, pattern).first->second;
  296. if (regex.search(key)) {
  297. return true;
  298. }
  299. }
  300. return false;
  301. };
  302. Status rval = Status::Accept;
  303. for (auto const & [key, elem] : document_.as_object()) {
  304. if (not cons.properties.contains(key) && not matches_any_pattern(key)) {
  305. rval &= validate_subschema_on(cons.subschema, elem, key);
  306. }
  307. BREAK_EARLY_IF_NO_RESULT_TREE();
  308. }
  309. return rval;
  310. }
  311. Status visit(constraint::DependenciesConstraint const & cons) const {
  312. NOOP_UNLESS_TYPE(Object);
  313. auto object = document_.as_object();
  314. Status rval = Status::Accept;
  315. for (auto const & [key, subschema] : cons.subschemas) {
  316. if (not object.contains(key)) {
  317. continue;
  318. }
  319. rval &= validate_subschema(subschema, key);
  320. BREAK_EARLY_IF_NO_RESULT_TREE();
  321. }
  322. for (auto [key, required] : cons.required) {
  323. if (not object.contains(key)) {
  324. continue;
  325. }
  326. for (auto const & [key, _] : object) {
  327. required.erase(key);
  328. }
  329. rval &= required.empty();
  330. BREAK_EARLY_IF_NO_RESULT_TREE();
  331. }
  332. return rval;
  333. }
  334. Status visit(constraint::MaxPropertiesConstraint const & cons) const {
  335. NOOP_UNLESS_TYPE(Object);
  336. if (auto size = document_.object_size(); size > cons.value) {
  337. add_error("object with ", size, " properties is greater than the maximum of ", cons.value);
  338. return false;
  339. }
  340. return true;
  341. }
  342. Status visit(constraint::MinPropertiesConstraint const & cons) const {
  343. NOOP_UNLESS_TYPE(Object);
  344. if (auto size = document_.object_size(); size < cons.value) {
  345. add_error("object with ", size, " properties is less than the minimum of ", cons.value);
  346. return false;
  347. }
  348. return true;
  349. }
  350. Status visit(constraint::PatternPropertiesConstraint const & cons) const {
  351. NOOP_UNLESS_TYPE(Object);
  352. Status rval = Status::Accept;
  353. for (auto const & [pattern, subschema] : cons.properties) {
  354. RE const & regex = regex_cache_.try_emplace(pattern, pattern).first->second;
  355. for (auto const & [key, elem] : document_.as_object()) {
  356. if (regex.search(key)) {
  357. rval &= validate_subschema_on(subschema, elem, key);
  358. }
  359. BREAK_EARLY_IF_NO_RESULT_TREE();
  360. }
  361. }
  362. return rval;
  363. }
  364. Status visit(constraint::PropertiesConstraint const & cons) const {
  365. NOOP_UNLESS_TYPE(Object);
  366. Status rval = Status::Accept;
  367. auto object = document_.as_object();
  368. if constexpr (MutableAdapter<A>) {
  369. for (auto const & [key, subschema] : cons.properties) {
  370. auto const * default_value = subschema->default_value();
  371. if (default_value && not object.contains(key)) {
  372. object.assign(key, *default_value);
  373. }
  374. }
  375. }
  376. for (auto const & [key, elem] : object) {
  377. if (auto it = cons.properties.find(key); it != cons.properties.end()) {
  378. rval &= validate_subschema_on(it->second, elem, key);
  379. }
  380. BREAK_EARLY_IF_NO_RESULT_TREE();
  381. }
  382. return rval;
  383. }
  384. Status visit(constraint::PropertyNamesConstraint const & cons) const {
  385. NOOP_UNLESS_TYPE(Object);
  386. Status rval = Status::Accept;
  387. for (auto const & [key, _] : document_.as_object()) {
  388. // TODO(samjaffe): Should we prefer a std::string adapter like valijson?
  389. typename A::value_type key_json{key};
  390. rval &= validate_subschema_on(cons.key_schema, A(key_json), std::string("$$key"));
  391. }
  392. return rval;
  393. }
  394. Status visit(constraint::RequiredConstraint const & cons) const {
  395. NOOP_UNLESS_TYPE(Object);
  396. auto required = cons.properties;
  397. for (auto const & [key, _] : document_.as_object()) {
  398. required.erase(key);
  399. }
  400. if (required.empty()) {
  401. return Status::Accept;
  402. }
  403. add_error("missing required properties ", required);
  404. return Status::Reject;
  405. }
  406. Status visit(constraint::UnevaluatedItemsConstraint const & cons) const {
  407. NOOP_UNLESS_TYPE(Array);
  408. if (not visited_) {
  409. return Status::Reject;
  410. }
  411. Status rval = Status::Accept;
  412. auto array = document_.as_array();
  413. for (size_t i = 0; i < array.size(); ++i) {
  414. if (not VISITED(size_t).contains(i)) {
  415. rval &= validate_subschema_on(cons.subschema, array[i], i);
  416. }
  417. BREAK_EARLY_IF_NO_RESULT_TREE();
  418. }
  419. return rval;
  420. }
  421. Status visit(constraint::UnevaluatedPropertiesConstraint const & cons) const {
  422. NOOP_UNLESS_TYPE(Object);
  423. if (not visited_) {
  424. return Status::Reject;
  425. }
  426. Status rval = Status::Accept;
  427. for (auto const & [key, elem] : document_.as_object()) {
  428. if (not VISITED(std::string).contains(key)) {
  429. rval &= validate_subschema_on(cons.subschema, elem, key);
  430. }
  431. BREAK_EARLY_IF_NO_RESULT_TREE();
  432. }
  433. return rval;
  434. }
  435. Status validate() {
  436. if (auto const & reject = schema_->rejects_all()) {
  437. add_error(*reject);
  438. return Status::Reject;
  439. }
  440. if (schema_->accepts_all()) {
  441. // An accept-all schema is not No-Op for the purpose of unevaluated*
  442. return Status::Accept;
  443. }
  444. VisitedAnnotation annotate;
  445. if (schema_->requires_result_context() and not visited_) {
  446. visited_ = &annotate;
  447. }
  448. Status rval = Status::Noop;
  449. if (auto ref = schema_->reference_schema()) {
  450. rval = validate_subschema(*ref, "$ref");
  451. }
  452. detail::Pointer const current_schema = schema_path_;
  453. for (auto const & [key, p_constraint] : schema_->constraints()) {
  454. BREAK_EARLY_IF_NO_RESULT_TREE();
  455. schema_path_ = current_schema / key;
  456. rval &= p_constraint->accept(*this);
  457. }
  458. for (auto const & [key, p_constraint] : schema_->post_constraints()) {
  459. BREAK_EARLY_IF_NO_RESULT_TREE();
  460. schema_path_ = current_schema / key;
  461. rval &= p_constraint->accept(*this);
  462. }
  463. return rval;
  464. }
  465. private:
  466. template <typename... Args> void add_error(Args &&... args) const {
  467. if (not result_) {
  468. return;
  469. }
  470. std::stringstream ss;
  471. using ::jvalidate::operator<<;
  472. [[maybe_unused]] int _[] = {(ss << std::forward<Args>(args), 0)...};
  473. result_->add_error(where_, schema_path_, ss.str());
  474. }
  // Adds every element of `from` to `to`; `from` is left untouched.
  template <typename C> static void merge_visited(C & to, C const & from) {
    for (auto const & entry : from) {
      to.insert(entry);
    }
  }
  478. template <typename K>
  479. Status validate_subschema(schema::Node const * subschema, K const & key) const {
  480. VisitedAnnotation annotate;
  481. ValidationVisitor next = *this;
  482. next.schema_path_ /= key;
  483. std::tie(next.schema_, next.visited_) =
  484. std::forward_as_tuple(subschema, visited_ ? &annotate : nullptr);
  485. Status rval = next.validate();
  486. if (rval == Status::Accept and visited_) {
  487. merge_visited(std::get<0>(*visited_), std::get<0>(annotate));
  488. merge_visited(std::get<1>(*visited_), std::get<1>(annotate));
  489. }
  490. return rval;
  491. }
  492. template <typename K>
  493. Status validate_subschema_on(schema::Node const * subschema, A const & document,
  494. K const & key) const {
  495. ValidationResult result;
  496. ValidationVisitor next = *this;
  497. next.where_ /= key;
  498. std::tie(next.document_, next.schema_, next.result_, next.visited_) =
  499. std::forward_as_tuple(document, subschema, result_ ? &result : nullptr, nullptr);
  500. auto status = next.validate();
  501. if (status == Status::Accept and visited_) {
  502. VISITED(K).insert(key);
  503. }
  504. if (status == Status::Reject and result_) {
  505. result_->add_error(std::move(result));
  506. }
  507. return status;
  508. }
  509. };
  510. }