schema.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. #pragma once
  2. #include <map>
  3. #include <memory>
  4. #include <optional>
  5. #include <stdexcept> // IWYU pragma: keep
  6. #include <string>
  7. #include <unordered_map>
  8. #include <jvalidate/adapter.h>
  9. #include <jvalidate/constraint.h>
  10. #include <jvalidate/detail/expect.h>
  11. #include <jvalidate/detail/on_block_exit.h>
  12. #include <jvalidate/detail/parser_context.h>
  13. #include <jvalidate/detail/reference.h>
  14. #include <jvalidate/detail/reference_manager.h>
  15. #include <jvalidate/document_cache.h>
  16. #include <jvalidate/enum.h>
  17. #include <jvalidate/forward.h>
  18. namespace jvalidate::schema {
  19. /**
  20. * @brief The real "Schema" class, representing a resolved node in a schema
  21. * object. Each node is analogous to one layer of the schema json, and can
  22. * represent either a "rejects all" schema, an "accepts all" schema, or a
  23. * schema that has some selection of constraints and other features.
  24. */
  25. class Node {
  26. private:
  27. // Annotations for this schema...
  28. std::string description_;
  29. // The default value to apply to an object if if does not exist - is invoked
  30. // by the parent schema node, rather than this node itself.
  31. std::unique_ptr<adapter::Const const> default_{nullptr};
  32. // Rejects-all can provide a custom reason under some circumstances.
  33. std::optional<std::string> rejects_all_;
  34. // Actual constraint information
  35. std::optional<schema::Node const *> reference_;
  36. std::unordered_map<std::string, std::unique_ptr<constraint::Constraint>> constraints_;
  37. std::unordered_map<std::string, std::unique_ptr<constraint::Constraint>> post_constraints_;
  38. public:
  39. Node() = default;
  40. /**
  41. * @brief Construct a schema that rejects all values, with a custom reason
  42. *
  43. * @param A user-safe justification of why this schema rejects everything.
  44. * Depending on the compiler settings, this might be used to indicate things
  45. * such as attempting to load a non-existant schema.
  46. */
  47. explicit Node(std::string const & rejection_reason) : rejects_all_(rejection_reason) {}
  48. /**
  49. * @brief Actually initialize this schema node. Unfortunately, we cannot use
  50. * RAII for initializing this object because of certain optimizations and
  51. * guardrails make reference captures breakable.
  52. *
  53. * @param context The currently operating context, including the actual JSON
  54. * document being parsed at this moment.
  55. */
  56. template <Adapter A> void construct(detail::ParserContext<A> context);
  57. bool is_pure_reference() const {
  58. return reference_ && constraints_.empty() && post_constraints_.empty() && not default_;
  59. }
  60. bool accepts_all() const {
  61. return not reference_ && constraints_.empty() && post_constraints_.empty();
  62. }
  63. std::optional<std::string> const & rejects_all() const { return rejects_all_; }
  64. std::optional<schema::Node const *> reference_schema() const { return reference_; }
  65. std::string const & description() const { return description_; }
  66. bool requires_result_context() const { return not post_constraints_.empty(); }
  67. auto const & constraints() const { return constraints_; }
  68. auto const & post_constraints() const { return post_constraints_; }
  69. adapter::Const const * default_value() const { return default_.get(); }
  70. private:
  71. /**
  72. * @brief Resolve any dynamic anchors that are children of the current schema
  73. * (if this is the root node of a schema). If it is not a root node (does not
  74. * define "$id"), then this function does nothing.
  75. *
  76. * @tparam A The Adapter type for the JSON being worked with.
  77. *
  78. * @param context The currently operating context, including the actual JSON
  79. * document being parsed at this moment.
  80. *
  81. * @returns If this is a root schema - a scope object to pop the dynamic scope
  82. */
  83. template <Adapter A> detail::OnBlockExit resolve_anchor(detail::ParserContext<A> const & context);
  84. /**
  85. * @brief Resolves/embeds referenced schema information into this schema node.
  86. *
  87. * @tparam A The Adapter type for the JSON being worked with.
  88. *
  89. * @param context The currently operating context, including the actual JSON
  90. * document being parsed at this moment.
  91. *
  92. * @returns true iff there was a reference tag to follow
  93. */
  94. template <Adapter A> bool resolve_reference(detail::ParserContext<A> const & context);
  95. };
  96. }
  97. namespace jvalidate {
  98. class Schema : public schema::Node {
  99. private:
  100. friend class schema::Node;
  101. template <Adapter A> friend struct detail::ParserContext;
  102. private:
  103. schema::Node accept_;
  104. schema::Node reject_{"always false"};
  105. // An owning cache of all created schemas. Avoids storing duplicates such as
  106. // the "always-true" schema, "always-false" schema, and schemas whose only
  107. // meaningful field is "$ref", "$recursiveRef", or "$dynamicRef".
  108. std::map<detail::Reference, schema::Node> cache_;
  109. // A non-owning cache of all schemas, including duplcates where multiple
  110. // References map to the same underlying schema.
  111. std::map<detail::Reference, schema::Node const *> alias_cache_;
  112. public:
  113. /**
  114. * @brief Construct a new schema. All other constructors of this type may be
  115. * considered syntactic sugar for this constructor.
  116. *
  117. * As such, the true signature of this class's contructor is:
  118. *
  119. * Schema(Adapter| JSON
  120. * [, schema::Version]
  121. * [, URIResolver<A> | DocumentCache<A> &]
  122. * [, ConstraintFactory<A> const &])
  123. *
  124. * as long as the order of arguments is preserved - the constructor will work
  125. * no matter which arguments are ignored. The only required argument being
  126. * the JSON object/Adapter.
  127. *
  128. * @param json An adapter to a json object
  129. *
  130. * @param version The json-schema draft version that all schemas will prefer
  131. *
  132. * @param external An object capable of resolving URIs, and turning them into
  133. * Adapter objects. Holds a cache and so must be mutable.
  134. *
  135. * @param factory An object that manuafactures constraints - allows the user
  136. * to provide custom extensions or even modify the behavior of existing
  137. * keywords by overridding the virtual accessor function(s).
  138. */
  139. template <Adapter A>
  140. Schema(A const & json, schema::Version version, DocumentCache<A> & external,
  141. ConstraintFactory<A> const & factory = {}) {
  142. // Prevent unintialized data caches
  143. if (version >= schema::Version::Draft06 && json.type() == adapter::Type::Boolean) {
  144. schema::Node::operator=(std::move(json.as_boolean() ? accept_ : reject_));
  145. return;
  146. }
  147. detail::ReferenceManager<A> ref(external, json, version, factory);
  148. detail::ParserContext<A> root{*this, json, &ref.vocab(version), ref};
  149. root.where = root.dynamic_where = ref.canonicalize({}, {}, false);
  150. construct(root);
  151. }
  152. /**
  153. * @param json An adapter to a json schema
  154. *
  155. * @param version The json-schema draft version that all schemas will prefer
  156. *
  157. * @param external An object capable of resolving URIs, and turning them into
  158. * Adapter objects. Holds a cache and so must be mutable. If this constructor
  159. * is called, then it means that the cache is a one-off object, and will not
  160. * be reused.
  161. */
  162. template <Adapter A, typename... Args>
  163. // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
  164. Schema(A const & json, schema::Version version, DocumentCache<A> && external, Args &&... args)
  165. : Schema(json, version, external, std::forward<Args>(args)...) {}
  166. /**
  167. * @param json An adapter to a json schema
  168. *
  169. * @param version The json-schema draft version that all schemas will prefer
  170. *
  171. * @param resolve A function capable of resolving URIs, and storing the
  172. * contents in a provided concrete JSON object.
  173. */
  174. template <Adapter A, typename... Args>
  175. Schema(A const & json, schema::Version version, URIResolver<A> resolve, Args &&... args)
  176. : Schema(json, version, DocumentCache<A>(resolve), std::forward<Args>(args)...) {}
  177. /**
  178. * @param json An adapter to a json schema
  179. *
  180. * @param version The json-schema draft version that all schemas will prefer
  181. */
  182. template <Adapter A, Not<DocumentCache<A>>... Args>
  183. Schema(A const & json, schema::Version version, Args &&... args)
  184. : Schema(json, version, DocumentCache<A>(), std::forward<Args>(args)...) {}
  185. /**
  186. * @param json Any non-adapter (JSON) object. Will be immedately converted
  187. * into an Adapter object to allow us to walk through it w/o specialization.
  188. */
  189. template <typename JSON, typename... Args>
  190. requires(not Adapter<JSON>)
  191. explicit Schema(JSON const & json, Args &&... args)
  192. : Schema(adapter::AdapterFor<JSON const>(json), std::forward<Args>(args)...) {}
  193. private:
  194. /**
  195. * @brief Cache an alias to a given schema, without ownership. alias_cache_ is
  196. * a many-to-one association.
  197. * Syntactic sugar for "add pointer to map and return".
  198. *
  199. * @param where The key aliasing the schema, which may also be the original
  200. * lexical key.
  201. *
  202. * @param schema The pointer to a schema being stored
  203. */
  204. schema::Node const * alias(detail::Reference const & where, schema::Node const * schema) {
  205. alias_cache_.emplace(where, schema);
  206. return schema;
  207. }
  208. /**
  209. * @brief Syntactic sugar for finding a map value as an optional instead of an
  210. * iterator that may be "end".
  211. *
  212. * @param ref The key being looked up
  213. */
  214. std::optional<schema::Node const *> from_cache(detail::Reference const & ref) {
  215. if (auto it = alias_cache_.find(ref); it != alias_cache_.end()) {
  216. return it->second;
  217. }
  218. return std::nullopt;
  219. }
  220. /**
  221. * @brief Resolve a $ref/$dynamicRef tag and construct or load from cache the
  222. * schema that is being pointed to.
  223. *
  224. * @param context All of the context information about the schema, importantly
  225. * the location information, {@see jvalidate::detail::ReferenceManager}, and
  226. * {@see jvalidate::detail::Vocabulary}.
  227. *
  228. * @param dynamic_reference Is this request coming from a "$dynamicRef"/
  229. * "$recursiveRef" tag, or a regular "$ref" tag.
  230. *
  231. * @returns A schema node, that will also be stored in a local cache.
  232. *
  233. * @throws std::runtime_error if the reference is to an unloaded URI, and we
  234. * fail to load it. If the preprocessor definition
  235. * JVALIDATE_LOAD_FAILURE_AS_FALSE_SCHEMA is set, then we instead return an
  236. * always-false schema with a custom error message. This is primarily for use
  237. * in writing tests for JSON-Schema's selfvalidation test cases.
  238. */
  239. template <Adapter A>
  240. schema::Node const * resolve(detail::Reference const & ref,
  241. detail::ParserContext<A> const & context, bool dynamic_reference) {
  242. detail::Reference const lexical =
  243. context.ref.canonicalize(ref, context.where, dynamic_reference);
  244. detail::Reference const dynamic = dynamic_reference ? lexical : context.dynamic_where / "$ref";
  245. detail::OnBlockExit scope;
  246. if (lexical.uri() != context.where.uri()) {
  247. // Whenever we change base URIs, we need to recalculate our dynamic_scope.
  248. // Otherwise, it is possible for a chain of $ref statements to
  249. // accidentally leave us ignoring $dynamicAnchor contexts.
  250. // This is demonstrated by the test descriptor in JSON-Schema-Test-Suite:
  251. // "$dynamicRef avoids the root of each schema, but scopes are still registered"
  252. scope = context.ref.dynamic_scope(lexical);
  253. }
  254. if (std::optional cached = from_cache(dynamic)) {
  255. return *cached;
  256. }
  257. std::string error;
  258. if (std::optional root = context.ref.load(lexical, context.vocab, error)) {
  259. return fetch_schema(context.rebind(*root, lexical, dynamic));
  260. }
  261. constexpr char const * prelude = "URIResolver could not find ";
  262. #ifdef JVALIDATE_LOAD_FAILURE_AS_FALSE_SCHEMA
  263. return alias(dynamic,
  264. &cache_.try_emplace(dynamic, prelude + std::string(lexical.uri())).first->second);
  265. #else
  266. JVALIDATE_THROW(std::runtime_error, prelude << lexical.uri() << ": " << error);
  267. #endif
  268. }
  269. /**
  270. * @brief Fetch from cache or create a new schema node from the given context,
  271. * which may be the result of resolving a reference {@see Schema::resolve}, or
  272. * simply loading a child schema via {@see ParserContext::node}.
  273. *
  274. * @param context The current operating context of the schema
  275. */
  276. template <Adapter A> schema::Node const * fetch_schema(detail::ParserContext<A> const & context) {
  277. // TODO(samjaffe): No longer promises uniqueness - instead track unique URI's
  278. if (std::optional cached = from_cache(context.dynamic_where)) {
  279. return *cached;
  280. }
  281. adapter::Type const type = context.schema.type();
  282. // Boolean schemas were made universally permitted in Draft06. Before then,
  283. // you could only use them for specific keywords, like additionalProperties.
  284. if (type == adapter::Type::Boolean && context.vocab->version() >= schema::Version::Draft06) {
  285. return alias(context.dynamic_where, context.schema.as_boolean() ? &accept_ : &reject_);
  286. }
  287. // If the schema is not universal accept/reject, then it MUST be an object
  288. EXPECT_M(type == adapter::Type::Object, "invalid schema at " << context.dynamic_where);
  289. // The empty object is equivalent to true, but is permitted in prior drafts
  290. if (context.schema.object_size() == 0) {
  291. return alias(context.dynamic_where, &accept_);
  292. }
  293. // Because of the below alias() expression, and the above from_cache
  294. // expression, it shouldn't be possible for try_emplace to not create a new
  295. // schema node. We keep the check in anyway just in case somehow things have
  296. // gotten into a malformed state.
  297. auto [it, created] = cache_.try_emplace(context.dynamic_where);
  298. EXPECT_M(created, "creating duplicate schema at... " << context.dynamic_where);
  299. // Do this here first in order to protect from infinite loops
  300. alias(context.dynamic_where, &it->second);
  301. it->second.construct(context);
  302. return &it->second;
  303. }
  304. };
  305. }
  306. namespace jvalidate::detail {
  307. template <Adapter A> schema::Node const * ParserContext<A>::node() const {
  308. return root.fetch_schema(*this);
  309. }
  310. template <Adapter A> schema::Node const * ParserContext<A>::always() const {
  311. return fixed_schema(schema.as_boolean());
  312. }
  313. template <Adapter A> schema::Node const * ParserContext<A>::fixed_schema(bool accept) const {
  314. return accept ? &root.accept_ : &root.reject_;
  315. }
  316. }
  317. namespace jvalidate::schema {
  318. template <Adapter A>
  319. detail::OnBlockExit Node::resolve_anchor(detail::ParserContext<A> const & context) {
  320. auto const schema = context.schema.as_object();
  321. if (context.vocab->version() < schema::Version::Draft2019_09 || not schema.contains("$id")) {
  322. return nullptr;
  323. }
  324. return context.ref.dynamic_scope(context.where);
  325. }
  326. template <Adapter A> bool Node::resolve_reference(detail::ParserContext<A> const & context) {
  327. auto const schema = context.schema.as_object();
  328. if (schema.contains("$ref")) {
  329. detail::Reference const ref(schema["$ref"].as_string());
  330. reference_ = context.root.resolve(ref, context, false);
  331. return true;
  332. }
  333. // Prior to Draft2019-09, "$ref" was the only way to reference another
  334. // schema (ignoring Draft03's extends keyword, which was more like allOf)
  335. if (context.vocab->version() < Version::Draft2019_09) {
  336. return false;
  337. }
  338. std::string const dyn_ref =
  339. context.vocab->version() > schema::Version::Draft2019_09 ? "$dynamicRef" : "$recursiveRef";
  340. if (schema.contains(dyn_ref)) {
  341. detail::Reference const ref(schema[dyn_ref].as_string());
  342. reference_ = context.root.resolve(ref, context, true);
  343. return true;
  344. }
  345. return false;
  346. }
  347. template <Adapter A> void Node::construct(detail::ParserContext<A> context) {
  348. EXPECT(context.schema.type() == adapter::Type::Object);
  349. auto const schema = context.schema.as_object();
  350. if (schema.contains("$schema")) {
  351. // At any point in the schema, we're allowed to change versions
  352. // This means that we're not version-locked to the latest grammar
  353. // (which is especially important for some breaking changes)
  354. context.vocab = &context.ref.vocab(URI(schema["$schema"].as_string()));
  355. }
  356. auto _ = resolve_anchor(context);
  357. bool const has_reference = resolve_reference(context);
  358. if (schema.contains("default")) {
  359. default_ = schema["default"].freeze();
  360. }
  361. if (schema.contains("description")) {
  362. description_ = schema["description"].as_string();
  363. }
  364. // Prior to Draft 2019-09, reference keywords take precedence over everything
  365. // else (instead of allowing direct extensions).
  366. if (has_reference && context.vocab->version() < Version::Draft2019_09) {
  367. return;
  368. }
  369. for (auto const & [key, subschema] : schema) {
  370. // Using a constraint store allows overriding certain rules, or the creation
  371. // of user-defined extention vocabularies.
  372. if (not context.vocab->is_constraint(key)) {
  373. continue;
  374. }
  375. // A constraint may return null if it is not applicable - but otherwise
  376. // well-formed. For example, before Draft-06 "exclusiveMaximum" was a
  377. // modifier property for "maximum", and not a unique constaint on its own.
  378. // Therefore, we parse it alongside parsing "maximum", and could return
  379. // nullptr when requesting a constraint pointer for "exclusiveMaximum".
  380. auto [constraint, post] = context.vocab->constraint(key, context.child(subschema, key));
  381. if (not constraint) {
  382. continue;
  383. }
  384. if (post) {
  385. post_constraints_.emplace(key, std::move(constraint));
  386. } else {
  387. constraints_.emplace(key, std::move(constraint));
  388. }
  389. }
  390. }
  391. }