schema.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. #pragma once
  2. #include <memory>
  3. #include <type_traits>
  4. #include <unordered_map>
  5. #include <vector>
  6. #include <jvalidate/adapter.h>
  7. #include <jvalidate/constraint.h>
  8. #include <jvalidate/detail/anchor.h>
  9. #include <jvalidate/detail/expect.h>
  10. #include <jvalidate/detail/on_block_exit.h>
  11. #include <jvalidate/detail/parser_context.h>
  12. #include <jvalidate/detail/pointer.h>
  13. #include <jvalidate/detail/reference.h>
  14. #include <jvalidate/document_cache.h>
  15. #include <jvalidate/enum.h>
  16. #include <jvalidate/forward.h>
  17. namespace jvalidate::schema {
  18. class Node {
  19. private:
  20. std::string description_;
  21. std::unique_ptr<adapter::Const const> default_{nullptr};
  22. detail::Reference uri_;
  23. std::optional<std::string> rejects_all_;
  24. std::optional<schema::Node const *> reference_{};
  25. std::unordered_map<std::string, std::unique_ptr<constraint::Constraint>> constraints_{};
  26. std::unordered_map<std::string, std::unique_ptr<constraint::Constraint>> post_constraints_{};
  27. protected:
  28. static Version schema_version(std::string_view url);
  29. static Version schema_version(Adapter auto const & json);
  30. static Version schema_version(Adapter auto const & json, Version default_version);
  31. public:
  32. Node() = default;
  33. Node(std::string const & rejection_reason) : rejects_all_(rejection_reason) {}
  34. template <Adapter A> void construct(detail::ParserContext<A> context);
  35. bool is_pure_reference() const {
  36. return reference_ && constraints_.empty() && post_constraints_.empty() && not default_;
  37. }
  38. std::optional<std::string> const & rejects_all() const { return rejects_all_; }
  39. std::optional<schema::Node const *> reference_schema() const { return reference_; }
  40. bool requires_result_context() const { return not post_constraints_.empty(); }
  41. auto const & constraints() const { return constraints_; }
  42. auto const & post_constraints() const { return constraints_; }
  43. adapter::Const const * default_value() const { return default_.get(); }
  44. private:
  45. template <Adapter A> detail::OnBlockExit resolve_anchor(detail::ParserContext<A> & context);
  46. template <Adapter A> bool resolve_reference(detail::ParserContext<A> const & context);
  47. };
  48. inline Version Node::schema_version(std::string_view url) {
  49. static std::map<std::string_view, Version> const g_schema_ids{
  50. {"json-schema.org/draft-04/schema", Version::Draft04},
  51. {"json-schema.org/draft-06/schema", Version::Draft06},
  52. {"json-schema.org/draft-07/schema", Version::Draft07},
  53. {"json-schema.org/draft/2019-09/schema", Version::Draft2019_09},
  54. {"json-schema.org/draft/2020-12/schema", Version::Draft2020_12},
  55. };
  56. if (url.ends_with('#')) {
  57. url.remove_suffix(1);
  58. }
  59. if (url.starts_with("http://") || url.starts_with("https://")) {
  60. url.remove_prefix(url.find(':') + 3);
  61. }
  62. auto it = g_schema_ids.find(url);
  63. EXPECT_T(it != g_schema_ids.end(), std::invalid_argument, url);
  64. return it->second;
  65. }
  66. Version Node::schema_version(Adapter auto const & json) {
  67. EXPECT(json.type() == adapter::Type::Object);
  68. EXPECT(json.as_object().contains("$schema"));
  69. auto const & schema = json.as_object()["$schema"];
  70. EXPECT(schema.type() == adapter::Type::String);
  71. return schema_version(schema.as_string());
  72. }
  73. Version Node::schema_version(Adapter auto const & json, Version default_version) {
  74. RETURN_UNLESS(json.type() == adapter::Type::Object, default_version);
  75. RETURN_UNLESS(json.as_object().contains("$schema"), default_version);
  76. auto const & schema = json.as_object()["$schema"];
  77. RETURN_UNLESS(schema.type() == adapter::Type::String, default_version);
  78. return schema_version(schema.as_string());
  79. }
  80. }
  81. namespace jvalidate {
  82. class Schema : public schema::Node {
  83. private:
  84. friend class schema::Node;
  85. template <Adapter A> friend class detail::ParserContext;
  86. struct DynamicRef {
  87. template <typename F>
  88. DynamicRef(detail::Reference const & where, F const & reconstruct)
  89. : where(where), reconstruct(reconstruct) {}
  90. detail::Reference where;
  91. std::function<schema::Node const *()> reconstruct;
  92. };
  93. private:
  94. schema::Node accept_;
  95. schema::Node reject_{"always false"};
  96. // A map of (URI, Anchor) => (URI, Pointer), binding an anchor reference
  97. // to it's fully resolved path.
  98. std::map<detail::Reference, detail::Reference> anchors_;
  99. // A map of anchors to DynamicRef info - note that DynamicRef.reconstruct is
  100. // an unsafe object, because it holds an object which may hold references to
  101. // temporary objects.
  102. // Nothing should be added to this object except through calling
  103. // {@see Node::resolve_anchor}, which returns a scope(exit) construct that
  104. // cleans up the element.
  105. std::map<detail::Anchor, DynamicRef> dynamic_anchors_;
  106. // An owning cache of all created schemas. Avoids storing duplicates such as
  107. // the "always-true" schema, "always-false" schema, and schemas whose only
  108. // meaningful field is "$ref", "$recursiveRef", or "$dynamicRef".
  109. std::map<detail::Reference, schema::Node> cache_;
  110. // A non-owning cache of all schemas, including duplcates where multiple
  111. // References map to the same underlying schema.
  112. std::map<detail::Reference, schema::Node const *> alias_cache_;
  113. public:
  114. /**
  115. * @brief Construct a new schema. All other constructors of this type may be
  116. * considered syntactic sugar for this constructor.
  117. *
  118. * As such, the true signature of this class's contructor is:
  119. *
  120. * Schema(Adapter| JSON
  121. * [, schema::Version]
  122. * [, URIResolver<A> | DocumentCache<A> &]
  123. * [, ConstraintFactory<A> const &])
  124. *
  125. * as long as the order of arguments is preserved - the constructor will work
  126. * no matter which arguments are ignored. The only required argument being
  127. * the JSON object/Adapter.
  128. *
  129. * @param json An adapter to a json object
  130. *
  131. * @param version The json-schema draft version that all schemas will prefer
  132. *
  133. * @param external An object capable of resolving URIs, and turning them into
  134. * Adapter objects. Holds a cache and so must be mutable.
  135. *
  136. * @param factory An object that manuafactures constraints - allows the user
  137. * to provide custom extensions or even modify the behavior of existing
  138. * keywords by overridding the virtual accessor function(s).
  139. */
  140. template <Adapter A>
  141. Schema(A const & json, schema::Version version, DocumentCache<A> & external,
  142. ConstraintFactory<A> const & factory = {}) {
  143. // Prevent unintialized data caches
  144. if (version >= schema::Version::Draft06 && json.type() == adapter::Type::Boolean) {
  145. schema::Node::operator=(std::move(json.as_boolean() ? accept_ : reject_));
  146. return;
  147. }
  148. external.cache_reference(URI(), json);
  149. detail::ParserContext<A> root{*this, json, version, factory, external};
  150. construct(root);
  151. }
  152. /**
  153. * @param json An adapter to a json schema
  154. *
  155. * @param version The json-schema draft version that all schemas will prefer
  156. *
  157. * @param external An object capable of resolving URIs, and turning them into
  158. * Adapter objects. Holds a cache and so must be mutable. If this constructor
  159. * is called, then it means that the cache is a one-off object, and will not
  160. * be reused.
  161. */
  162. template <Adapter A, typename... Args>
  163. Schema(A const & json, schema::Version version, DocumentCache<A> && external, Args &&... args)
  164. : Schema(json, version, external, std::forward<Args>(args)...) {}
  165. /**
  166. * @param json An adapter to a json schema
  167. *
  168. * @param version The json-schema draft version that all schemas will prefer
  169. *
  170. * @param resolve A function capable of resolving URIs, and storing the
  171. * contents in a provided concrete JSON object.
  172. */
  173. template <Adapter A, typename... Args>
  174. Schema(A const & json, schema::Version version, URIResolver<A> resolve, Args &&... args)
  175. : Schema(json, version, DocumentCache<A>(resolve), std::forward<Args>(args)...) {}
  176. /**
  177. * @param json An adapter to a json schema
  178. *
  179. * @param version The json-schema draft version that all schemas will prefer
  180. */
  181. template <Adapter A, Not<DocumentCache<A>>... Args>
  182. Schema(A const & json, schema::Version version, Args &&... args)
  183. : Schema(json, version, DocumentCache<A>(), std::forward<Args>(args)...) {}
  184. /**
  185. * @param json An adapter to a json schema
  186. */
  187. template <Adapter A, Not<schema::Version>... Args>
  188. explicit Schema(A const & json, Args &&... args)
  189. : Schema(json, schema_version(json), std::forward<Args>(args)...) {}
  190. /**
  191. * @param json Any non-adapter (JSON) object. Will be immedately converted
  192. * into an Adapter object to allow us to walk through it w/o specialization.
  193. */
  194. template <typename JSON, typename... Args>
  195. explicit Schema(JSON const & json, Args &&... args)
  196. : Schema(adapter::AdapterFor<JSON const>(json), std::forward<Args>(args)...) {}
  197. private:
  198. /**
  199. * @brief Associate an anchor with its absolute path
  200. * @pre We should not already have an anchor associated with this anchor
  201. *
  202. * @param anchor A URI-Reference containing only a URI and Anchor
  203. * @param from A URI-Reference representing the absolute path to this Anchor
  204. */
  205. void anchor(detail::Reference const & anchor, detail::Reference const & from) {
  206. EXPECT_M(anchors_.try_emplace(anchor.root(), from).second,
  207. "more than one anchor found for uri " << anchor);
  208. }
  209. template <Adapter A>
  210. void dynamic_anchor(detail::Anchor const & anchor, detail::ParserContext<A> const & context) {
  211. dynamic_anchors_.try_emplace(anchor, context.where,
  212. [this, context]() { return fetch_schema(context); });
  213. }
  214. void remove_dynamic_anchor(detail::Anchor const & anchor, detail::Reference const & where) {
  215. if (auto it = dynamic_anchors_.find(anchor);
  216. it != dynamic_anchors_.end() && it->second.where == where) {
  217. dynamic_anchors_.erase(it);
  218. }
  219. }
  220. schema::Node const * alias(detail::Reference const & where, schema::Node const * schema) {
  221. EXPECT_M(alias_cache_.try_emplace(where, schema).second,
  222. "more than one schema found with uri " << where);
  223. return schema;
  224. }
  225. std::optional<schema::Node const *> from_cache(detail::Reference ref) {
  226. if (auto it = anchors_.find(ref.root()); it != anchors_.end()) {
  227. ref = it->second / ref.pointer();
  228. }
  229. if (auto it = alias_cache_.find(ref); it != alias_cache_.end()) {
  230. return it->second;
  231. }
  232. return std::nullopt;
  233. }
  234. template <Adapter A>
  235. schema::Node const * resolve(detail::Reference ref, detail::ParserContext<A> const & context) {
  236. // Special case if the root-level document does not have an $id property
  237. if (ref == detail::Reference() && context.where.uri().empty()) {
  238. return this;
  239. }
  240. if (ref.uri().empty()) {
  241. ref = detail::Reference(context.where.uri(), ref.anchor(), ref.pointer());
  242. }
  243. if (std::optional cached = from_cache(ref)) {
  244. return *cached;
  245. }
  246. // SPECIAL RULE: Resolve this URI into the context of the calling URI
  247. if (not ref.uri().empty() && ref.uri().scheme().empty()) {
  248. URI const & relative_to = context.where.uri();
  249. EXPECT_M(relative_to.resource().rfind('/') != std::string::npos,
  250. "Unable to deduce root for relative uri " << ref.uri() << " (" << relative_to
  251. << ")");
  252. ref = detail::Reference(relative_to.parent() / ref.uri(), ref.anchor(), ref.pointer());
  253. }
  254. std::optional schema = context.external.try_load(ref);
  255. if (not schema.has_value()) {
  256. std::string error = "URIResolver could not resolve " + std::string(ref.uri());
  257. return alias(ref, &cache_.try_emplace(ref, error).first->second);
  258. }
  259. return fetch_schema(context.rebind(*schema, ref));
  260. }
  261. schema::Node const * resolve_dynamic(detail::Anchor const & ref) {
  262. auto it = dynamic_anchors_.find(ref);
  263. EXPECT_M(it != dynamic_anchors_.end(), "Unmatched $dynamicRef '" << ref << "'");
  264. return it->second.reconstruct();
  265. }
  266. template <Adapter A> schema::Node const * fetch_schema(detail::ParserContext<A> const & context) {
  267. // TODO(samjaffe): No longer promises uniqueness - instead track unique URI's
  268. if (std::optional cached = from_cache(context.where)) {
  269. return *cached;
  270. }
  271. adapter::Type const type = context.schema.type();
  272. if (type == adapter::Type::Boolean && context.version >= schema::Version::Draft06) {
  273. return alias(context.where, context.schema.as_boolean() ? &accept_ : &reject_);
  274. }
  275. EXPECT_M(type == adapter::Type::Object, "invalid schema at " << context.where);
  276. if (context.schema.object_size() == 0) {
  277. return alias(context.where, &accept_);
  278. }
  279. auto [it, created] = cache_.try_emplace(context.where);
  280. EXPECT_M(created, "creating duplicate schema at... " << context.where);
  281. // Do this here first in order to protect from infinite loops
  282. alias(context.where, &it->second);
  283. it->second.construct(context);
  284. if (not it->second.is_pure_reference()) {
  285. return &it->second;
  286. }
  287. // Special Case - if the only is the reference constraint, then we don't need
  288. // to store it uniquely. Draft2019_09 supports directly extending a $ref schema
  289. // in the same schema, instead of requiring an allOf clause.
  290. schema::Node const * node = *it->second.reference_schema();
  291. cache_.erase(it);
  292. return alias_cache_[context.where] = node;
  293. }
  294. };
  295. }
  296. namespace jvalidate::detail {
  297. template <Adapter A> schema::Node const * ParserContext<A>::node() const {
  298. return root.fetch_schema(*this);
  299. }
  300. template <Adapter A> schema::Node const * ParserContext<A>::always() const {
  301. return fixed_schema(schema.as_boolean());
  302. }
  303. template <Adapter A> schema::Node const * ParserContext<A>::fixed_schema(bool accept) const {
  304. return accept ? &root.accept_ : &root.reject_;
  305. }
  306. }
  307. namespace jvalidate::schema {
  308. template <Adapter A> detail::OnBlockExit Node::resolve_anchor(detail::ParserContext<A> & context) {
  309. auto const schema = context.schema.as_object();
  310. if (schema.contains("$anchor")) {
  311. // Create an anchor mapping using the current document and the anchor
  312. // string. There's no need for special validation/chaining here, because
  313. // {@see Schema::resolve} will turn all $ref/$dynamicRef anchors into
  314. // their fully-qualified path.
  315. detail::Anchor anchor(schema["$anchor"].as_string());
  316. context.root.anchor(detail::Reference(context.where.uri(), anchor), context.where);
  317. return nullptr;
  318. }
  319. if (context.version == Version::Draft2019_09 && schema.contains("$recursiveAnchor")) {
  320. EXPECT_M(schema["$recursiveAnchor"].as_boolean(), "$recursiveAnchor MUST be 'true'");
  321. context.root.dynamic_anchor(detail::Anchor(), context);
  322. return [&context]() { context.root.remove_dynamic_anchor(detail::Anchor(), context.where); };
  323. }
  324. if (context.version > Version::Draft2019_09 && schema.contains("$dynamicAnchor")) {
  325. detail::Anchor anchor(schema["$dynamicAnchor"].as_string());
  326. context.root.dynamic_anchor(anchor, context);
  327. return [&context, anchor]() { context.root.remove_dynamic_anchor(anchor, context.where); };
  328. }
  329. return nullptr;
  330. }
  331. template <Adapter A> bool Node::resolve_reference(detail::ParserContext<A> const & context) {
  332. auto const schema = context.schema.as_object();
  333. if (schema.contains("$ref")) {
  334. detail::Reference ref(schema["$ref"].as_string());
  335. reference_ = context.root.resolve(ref, context);
  336. return true;
  337. }
  338. if (context.version < Version::Draft2019_09) {
  339. return false;
  340. }
  341. if (context.version == Version::Draft2019_09 && schema.contains("$recursiveRef")) {
  342. detail::Reference ref(schema["$recursiveRef"].as_string());
  343. EXPECT_M(ref == detail::Reference(), "Only the root schema is permitted as a $recursiveRef");
  344. reference_ = context.root.resolve_dynamic(detail::Anchor());
  345. return true;
  346. }
  347. if (context.version > Version::Draft2019_09 && schema.contains("$dynamicRef")) {
  348. detail::Reference ref(schema["$dynamicRef"].as_string());
  349. reference_ = context.root.resolve_dynamic(ref.anchor());
  350. return true;
  351. }
  352. return false;
  353. }
  354. template <Adapter A> void Node::construct(detail::ParserContext<A> context) {
  355. EXPECT(context.schema.type() == adapter::Type::Object);
  356. auto const schema = context.schema.as_object();
  357. if (schema.contains("$schema")) {
  358. // At any point in the schema, we're allowed to change versions
  359. // This means that we're not version-locked to the latest grammar
  360. // (which is especially important for some breaking changes)
  361. context.version = schema_version(context.schema);
  362. }
  363. if (schema.contains("$id")) {
  364. detail::Reference id(schema["$id"].as_string(), false);
  365. if (id.uri().scheme().empty() and not context.where.uri().empty()) {
  366. id = detail::Reference(context.where.uri().parent() / id.uri(), {}, id.pointer());
  367. }
  368. if (id != context.where) {
  369. context.external.cache_reference(id.uri(), context.schema);
  370. context.root.alias(context.where = id, this);
  371. }
  372. }
  373. [[maybe_unused]] auto _ = resolve_anchor(context);
  374. bool const has_reference = resolve_reference(context);
  375. if (schema.contains("default")) {
  376. default_ = schema["default"].freeze();
  377. }
  378. if (schema.contains("description")) {
  379. description_ = schema["description"].as_string();
  380. }
  381. // Prior to Draft 2019-09, reference keywords take precedence over everything
  382. // else (instead of allowing direct extensions).
  383. if (has_reference && context.version < Version::Draft2019_09) {
  384. return;
  385. }
  386. for (auto const & [key, subschema] : schema) {
  387. // Using a constraint store allows overriding certain rules, or the creation
  388. // of user-defined extention vocabularies.
  389. auto make_constraint = context.factory(key, context.version);
  390. if (not make_constraint) {
  391. continue;
  392. }
  393. // A constraint may return null if it is not applicable - but otherwise
  394. // well-formed. For example, before Draft-06 "exclusiveMaximum" was a
  395. // modifier property for "maximum", and not a unique constaint on its own.
  396. // Therefore, we parse it alongside parsing "maximum", and could return
  397. // nullptr when requesting a constraint pointer for "exclusiveMaximum".
  398. auto constraint = make_constraint(context.child(subschema, key));
  399. if (not constraint) {
  400. continue;
  401. }
  402. if (context.factory.is_post_constraint(key)) {
  403. post_constraints_.emplace(key, std::move(constraint));
  404. } else {
  405. constraints_.emplace(key, std::move(constraint));
  406. }
  407. }
  408. }
  409. }