vocabulary.h 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #pragma once
  2. #include <string_view>
  3. #include <unordered_map>
  4. #include <unordered_set>
  5. #include <jvalidate/enum.h>
  6. #include <jvalidate/forward.h>
  7. namespace jvalidate::detail {
  8. template <Adapter A> struct ParserContext;
  9. template <Adapter A> class Vocabulary {
  10. public:
  11. friend class ConstraintFactory<A>;
  12. using pConstraint = std::unique_ptr<constraint::Constraint>;
  13. using MakeConstraint = std::function<pConstraint(ParserContext<A> const &)>;
  14. private:
  15. schema::Version version_;
  16. std::unordered_map<std::string_view, MakeConstraint> make_;
  17. std::unordered_set<std::string_view> permitted_;
  18. std::unordered_set<std::string> vocabularies_;
  19. // TODO(samjaffe): Migrate this back to constraintsfactory
  20. // A list of keywords that participate in scans for "$id" and "$anchor" tokens
  21. // etc. We need to track this because it is possible (though an anti-pattern),
  22. // to embed an $id token in a "const", or in a bogus keyword.
  23. inline static const std::unordered_set<std::string_view> s_keywords{
  24. // Special tokens - we need to scan definitions for sub-ids, the keys of
  25. // these objects are arbitrary, so we need to skip past them in scanning.
  26. "$defs", "definitions",
  27. // Draft03 only - extends allows us to specify an arbitrary number of
  28. // parent schemas that we use on top of the current schema. Equivalent to
  29. // allOf.[*].$ref in Draft04+.
  30. "extends",
  31. // Algorithmic/Conditional Schema types - for annoying reasons, we need
  32. // to be able to scan the subschemas of these for $id tokens and whatnot,
  33. // despite it never being a sensible decision to embed $ids like that.
  34. "allOf", "anyOf", "not", "oneOf", "if", "then", "else",
  35. // Next are the four array-specific schema keywords
  36. "items", "prefixItems", "additionalItems", "unevaluatedItems",
  37. // And the six object-specific schema keywords. With the exception of
  38. // additionalProperties and unevaluatedProperties, all of these represent
  39. // objects mapping "arbitrary" keys onto schemas, so we need to signal
  40. // that...
  41. "dependencies", "dependentSchemas", "patternProperties", "properties", "additionalProperties",
  42. "unevaluatedProperties"};
  43. // ...using this property_keywords_ object, we list those six keywords that
  44. // are represented as an object of arbitrary keys onto schemas that may
  45. // contain $id/$anchor fields.
  46. inline static const std::unordered_set<std::string_view> s_property_keywords{
  47. "$defs", "definitions", "dependencies", "dependentSchemas", "patternProperties",
  48. "properties"};
  49. // Special rules must be applied for post constraints, of which there are
  50. // currently two. Current discussion says that "constraints" and
  51. // "post-contraints" SHOULD be run as two separate phases (since posts need
  52. // to know which items/properties would be processed, and perhaps other things
  53. // in the future), but that there is no rule on order-of-operations within
  54. // a given phase, nor is there any intention to introduce some kind of Phase 3
  55. inline static const std::unordered_set<std::string_view> s_post_constraints{
  56. "unevaluatedItems", "unevaluatedProperties"};
  57. public:
  58. Vocabulary() = default;
  59. Vocabulary(schema::Version version, std::unordered_map<std::string_view, MakeConstraint> make)
  60. : version_(version), make_(std::move(make)) {
  61. for (auto const & [keyword, _] : make_) {
  62. permitted_.emplace(keyword);
  63. }
  64. }
  65. /**
  66. * @brief Reset the list of keywords that Vocabulary actually respects
  67. *
  68. * @param permitted_keywords The selection of keywords to allow for
  69. * searches/constraint building. Note that a constraint might be
  70. * registered to a null function for compatibility with this.
  71. *
  72. * @param vocabularies An optional selection of vocabulary schemas, used
  73. * as metadata, and deducing {@see is_format_assertion}.
  74. */
  75. void restrict(std::unordered_set<std::string> const & permitted_keywords,
  76. std::unordered_set<std::string> const & vocabularies = {}) & {
  77. permitted_.clear();
  78. vocabularies_ = vocabularies;
  79. for (auto const & [keyword, _] : make_) {
  80. // We only file permitted_keywords into this Vocabulary if we have defined
  81. // bindings for that keyword
  82. if (permitted_keywords.contains(std::string(keyword))) {
  83. permitted_.insert(keyword);
  84. }
  85. }
  86. }
  87. schema::Version version() const { return version_; }
  88. bool is_format_assertion() const {
  89. // In Draft07 and prior - format assertions were considered enabled by
  90. // default. This is - of course - problematic because very few
  91. // implementations actually had full support for format constraints.
  92. if (version_ < schema::Version::Draft2019_09) {
  93. return true;
  94. }
  95. // Some implementations wouldn't even bother with format constraints, and
  96. // others would provide implementations that either missed a number of edge
  97. // cases or were flat-out wrong on certail matters.
  98. // Therefore - starting in Draft 2019-09, the format keyword is an
  99. // annotation by default, instead of an assertion.
  100. if (version_ == schema::Version::Draft2019_09) {
  101. return vocabularies_.contains("/vocab/format");
  102. }
  103. // Draft 2020-12 makes this even more explicit - having separate vocabulary
  104. // documents for "format as assertion" and "format as annotation". Allowing
  105. // validators to add format constraints that are only used for annotating
  106. // results.
  107. return vocabularies_.contains("/vocab/format-assertion");
  108. }
  109. /**
  110. * @brief Is the given "key"word actually a keyword? As in, would
  111. * I expect to resolve a constraint out of it. This is a slightly more
  112. * lenient version of {@see is_constraint} - since it allows keywords that
  113. * have a null factory, as long as they've been registered (e.g. then/else).
  114. *
  115. * @param word The "key"word being looked up (e.g. "if", "properties", ...)
  116. */
  117. bool is_keyword(std::string_view word) const {
  118. return permitted_.contains(word) && make_.contains(word) && s_keywords.contains(word);
  119. }
  120. /**
  121. * @brief Does the given "key"word represent a property object - that is to
  122. * say, an object containing some number of schemas mapped by arbitrary keys
  123. *
  124. * @param word The "key"word being looked up (e.g. "if", "properties", ...)
  125. */
  126. bool is_property_keyword(std::string_view word) const {
  127. return is_keyword(word) && s_property_keywords.contains(word);
  128. }
  129. /**
  130. * @brief Is the given word a real constraint in the Vocabulary. In essence,
  131. * it must be an enabled keyword AND it must have a non-null factory function.
  132. *
  133. * @param word The "key"word being looked up (e.g. "if", "properties", ...)
  134. */
  135. bool is_constraint(std::string_view word) const {
  136. return permitted_.contains(word) && make_.contains(word) && make_.at(word);
  137. }
  138. /**
  139. * @brief Fabricate the given constraint if real from the current context
  140. *
  141. * @param word The "key"word being looked up (e.g. "if", "properties", ...)
  142. *
  143. * @param context The current context of schema parsing, used for re-entrancy.
  144. *
  145. * @returns A pair whose first element is either a pointer to a constraint
  146. * (if word represents a supported constraint AND the constraint resolves to
  147. * something meaningful), else null.
  148. *
  149. * The second element is a boolean indicating if the constraint needs to be
  150. * evaluted after other constraints to use their tracking/annotations.
  151. * See the above comments on s_post_constraints for more info.
  152. */
  153. auto constraint(std::string_view word, ParserContext<A> const & context) const {
  154. return std::make_pair(is_constraint(word) ? make_.at(word)(context) : nullptr,
  155. s_post_constraints.contains(word));
  156. }
  157. };
  158. }