Browse Source

refactor: move the keyword validation characteristics into ConstraintFactory

Sam Jaffe 3 tháng trước cách đây
mục cha
commit
b60b58c783

+ 39 - 54
include/jvalidate/constraint.h

@@ -20,6 +20,7 @@
 #include <jvalidate/detail/vocabulary.h>
 #include <jvalidate/enum.h>
 #include <jvalidate/forward.h>
+#include <jvalidate/vocabulary.h>
 
 namespace jvalidate {
 /**
@@ -42,32 +43,12 @@ namespace jvalidate {
 template <Adapter A> class ConstraintFactory {
 public:
   using pConstraint = std::unique_ptr<constraint::Constraint>;
-  // ParserContext<A> => pConstraint
-  using MakeConstraint = typename detail::Vocabulary<A>::MakeConstraint;
-
-  using Object = decltype(std::declval<A>().as_object());
-  enum KeywordType { Keyword, Removed };
-
-  /**
-   * @brief This type represents a union of three different "parsing handler"
-   * states:
-   * 1) "Removed" - this schema token is not a keyword, and does not generate
-   *    a constraint.
-   * 2) "Keyword" - this schema token represents a keyword, but does not
-   *    have a constraint associated with it. For example: "$defs" or "$comment"
-   * 3) "Make"    - this schema token is a keyword with an associated constant
-   *    generating function.
-   */
-  struct Make {
-    Make(KeywordType t) : is_keyword(t == Keyword) {}
-    template <typename F> Make(F make) : make(make), is_keyword(true) {}
-
-    explicit operator bool() const { return make || is_keyword; }
-    operator MakeConstraint() const { return make; }
-
-    MakeConstraint make = nullptr;
-    bool is_keyword = false;
-  };
+  using DependentKeyword = vocabulary::DependentKeyword;
+  static constexpr auto Removed = vocabulary::Removed;
+  static constexpr auto Literal = vocabulary::Literal;
+  static constexpr auto Keyword = vocabulary::Keyword;
+  static constexpr auto KeywordMap = vocabulary::KeywordMap;
+  static constexpr auto PostConstraint = vocabulary::PostConstraint;
 
   /**
    * @brief In order to support multiple schema versions in a single instance of
@@ -101,11 +82,13 @@ public:
    *              {schema::Version::Draft2020_12, &Self::additionalItems}}}
    */
   struct Versioned {
-    template <typename M> Versioned(M make) : data{{schema::Version::Earliest, make}} {}
-    template <typename M> Versioned(schema::Version version, M make) : data{{version, make}} {}
-    Versioned(std::initializer_list<std::pair<schema::Version const, Make>> init) : data(init) {}
+    template <typename M = vocabulary::Metadata<A>>
+    Versioned(M make) : data{{schema::Version::Earliest, make}} {}
+    Versioned(schema::Version version, vocabulary::Metadata<A> make) : data{{version, make}} {}
+    Versioned(std::initializer_list<std::pair<schema::Version const, vocabulary::Metadata<A>>> init)
+        : data(init) {}
 
-    std::map<schema::Version, Make, std::greater<>> data;
+    std::map<schema::Version, vocabulary::Metadata<A>, std::greater<>> data;
   };
   using Store = std::unordered_map<std::string_view, Versioned>;
 
@@ -114,34 +97,34 @@ private:
 
 private:
   std::unordered_map<std::string_view, Versioned> constraints_{
-      {"$defs", {schema::Version::Draft2019_09, Keyword}},
+      {"$defs", {schema::Version::Draft2019_09, KeywordMap}},
       {"additionalItems",
-       {{schema::Version::Earliest, &Self::additionalItems},
+       {{schema::Version::Earliest, {&Self::additionalItems, Keyword}},
         {schema::Version::Draft2020_12, Removed}}},
-      {"additionalProperties", &Self::additionalProperties},
-      {"allOf", {schema::Version::Draft04, &Self::allOf}},
-      {"anyOf", {schema::Version::Draft04, &Self::anyOf}},
+      {"additionalProperties", {{&Self::additionalProperties, Keyword}}},
+      {"allOf", {schema::Version::Draft04, {&Self::allOf, Keyword}}},
+      {"anyOf", {schema::Version::Draft04, {&Self::anyOf, Keyword}}},
       {"const", {schema::Version::Draft06, &Self::isConstant}},
       {"contains", {schema::Version::Draft06, &Self::contains}},
-      {"definitions", Keyword},
-      {"dependencies", &Self::dependencies},
+      {"definitions", KeywordMap},
+      {"dependencies", {{&Self::dependencies, KeywordMap}}},
       {"dependentRequired", {schema::Version::Draft2019_09, &Self::dependentRequired}},
-      {"dependentSchemas", {schema::Version::Draft2019_09, &Self::dependentSchemas}},
+      {"dependentSchemas", {schema::Version::Draft2019_09, {&Self::dependentSchemas, KeywordMap}}},
       {"disallow",
        {{schema::Version::Earliest, &Self::disallowDraft3}, {schema::Version::Draft04, Removed}}},
       {"divisibleBy",
        {{schema::Version::Earliest, &Self::multipleOf}, {schema::Version::Draft04, Removed}}},
-      {"else", {{schema::Version::Draft07, Keyword}}},
+      {"else", {{schema::Version::Draft07, DependentKeyword{"if"}}}},
       {"enum", &Self::isInEnumuration},
       {"exclusiveMaximum", {schema::Version::Draft06, &Self::exclusiveMaximum}},
       {"exclusiveMinimum", {schema::Version::Draft06, &Self::exclusiveMinimum}},
       {"extends",
        {{schema::Version::Earliest, &Self::extendsDraft3}, {schema::Version::Draft04, Removed}}},
       {"format", &Self::format},
-      {"if", {schema::Version::Draft07, &Self::ifThenElse}},
+      {"if", {schema::Version::Draft07, {&Self::ifThenElse, Keyword}}},
       {"items",
-       {{schema::Version::Earliest, &Self::itemsTupleOrVector},
-        {schema::Version::Draft2020_12, &Self::additionalItems}}},
+       {{schema::Version::Earliest, {&Self::itemsTupleOrVector, Keyword}},
+        {schema::Version::Draft2020_12, {&Self::additionalItems, Keyword}}}},
       {"maxItems", &Self::maxItems},
       {"maxLength", &Self::maxLength},
       {"maxProperties", {schema::Version::Draft04, &Self::maxProperties}},
@@ -151,21 +134,23 @@ private:
       {"minProperties", {schema::Version::Draft04, &Self::minProperties}},
       {"minimum", &Self::minimum},
       {"multipleOf", {schema::Version::Draft04, &Self::multipleOf}},
-      {"not", {schema::Version::Draft04, &Self::isNot}},
-      {"oneOf", {schema::Version::Draft04, &Self::oneOf}},
+      {"not", {schema::Version::Draft04, {&Self::isNot, Keyword}}},
+      {"oneOf", {schema::Version::Draft04, {&Self::oneOf, Keyword}}},
       {"pattern", &Self::pattern},
-      {"patternProperties", &Self::patternProperties},
-      {"prefixItems", {schema::Version::Draft2020_12, &Self::prefixItems}},
+      {"patternProperties", {{&Self::patternProperties, KeywordMap}}},
+      {"prefixItems", {schema::Version::Draft2020_12, {&Self::prefixItems, Keyword}}},
       {"properties",
-       {{schema::Version::Earliest, &Self::propertiesDraft3},
-        {schema::Version::Draft04, &Self::properties}}},
+       {{schema::Version::Earliest, {&Self::propertiesDraft3, KeywordMap}},
+        {schema::Version::Draft04, {&Self::properties, KeywordMap}}}},
       {"propertyNames", {schema::Version::Draft06, &Self::propertyNames}},
       {"required", {schema::Version::Draft04, &Self::required}},
-      {"then", {schema::Version::Draft07, Keyword}},
+      {"then", {schema::Version::Draft07, DependentKeyword{"if"}}},
       {"type",
        {{schema::Version::Earliest, &Self::typeDraft3}, {schema::Version::Draft04, &Self::type}}},
-      {"unevaluatedItems", {schema::Version::Draft2019_09, &Self::unevaluatedItems}},
-      {"unevaluatedProperties", {schema::Version::Draft2019_09, &Self::unevaluatedProperties}},
+      {"unevaluatedItems",
+       {schema::Version::Draft2019_09, {&Self::unevaluatedItems, PostConstraint}}},
+      {"unevaluatedProperties",
+       {schema::Version::Draft2019_09, {&Self::unevaluatedProperties, PostConstraint}}},
       {"uniqueItems", &Self::uniqueItems},
   };
 
@@ -219,7 +204,7 @@ public:
   }
 
   detail::Vocabulary<A> keywords(schema::Version version) const {
-    std::unordered_map<std::string_view, MakeConstraint> rval;
+    std::unordered_map<std::string_view, vocabulary::Metadata<A>> rval;
     for (auto const & [key, versions] : constraints_) {
       if (auto it = versions.data.lower_bound(version); it != versions.data.end() && it->second) {
         rval.emplace(key, it->second);
@@ -868,7 +853,7 @@ public:
     std::string const prefix =
         context.vocab->version() >= schema::Version::Draft2020_12 ? "prefixItems" : "items";
 
-    Object const & parent = *context.parent;
+    auto const & parent = *context.parent;
     // Before Draft 2020-12, the "items" could be either a subschema or a tuple.
     // When not provided, we therefore treat it as an "accept-all" schema, and
     // thus will never have additionalItems to process. Similarly - if it is an
@@ -1174,7 +1159,7 @@ public:
     std::unordered_set<std::string> properties;
     std::vector<std::string> patterns;
 
-    Object const & parent = *context.parent;
+    auto const & parent = *context.parent;
     if (parent.contains("properties")) {
       for (auto [key, _] : parent["properties"].as_object()) {
         properties.insert(key);

+ 20 - 59
include/jvalidate/detail/vocabulary.h

@@ -6,66 +6,24 @@
 
 #include <jvalidate/enum.h>
 #include <jvalidate/forward.h>
+#include <jvalidate/vocabulary.h>
 
 namespace jvalidate::detail {
 template <Adapter A> struct ParserContext;
-template <Adapter A> class Vocabulary {
-public:
-  friend class ConstraintFactory<A>;
-  using pConstraint = std::unique_ptr<constraint::Constraint>;
-  using MakeConstraint = std::function<pConstraint(ParserContext<A> const &)>;
 
+template <Adapter A> class Vocabulary {
 private:
   schema::Version version_;
-  std::unordered_map<std::string_view, MakeConstraint> make_;
+  std::unordered_map<std::string_view, vocabulary::Metadata<A>> metadata_;
   std::unordered_set<std::string_view> permitted_;
   std::unordered_set<std::string> vocabularies_;
 
-  // TODO(samjaffe): Migrate this back to constraintsfactory
-  // A list of keywords that participate in scans for "$id" and "$anchor" tokens
-  // etc. We need to track this because it is possible (though an anti-pattern),
-  // to embed an $id token in a "const", or in a bogus keyword.
-  inline static const std::unordered_set<std::string_view> s_keywords{
-      // Special tokens - we need to scan definitions for sub-ids, the keys of
-      // these objects are arbitrary, so we need to skip past them in scanning.
-      "$defs", "definitions",
-      // Draft03 only - extends allows us to specify an arbitrary number of
-      // parent schemas that we use on top of the current schema. Equivalent to
-      // allOf.[*].$ref in Draft04+.
-      "extends",
-      // Algorithmic/Conditional Schema types - for annoying reasons, we need
-      // to be able to scan the subschemas of these for $id tokens and whatnot,
-      // despite it never being a sensible decision to embed $ids like that.
-      "allOf", "anyOf", "not", "oneOf", "if", "then", "else",
-      // Next are the four array-specific schema keywords
-      "items", "prefixItems", "additionalItems", "unevaluatedItems",
-      // And the six object-specific schema keywords. With the exception of
-      // additionalProperties and unevaluatedProperties, all of these represent
-      // objects mapping "arbitrary" keys onto schemas, so we need to signal
-      // that...
-      "dependencies", "dependentSchemas", "patternProperties", "properties", "additionalProperties",
-      "unevaluatedProperties"};
-  // ...using this property_keywords_ object, we list those six keywords that
-  // are represented as an object of arbitrary keys onto schemas that may
-  // contain $id/$anchor fields.
-  inline static const std::unordered_set<std::string_view> s_property_keywords{
-      "$defs",     "definitions", "dependencies", "dependentSchemas", "patternProperties",
-      "properties"};
-
-  // Special rules must be applied for post constraints, of which there are
-  // currently two. Current discussion says that "constraints" and
-  // "post-contraints" SHOULD be run as two separate phases (since posts need
-  // to know which items/properties would be processed, and perhaps other things
-  // in the future), but that there is no rule on order-of-operations within
-  // a given phase, nor is there any intention to introduce some kind of Phase 3
-  inline static const std::unordered_set<std::string_view> s_post_constraints{
-      "unevaluatedItems", "unevaluatedProperties"};
-
 public:
   Vocabulary() = default;
-  Vocabulary(schema::Version version, std::unordered_map<std::string_view, MakeConstraint> make)
-      : version_(version), make_(std::move(make)) {
-    for (auto const & [keyword, _] : make_) {
+  Vocabulary(schema::Version version,
+             std::unordered_map<std::string_view, vocabulary::Metadata<A>> metadata)
+      : version_(version), metadata_(std::move(metadata)) {
+    for (auto const & [keyword, _] : metadata_) {
       permitted_.emplace(keyword);
     }
   }
@@ -84,7 +42,7 @@ public:
                 std::unordered_set<std::string> const & vocabularies = {}) & {
     permitted_.clear();
     vocabularies_ = vocabularies;
-    for (auto const & [keyword, _] : make_) {
+    for (auto const & [keyword, _] : metadata_) {
       // We only file permitted_keywords into this Vocabulary if we have defined
       // bindings for that keyword
       if (permitted_keywords.contains(std::string(keyword))) {
@@ -105,7 +63,7 @@ public:
 
     // Some implementations wouldn't even bother with format constraints, and
     // others would provide implementations that either missed a number of edge
-    // cases or were flat-out wrong on certail matters.
+    // cases or were flat-out wrong on certain matters.
     // Therefore - starting in Draft 2019-09, the format keyword is an
     // annotation by default, instead of an assertion.
     if (version_ == schema::Version::Draft2019_09) {
@@ -128,7 +86,7 @@ public:
    * @param word The "key"word being looked up (e.g. "if", "properties", ...)
    */
   bool is_keyword(std::string_view word) const {
-    return permitted_.contains(word) && make_.contains(word) && s_keywords.contains(word);
+    return has(word) && metadata_.at(word).is_keyword;
   }
 
   /**
@@ -138,7 +96,7 @@ public:
    * @param word The "key"word being looked up (e.g. "if", "properties", ...)
    */
   bool is_property_keyword(std::string_view word) const {
-    return is_keyword(word) && s_property_keywords.contains(word);
+    return has(word) && metadata_.at(word).is_keyword_map;
   }
 
   /**
@@ -147,9 +105,7 @@ public:
    *
    * @param word The "key"word being looked up (e.g. "if", "properties", ...)
    */
-  bool is_constraint(std::string_view word) const {
-    return permitted_.contains(word) && make_.contains(word) && make_.at(word);
-  }
+  bool is_constraint(std::string_view word) const { return has(word) && metadata_.at(word).make; }
 
   /**
    * @brief Fabricate the given constraint if real from the current context
@@ -163,12 +119,17 @@ public:
    * something meaningful), else null.
    *
    * The second element is a boolean indicating if the constraint needs to be
-   * evaluted after other constraints to use their tracking/annotations.
+   * evaluated after other constraints to use their tracking/annotations.
    * See the above comments on s_post_constraints for more info.
    */
   auto constraint(std::string_view word, ParserContext<A> const & context) const {
-    return std::make_pair(is_constraint(word) ? make_.at(word)(context) : nullptr,
-                          s_post_constraints.contains(word));
+    return std::make_pair(is_constraint(word) ? metadata_.at(word).make(context) : nullptr,
+                          has(word) && metadata_.at(word).is_post_constraint);
+  }
+
+private:
+  bool has(std::string_view word) const { // True when word is permitted and has a metadata entry.
+    return permitted_.contains(word) && metadata_.contains(word);
+  }
 };
 }

+ 110 - 0
include/jvalidate/vocabulary.h

@@ -0,0 +1,110 @@
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string_view>
+
+#include <jvalidate/forward.h>
+
+namespace jvalidate::detail {
+template <Adapter A> struct ParserContext;
+}
+
+namespace jvalidate::vocabulary {
+
+/**
+ * @brief Metadata tag for marking a keyword as no longer supported. This is
+ * needed because we store the constraints by version using
+ * std::map::lower_bound, instead of needing to pound them out for every version
+ * of the JSON Schema specification.
+ *
+ * While it is permitted to reuse keywords that have been removed, it should be
+ * avoided to minimize confusion.
+ */
+constexpr struct {
+} Removed;
+
+constexpr struct {
+} Literal; // Metadata tag for a pure annotation keyword, e.g. "$comment".
+
+/**
+ * @brief Metadata tag for marking a keyword as containing either a single
+ * subschema, or an array of subschema (e.g. "not", "oneOf", etc.). When parsing
+ * a schema, we need to be able to identify these to search for "$id" and
+ * "$anchor" tags, as they can allow us to jump into otherwise unreachable
+ * sections of the schema.
+ */
+constexpr struct {
+} Keyword;
+
+/**
+ * @brief Metadata tag for marking a keyword as containing a map of names onto
+ * subschemas (e.g. "properties"). Because the keys in this node of the schema
+ * are arbitrary strings, we need to jump past them when searching for the "$id"
+ * and "$anchor" tags.
+ * We cannot simply do a blind recursive-walk of the schema JSON, because you
+ * could put an "$id" tag in an "example" block, where it should not be scanned.
+ */
+constexpr struct {
+} KeywordMap;
+
+/**
+ * @brief Metadata tag for marking a keyword as needing to wait until after all
+ * other (non-PostConstraint) keywords are validated.
+ * This tag is used specifically to mark "unevaluatedItems" and
+ * "unevaluatedProperties", since the rules they use to decide where to run
+ * cannot be compiled the way that "additionalItems" and "additionalProperties"
+ * can.
+ */
+constexpr struct {
+} PostConstraint;
+
+struct DependentKeyword : std::string_view {}; // Names the keyword this one depends on, e.g. "if".
+
+/**
+ * @brief This type represents a union of several different "parsing handler"
+ * states, aligned to a specific starting version:
+ * A) Annotation keywords
+ * A.1) Removed              - This keyword is no longer supported.
+ * A.2) Literal              - This is a pure annotation. e.g. "$comment".
+ * A.3) KeywordMap           - This keyword does not produce any constraints,
+ *      but must be evaluated for annotations and anchors. e.g. "$defs".
+ * A.4) DependentKeyword(id) - A keyword whose annotations are only evaluated
+ *      if the depended on keyword is also present in the current schema. It
+ *      may be connected to a constraint, but that constraint will be parsed
+ *      in the depended keyword.
+ *
+ * B) Constraint keywords
+ * B.1) Make                 - A parser that does not contain any subschemas.
+ * B.2) Make, Keyword        - A parser that contains either a single
+ *      subschema or an array of subschemas. e.g. "not", "oneOf".
+ * B.3) Make, KeywordMap     - A parser that contains a key-value mapping onto
+ *      subschemas. e.g. "properties".
+ * B.4) Make, PostConstraint - A parser whose constraint applies in
+ *      Unevaluated Locations (11). Assumed to be a single subschema.
+ */
+template <Adapter A> struct Metadata { // Per-keyword record: optional constraint factory plus flags.
+  using pConstraint = std::unique_ptr<constraint::Constraint>; // Owning handle to a constraint.
+
+  Metadata(decltype(Removed)) {} // No factory, no flags: converts to false.
+  Metadata(decltype(Literal)) {} // No factory, no flags: converts to false.
+  Metadata(decltype(KeywordMap)) : is_keyword(true), is_keyword_map(true) {} // Map without a factory.
+  Metadata(DependentKeyword dep) : is_keyword(true) {} // NOTE(review): dep is not stored - confirm.
+
+  template <typename F> Metadata(F make) : make(make) {} // Factory only; all flags stay false.
+  template <typename F> Metadata(F make, decltype(Keyword)) : make(make), is_keyword(true) {}
+  template <typename F> // Factory for a keyword whose value maps names onto subschemas.
+  Metadata(F make, decltype(KeywordMap)) : make(make), is_keyword(true), is_keyword_map(true) {}
+  template <typename F> // Factory for a keyword that waits for the non-PostConstraint keywords.
+  Metadata(F make, decltype(PostConstraint))
+      : make(make), is_keyword(true), is_post_constraint(true) {}
+
+  explicit operator bool() const { return make || is_keyword; } // False only with no factory and no keyword flag.
+  operator std::function<pConstraint(detail::ParserContext<A> const &)>() const { return make; }
+
+  std::function<pConstraint(detail::ParserContext<A> const &)> make = nullptr; // Empty when no constraint.
+  bool is_keyword = false; // Reported by Vocabulary::is_keyword.
+  bool is_keyword_map = false; // Reported by Vocabulary::is_property_keyword.
+  bool is_post_constraint = false; // Second element of the pair from Vocabulary::constraint.
+};
+}