#pragma once

#include
#include
#include
#include
#include

// NOTE(review): in this copy of the file the angle-bracket parts (#include
// targets, template parameter lists, container/std::function argument lists)
// appear to be missing. The code tokens below are left exactly as found -
// confirm against the upstream header before building.

namespace jvalidate::detail {
template struct ParserContext;

/**
 * @brief The set of keywords understood for one schema::Version, together
 * with the factory function bound to each keyword and the subset of those
 * keywords that is currently permitted.
 */
template class Vocabulary {
public:
  friend class ConstraintFactory;
  // Owning handle to a constructed constraint.
  using pConstraint = std::unique_ptr;
  // Factory signature: invoked with the current parser context (see
  // constraint() below) and yields a pConstraint.
  using MakeConstraint = std::function const &)>;

private:
  // The schema draft that this Vocabulary describes.
  // NOTE(review): there is no default member initializer here, so - if
  // schema::Version is an enum - a default-constructed Vocabulary would hold
  // an indeterminate version_. Confirm that callers never query version() or
  // is_format_assertion() on such an object.
  schema::Version version_;
  // Every registered keyword, mapped to its factory. The factory may be null
  // (see is_constraint).
  std::unordered_map make_;
  // The keys of make_ that are currently enabled (see restrict).
  std::unordered_set permitted_;
  // Vocabulary identifiers supplied through restrict(), consulted by
  // is_format_assertion().
  std::unordered_set vocabularies_;

  // TODO(samjaffe): Migrate this back to ConstraintFactory
  // A list of keywords that participate in scans for "$id" and "$anchor" tokens
  // etc. We need to track this because it is possible (though an anti-pattern),
  // to embed an $id token in a "const", or in a bogus keyword.
  inline static const std::unordered_set s_keywords{
      // Special tokens - we need to scan definitions for sub-ids, the keys of
      // these objects are arbitrary, so we need to skip past them in scanning.
      "$defs", "definitions",
      // Draft03 only - extends allows us to specify an arbitrary number of
      // parent schemas that we use on top of the current schema. Equivalent to
      // allOf.[*].$ref in Draft04+.
      "extends",
      // Algorithmic/Conditional Schema types - for annoying reasons, we need
      // to be able to scan the subschemas of these for $id tokens and whatnot,
      // despite it never being a sensible decision to embed $ids like that.
      "allOf", "anyOf", "not", "oneOf", "if", "then", "else",
      // Next are the four array-specific schema keywords
      "items", "prefixItems", "additionalItems", "unevaluatedItems",
      // And the six object-specific schema keywords. With the exception of
      // additionalProperties and unevaluatedProperties, all of these represent
      // objects mapping "arbitrary" keys onto schemas, so we need to signal
      // that...
      "dependencies", "dependentSchemas", "patternProperties", "properties",
      "additionalProperties", "unevaluatedProperties"};

  // ...using this s_property_keywords object, we list those six keywords that
  // are represented as an object of arbitrary keys onto schemas that may
  // contain $id/$anchor fields.
  inline static const std::unordered_set s_property_keywords{
      "$defs", "definitions", "dependencies", "dependentSchemas",
      "patternProperties", "properties"};

  // Special rules must be applied for post-constraints, of which there are
  // currently two. Current discussion says that "constraints" and
  // "post-constraints" SHOULD be run as two separate phases (since posts need
  // to know which items/properties would be processed, and perhaps other things
  // in the future), but that there is no rule on order-of-operations within
  // a given phase, nor is there any intention to introduce some kind of Phase 3.
  inline static const std::unordered_set s_post_constraints{
      "unevaluatedItems", "unevaluatedProperties"};

public:
  Vocabulary() = default;

  /**
   * @brief Construct a Vocabulary in which every registered keyword is
   * permitted.
   *
   * @param version The schema draft this Vocabulary belongs to.
   *
   * @param make The keyword-to-factory bindings. Every key of this map is
   * added to the permitted set.
   */
  Vocabulary(schema::Version version, std::unordered_map make)
      : version_(version), make_(std::move(make)) {
    for (auto const & [keyword, _] : make_) {
      permitted_.emplace(keyword);
    }
  }

  /**
   * @brief Reset the list of keywords that Vocabulary actually respects.
   * The previous permitted set and vocabulary list are discarded. This
   * function is lvalue-ref-qualified, so it cannot be called on a temporary.
   *
   * @param permitted_keywords The selection of keywords to allow for
   * searches/constraint building. Note that a constraint might be
   * registered to a null function for compatibility with this.
   *
   * @param vocabularies An optional selection of vocabulary schemas, used
   * as metadata, and deducing {@see is_format_assertion}.
   */
  void restrict(std::unordered_set const & permitted_keywords,
                std::unordered_set const & vocabularies = {}) & {
    permitted_.clear();
    vocabularies_ = vocabularies;
    for (auto const & [keyword, _] : make_) {
      // We only file permitted_keywords into this Vocabulary if we have defined
      // bindings for that keyword
      if (permitted_keywords.contains(std::string(keyword))) {
        permitted_.insert(keyword);
      }
    }
  }

  /** @brief The schema draft this Vocabulary was constructed with. */
  schema::Version version() const { return version_; }

  /**
   * @brief Should the "format" keyword be treated as an assertion (as opposed
   * to an annotation) under this Vocabulary's version and vocabulary list?
   */
  bool is_format_assertion() const {
    // In Draft07 and prior - format assertions were considered enabled by
    // default. This is - of course - problematic because very few
    // implementations actually had full support for format constraints.
    if (version_ < schema::Version::Draft2019_09) {
      return true;
    }
    // Some implementations wouldn't even bother with format constraints, and
    // others would provide implementations that either missed a number of edge
    // cases or were flat-out wrong on certain matters.
    // Therefore - starting in Draft 2019-09, the format keyword is an
    // annotation by default, instead of an assertion.
    if (version_ == schema::Version::Draft2019_09) {
      return vocabularies_.contains("/vocab/format");
    }
    // Draft 2020-12 makes this even more explicit - having separate vocabulary
    // documents for "format as assertion" and "format as annotation". Allowing
    // validators to add format constraints that are only used for annotating
    // results.
    return vocabularies_.contains("/vocab/format-assertion");
  }

  /**
   * @brief Is the given "key"word one of the schema-bearing keywords that is
   * scanned for nested "$id"/"$anchor" tokens? The word must be permitted,
   * must be registered, and must appear in s_keywords. Unlike
   * {@see is_constraint}, a keyword registered with a null factory still
   * qualifies (e.g. then/else); on the other hand, a registered constraint
   * that is not listed in s_keywords does not.
   *
   * @param word The "key"word being looked up (e.g. "if", "properties", ...)
   */
  bool is_keyword(std::string_view word) const {
    return permitted_.contains(word) && make_.contains(word) && s_keywords.contains(word);
  }

  /**
   * @brief Does the given "key"word represent a property object - that is to
   * say, an object containing some number of schemas mapped by arbitrary keys
   *
   * @param word The "key"word being looked up (e.g. "if", "properties", ...)
   */
  bool is_property_keyword(std::string_view word) const {
    return is_keyword(word) && s_property_keywords.contains(word);
  }

  /**
   * @brief Is the given word a real constraint in the Vocabulary. In essence,
   * it must be an enabled keyword AND it must have a non-null factory function.
   *
   * @param word The "key"word being looked up (e.g. "if", "properties", ...)
   */
  bool is_constraint(std::string_view word) const {
    // make_.at(word) is only reached once make_.contains(word) has succeeded;
    // its result is tested for being a non-empty factory.
    return permitted_.contains(word) && make_.contains(word) && make_.at(word);
  }

  /**
   * @brief Fabricate the given constraint if real from the current context
   *
   * @param word The "key"word being looked up (e.g. "if", "properties", ...)
   *
   * @param context The current context of schema parsing, used for re-entrancy.
   *
   * @returns A pair whose first element is either a pointer to a constraint
   * (if word represents a supported constraint AND the constraint resolves to
   * something meaningful), else null.
   *
   * The second element is a boolean indicating if the constraint needs to be
   * evaluated after other constraints to use their tracking/annotations.
   * It is computed from s_post_constraints alone, independently of whether a
   * constraint was actually produced. See the comments on s_post_constraints
   * for more info.
   */
  auto constraint(std::string_view word, ParserContext const & context) const {
    return std::make_pair(is_constraint(word) ? make_.at(word)(context) : nullptr,
                          s_post_constraints.contains(word));
  }
};
} // namespace jvalidate::detail