Browse Source

docs: 41% comment/code ratio

Sam Jaffe 3 months ago
parent
commit
3295437db7

+ 168 - 0
include/jvalidate/adapter.h

@@ -11,26 +11,124 @@
 #include <jvalidate/status.h>
 
 namespace jvalidate::adapter {
+/**
+ * @brief An interface for a type-erased reference-wrapper around a JSON node.
+ *
+ * Unlike languages like python, there are dozens of different C++ Libraries
+ * for JSON parsing/construction. Each of these libraries has its own set of
+ * getter functions, rules for handling missing values, and degree to which it
+ * can engage in fuzziness of types.
+ *
+ * Adapter has two main groups of methods:
+ * - as_*() and *_size() virtual functions
+ * - maybe_*() concrete functions
+ *
+ * Most interaction with Adapter will be done via the maybe_*() functions,
+ * with or without strictness enabled depending on what constraint is being
+ * checked.
+ */
 class Adapter {
 public:
   virtual ~Adapter() = default;
 
+  /**
+   * @brief Get the jvalidate::adapter::Type that this adapter represents.
+   * This represents the types recognized by json-schema:
+   *    null, bool, integer, number, string, array, object
+   * This function is meant to be used internally - and not by any of the
+   * Constraint objects.
+   */
   virtual Type type() const = 0;
+
+  /**
+   * @brief Obtain an immutable copy of the current node.
+   * Because an Adapter stores a reference to the underlying JSON, it cannot
+   * be stored by e.g. a Const/Enum Constraint without risking a Segfault.
+   */
   virtual std::unique_ptr<Const const> freeze() const = 0;
 
+  /**
+   * @brief Extract a boolean value from this JSON node.
+   * @pre type() == Type::Boolean
+   *
+   * @throws If the pre-condition is not valid, then this function may throw
+   * or produce other undefined behavior, depending on the implementation
+   * details of the underlying type.
+   */
   virtual bool as_boolean() const = 0;
+  /**
+   * @brief Extract an integer value from this JSON node.
+   * @pre type() == Type::Integer
+   *
+   * @throws If the pre-condition is not valid, then this function may throw
+   * or produce other undefined behavior, depending on the implementation
+   * details of the underlying type.
+   */
   virtual int64_t as_integer() const = 0;
+  /**
+   * @brief Extract a decimal value from this JSON node.
+   * @pre type() == Type::Number
+   *
+   * @throws If the pre-condition is not valid, then this function may throw
+   * or produce other undefined behavior, depending on the implementation
+   * details of the underlying type.
+   */
   virtual double as_number() const = 0;
+  /**
+   * @brief Extract a string value from this JSON node.
+   * @pre type() == Type::String
+   *
+   * @throws If the pre-condition is not valid, then this function may throw
+   * or produce other undefined behavior, depending on the implementation
+   * details of the underlying type.
+   */
   virtual std::string as_string() const = 0;
 
+  /**
+   * @brief Get the size of the JSON array in this node.
+   * @pre type() == Type::Array
+   *
+   * @throws If the pre-condition is not valid, then this function may throw
+   * or produce other undefined behavior, depending on the implementation
+   * details of the underlying type.
+   */
   virtual size_t array_size() const = 0;
+  /**
+   * @brief Get the size of the JSON object in this node.
+   * @pre type() == Type::Object
+   *
+   * @throws If the pre-condition is not valid, then this function may throw
+   * or produce other undefined behavior, depending on the implementation
+   * details of the underlying type.
+   */
   virtual size_t object_size() const = 0;
 
+  /**
+   * @brief Loop through every element of the JSON array in this node, applying
+   * the given callback function to them.
+   *
+   * @param cb A callback of the form Adapter => Status
+   *
+   * @return Status::Accept iff there are no errors
+   */
   virtual Status apply_array(AdapterCallback const & cb) const = 0;
+  /**
+   * @brief Loop through every element of the JSON object in this node, applying
+   * the given callback function to them.
+   *
+   * @param cb A callback of the form (string, Adapter) => Status
+   *
+   * @return Status::Accept iff there are no errors
+   */
   virtual Status apply_object(ObjectAdapterCallback const & cb) const = 0;
 
   virtual bool equals(Adapter const & lhs, bool strict) const = 0;
 
+  /**
+   * @brief Test if this object is null-like
+   *
+   * @param strict Does this function allow for fuzzy comparisons with strings?
+   */
   bool maybe_null(bool strict) const {
     switch (type()) {
     case Type::Null:
@@ -42,6 +140,14 @@ public:
     }
   }
 
+  /**
+   * @brief Attempts to extract a boolean value from this JSON node
+   *
+   * @param strict Does this function allow for fuzzy comparisons with strings?
+   *
+   * @return The boolean value contained if it is possible to deduce
+   * (or type() == Type::Boolean), else nullopt.
+   */
   std::optional<bool> maybe_boolean(bool strict) const {
     switch (type()) {
     case Type::Boolean:
@@ -62,6 +168,15 @@ public:
     }
   }
 
+  /**
+   * @brief Attempts to extract an integer value from this JSON node
+   *
+   * @param strict Does this function allow for fuzzy comparisons with strings
+   * and/or booleans?
+   *
+   * @return The integer value contained if it is possible to deduce from an
+   * integer, number, boolean, or string. Else nullopt
+   */
   std::optional<int64_t> maybe_integer(bool strict) const {
     switch (type()) {
     case Type::Number:
@@ -91,6 +206,14 @@ public:
     }
   }
 
+  /**
+   * @brief Attempts to extract a number value from this JSON node
+   *
+   * @param strict Does this function allow for fuzzy comparisons with strings?
+   *
+   * @return The number value contained if it is possible to deduce
+   * (or type() == Type::Integer || type() == Type::Number), else nullopt.
+   */
   std::optional<double> maybe_number(bool strict) const {
     switch (type()) {
     case Type::Number:
@@ -112,6 +235,15 @@ public:
     }
   }
 
+  /**
+   * @brief Attempts to extract a string value from this JSON node
+   *
+   * @param strict Does this function allow for fuzzy comparisons with other
+   * types?
+   *
+   * @return The string value contained if it is possible to deduce from a
+   * scalar type, else nullopt.
+   */
   std::optional<std::string> maybe_string(bool strict) const {
     switch (type()) {
     case Type::Null:
@@ -138,6 +270,15 @@ public:
     }
   }
 
+  /**
+   * @brief Attempts to extract the array length from this JSON node
+   *
+   * @param strict Does this function allow for fuzzy comparisons with other
+   * types?
+   *
+   * @return array_size() if this is an array, else 0 or nullopt, depending
+   * on some factors.
+   */
   std::optional<size_t> maybe_array_size(bool strict) const {
     switch (type()) {
     case Type::Array:
@@ -151,6 +292,15 @@ public:
     }
   }
 
+  /**
+   * @brief Attempts to extract the object length from this JSON node
+   *
+   * @param strict Does this function allow for fuzzy comparisons with other
+   * types?
+   *
+   * @return object_size() if this is an object, else 0 or nullopt, depending
+   * on some factors.
+   */
   std::optional<size_t> maybe_object_size(bool strict) const {
     switch (type()) {
     case Type::Object:
@@ -165,14 +315,32 @@ public:
   }
 };
 
+/**
+ * @brief An interface for an immutable, owning handle to a type-erased JSON
+ * node. {@see Adapter::freeze} for more explanation of why this is necessary.
+ */
 class Const {
 public:
   virtual ~Const() = default;
+  /**
+   * @brief Perform an action on this object, such as copying or testing
+   * equality.
+   *
+   * @param cb A callback function of the form Adapter => Status
+   *
+   * @return the result of cb on the contained JSON
+   */
   virtual Status apply(AdapterCallback const & cb) const = 0;
 };
 }
 
 namespace jvalidate::adapter::detail {
+/**
+ * @brief The simplest implementation of Const.
+ * Depends on the AdapterTraits struct.
+ *
+ * @tparam JSON The type being adapted
+ */
 template <typename JSON> class GenericConst final : public Const {
 public:
   explicit GenericConst(JSON const & value) : value_(value) {}

+ 25 - 0
include/jvalidate/detail/anchor.h

@@ -10,6 +10,31 @@
 #include <jvalidate/detail/expect.h>
 
 namespace jvalidate::detail {
+/**
+ * @brief An Anchor is a simple name (shorthand) that refers to a named location
+ * in a JSON-schema. Compare this to a URI - which can refer to either internal
+ * starting-points in the active schema or external documents on disk or at an
+ * external URL-location.
+ *
+ * Anchors are useful in cases where there is a complicated or long path to a
+ * commonly used definition. Consider for example:
+ * { "$ref": "#/properties/Column/definitions/Row" }
+ * vs.
+ * { "$ref": "#ColRow" }
+ *
+ * This can be much easier to read, or find an object when doing a quick lookup,
+ * since editors generally don't have a "lookup this JSON-Pointer" option.
+ *
+ * An anchor may only be a plain-name fragment (first character is alpha or '_',
+ * all other characters are alphanumeric, '_', '.', or '-').
+ * When defining an anchor using the "$anchor" or "$dynamicAnchor" tags, only
+ * this fragment is used. When defining an anchor as part of an "$id" tag, the
+ * form is `<URI>#<ANCHOR>`, the same as when accessing the anchor through a
+ * "$ref". In the same document - you can reference the anchor by `#<ANCHOR>`,
+ * just like how you can eschew the URI in a JSON-Pointer within the same doc.
+ *
+ * @see https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-01#section-8.2.2
+ */
 class Anchor {
 private:
   std::string content_;

+ 5 - 0
include/jvalidate/detail/deref_proxy.h

@@ -1,6 +1,11 @@
 #pragma once
 
 namespace jvalidate::detail {
+/**
+ * @brief An object that acts like a pointer to an rvalue - without requiring us
+ * to heap allocate a unique_ptr.
+ * @tparam T the type being pointed to.
+ */
 template <typename T> struct DerefProxy {
   T & operator*() { return value; }
   T const & operator*() const { return value; }

+ 59 - 0
include/jvalidate/detail/dynamic_reference_context.h

@@ -10,22 +10,50 @@
 #include <jvalidate/uri.h>
 
 namespace jvalidate::detail {
+/**
+ * @brief Starting with Draft 2019-09, it is possible to set $recursiveAnchor
+ * and $recursiveRef options in schemas. In Draft 2020-12 this changes to the
+ * more powerful/generic $dynamicAnchor and $dynamicRef.
+ *
+ * The rules of handling these anchors is that we maintain a stack of all of the
+ * loaded anchors for each given name in the order that they are loaded. But the
+ * resolved reference that they point to is the first anchor by that name to be
+ * loaded. This means that we can create recursive/self-referential chains.
+ *
+ * When we encounter the appropriate $dynamicRef/$recursiveRef tag, we fetch
+ * the most appropriate anchored location - which is usually the prior mentioned
+ * first path that registered the anchor.
+ */
 class DynamicReferenceContext {
 private:
   std::deque<URI> sources_;
   std::map<Anchor, std::deque<std::optional<Reference>>> data_;
 
 public:
+  /**
+   * @brief Add all dynamic anchors contained in a given document (as defined
+   * by a common URI) to the current stack, pointing them to the earliest loaded
+   * parent reference and unregistering all of the anchors that are in context,
+   * but not in this specific document.
+   */
   OnBlockExit scope(URI const & source, std::map<Anchor, Reference> const & frame) {
+    // No-Op loading, for convenience
     if (frame.empty() && data_.empty()) {
       return nullptr;
     }
 
     sources_.push_back(source);
     for (auto const & [k, v] : frame) {
+      // If we have not currently registered this anchor, use the input
+      // reference path, else use the first reference path registered in the
+      // stack.
       data_[k].push_back(data_[k].empty() ? v : data_[k].front());
     }
 
+    // For all of the anchors that are not being pushed onto the stack, push a
+    // nullopt onto the stack (as well as ensuring that all stacks are
+    // equal-sized). This allows us to disable certain anchors in a given
+    // document (i.e. that doc does not define a specific $dynamicAnchor).
     for (auto & [k, stack] : data_) {
       if (not frame.contains(k)) {
         stack.push_back(std::nullopt);
@@ -35,6 +63,9 @@ public:
       }
     }
 
+    // Scope object that pops all of the elements pushed by this call. Due to
+    // the equal-size promise of the above loop, we can just blindly loop
+    // through all of the stacks to pop instead of tracking which anchors this
+    // particular call pushed.
     return [this]() {
       sources_.pop_back();
       for (auto it = data_.begin(); it != data_.end();) {
@@ -48,8 +79,29 @@ public:
     };
   }
 
+  /**
+   * @brief Is the given anchor in the current $dynamicRef lookup context
+   * (including suppressed anchors). This check is necessary in part because we
+   * permit using $dynamicRef to refer to regular $anchor objects if there is
+   * no $dynamicAnchor in the current context.
+   *
+   * TODO(samjaffe): May be able to add a nullopt check...
+   */
   bool contains(Anchor const & key) const {
     // Only the presence of the key matters here; the contents of the stack
     // registered under that key are not inspected.
     return data_.find(key) != data_.end();
   }
 
+  /**
+   * @brief Safely fetch the closest matching $dynamicAnchor to the given
+   * arguments. Because $dynamicRef permits including a URI, it is technically
+   * possible to "jump" to an anchor that is not the top-level one, this can
+   * be useful if the current scope does not generate a bookmark $dynamicAnchor.
+   *
+   * @param source The owning source, which is either the URI of the currently
+   * operating schema document, or a URI specified in the $dynamicRef value.
+   * Using this information lets us jump past suppressed anchors by explicitly
+   * stating the owning context.
+   *
+   * @param key The actual anchor being searched for.
+   */
   std::optional<Reference> lookup(URI const & source, Anchor const & key) const {
     if (auto it = data_.find(key); it != data_.end()) {
       return it->second.at(index_of(source));
@@ -57,7 +109,14 @@ public:
     return std::nullopt;
   }
 
+  /**
+   * @brief Finds the (index of the) dynamic anchor directly associated with the
+   * given URI; or the final registered anchor.
+   */
   size_t index_of(URI const & source) const {
+    // Start at the end because most commonly source will refer to the currently
+    // operating schema, which is also going to be the top item in the sources
+    // stack.
     for (size_t i = sources_.size(); i-- > 0;) {
       if (sources_[i] == source) {
         return i;

+ 58 - 0
include/jvalidate/detail/expect.h

@@ -12,6 +12,15 @@
 #endif
 
 #if defined(JVALIDATE_USE_EXCEPTIONS)
+/**
+ * @brief Throw an exception after constructing the error message.
+ *
+ * @param extype A subtype of std::exception that can be constructed using a
+ * std::string.
+ *
+ * @param message The error "message" to be emitted - in the form of an iostream
+ * output chain (e.g. `"unsupported index " << i << ", valid items " << items`).
+ */
 #define JVALIDATE_THROW(extype, message)                                                           \
   do {                                                                                             \
     std::stringstream ss;                                                                          \
@@ -19,6 +28,14 @@
     throw extype(ss.str());                                                                        \
   } while (false)
 #else
+/**
+ * @brief Print an error message and then terminate execution
+ *
+ * @param extype[ignored]
+ *
+ * @param message The error "message" to be emitted - in the form of an iostream
+ * output chain (e.g. `"unsupported index " << i << ", valid items " << items`).
+ */
 #define JVALIDATE_THROW(extype, message)                                                           \
   do {                                                                                             \
     std::cerr << message << std::endl;                                                             \
@@ -26,15 +43,56 @@
   } while (false)
 #endif
 
+/**
+ * @brief Assert a certain pre/post-condition is true, else emit an error of a
+ * specified type and message.
+ *
+ * @param condition A boolean or boolean-like expression that should be TRUE.
+ * If the condition is FALSE, then the other params are used to produce errors.
+ *
+ * @param extype A subtype of std::exception that can be constructed using a
+ * std::string. If exceptions are enabled, and condition is FALSE - then this
+ * is the type that will be thrown.
+ *
+ * @param message The error "message" to be emitted - in the form of an iostream
+ * output chain (e.g. `"unsupported index " << i << ", valid items " << items`).
+ */
 #define EXPECT_T(condition, extype, message)                                                       \
   if (JVALIDATE_UNLIKELY(!(condition))) {                                                          \
     JVALIDATE_THROW(extype, message);                                                              \
   }
 
+/**
+ * @brief Assert a certain pre/post-condition is true, else emit an error of a
+ * specified message.
+ *
+ * @param condition A boolean or boolean-like expression that should be TRUE.
+ * If the condition is FALSE, then the other params are used to produce errors.
+ *
+ * @param message The error "message" to be emitted - in the form of an iostream
+ * output chain (e.g. `"unsupported index " << i << ", valid items " << items`).
+ */
 #define EXPECT_M(condition, message) EXPECT_T(condition, std::runtime_error, message)
 
+/**
+ * @brief Assert a certain pre/post-condition is true, else emit a generic error.
+ *
+ * @param condition A boolean or boolean-like expression that should be TRUE.
+ * If the condition is FALSE, then the error message is the stringified
+ * condition followed by the file and line of the failing EXPECT.
+ */
 #define EXPECT(condition) EXPECT_M(condition, #condition " at " __FILE__ ":" << __LINE__)
 
+/**
+ * @brief Assert a certain pre/post-condition is true, else return the default
+ * expression (or void).
+ *
+ * @param condition A boolean or boolean-like expression that should be TRUE.
+ * If the condition is FALSE, then the enclosing function returns immediately.
+ *
+ * @param ... Zero or One arguments representing the return value if the
+ * condition is FALSE. Zero arguments is equivalent to `return void();`, which
+ * doesn't need to be explicitly stated.
+ */
 #define RETURN_UNLESS(condition, ...)                                                              \
   if (JVALIDATE_UNLIKELY(!(condition))) {                                                          \
     return __VA_ARGS__;                                                                            \

+ 19 - 0
include/jvalidate/detail/number.h

@@ -1,16 +1,35 @@
+/**
+ * Utility functions for managing numeric types - such as converting between
+ * floating-point and integer types.
+ *
+ * None of these are particularly complex functions, but storing them in a
+ * single header with descriptive names helps the reader quickly recognize what
+ * is being done.
+ */
 #pragma once
 
 #include <cmath>
 #include <limits>
 
 namespace jvalidate::detail {
+/**
+ * @brief Determine if a floating point number is actually an integer (in the
+ * mathematical sense).
+ */
 inline bool is_json_integer(double number) {
   // A value is integral exactly when rounding it down changes nothing. NaN
   // never compares equal to anything, so it is never reported as an integer.
   double const rounded_down = std::floor(number);
   return rounded_down == number;
 }
 
+/**
+ * @brief Determine if a floating point number is actually an integer, and
+ * actually fits in the 64-bit integer type that we use for JSON Integer.
+ */
 inline bool fits_in_integer(double number) {
   // int64_t's maximum (2^63 - 1) has no exact double representation; converting
   // it rounds UP to 2^63, so an inclusive `<=` comparison against the converted
   // value wrongly accepts 2^63 itself. Compare against the exactly
   // representable power of two using the half-open range [-2^63, 2^63).
   constexpr double k_two_pow_63 = 9223372036854775808.0;
   // Integral test (the same check is_json_integer performs); rejects NaN too.
   bool const is_whole = std::floor(number) == number;
   return is_whole && number >= -k_two_pow_63 && number < k_two_pow_63;
 }
 
+/**
+ * @brief Determine if an unsigned integer fits into a signed integer
+ */
 inline bool fits_in_integer(uint64_t number) {
   // An unsigned value fits in int64_t exactly when it does not exceed
   // INT64_MAX, i.e. when its most significant bit is clear.
   constexpr uint64_t k_int_max = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
   return number <= k_int_max;
 }
 }

+ 11 - 0
include/jvalidate/detail/on_block_exit.h

@@ -1,7 +1,16 @@
 #pragma once
 
 #include <functional>
+
 namespace jvalidate::detail {
+/**
+ * @brief An object representing a cleanup function, to be called as if it was
+ * a destructor for the current function scope. Similar to e.g. D-lang's
+ * scope(exit) or @see https://en.cppreference.com/w/cpp/experimental/scope_exit
+ *
+ * Is movable - allowing us to return this scope object from its constructing
+ * context into the actual owning context that wants to control that scope.
+ */
 class OnBlockExit {
 private:
   std::function<void()> callback_;
@@ -10,8 +19,10 @@ public:
   OnBlockExit() = default;
   template <typename F> OnBlockExit(F && callback) : callback_(callback) {}
 
+  // Must be explicitly implemented because std::function's move ctor is non-destructive
   OnBlockExit(OnBlockExit && other) { std::swap(other.callback_, callback_); }
 
+  // Must be explicitly implemented because std::function's move-assign is non-destructive
   OnBlockExit & operator=(OnBlockExit && other) {
     std::swap(other.callback_, callback_);
     return *this;

+ 68 - 0
include/jvalidate/detail/out.h

@@ -8,6 +8,24 @@ namespace jvalidate::detail {
 constexpr struct discard_out_t {
 } discard_out;
 
+/**
+ * @brief An optional out-parameter to a function, similar to a function
+ * that takes `T* out_param = nullptr`. Unfortunately, std::optional does not
+ * support storing references - so if we want the syntactic sugar of that, we
+ * need a custom class.
+ *
+ * In addition to acting like an optional value - we have the special behavior
+ * that we wanted - namely that "if there is a contained value provided by the
+ * caller, we will update that reference", and "if there is no value, then
+ * assigning a value will have no effect" as if we performed the idiomatic:
+ * @code
+ * if (out_param) {
+ *   *out_param = ...;
+ * }
+ * @endcode
+ *
+ * @tparam T The type being returned
+ */
 template <typename T>
   requires(std::is_same_v<T, std::decay_t<T>>)
 class out {
@@ -15,12 +33,33 @@ private:
   T * ref_ = nullptr;
 
 public:
+  // Construct an empty out parameter, that has no side-effects
   out() = default;
+  // Explicitly construct an empty out parameter, that has no side-effects
   out(discard_out_t) {}
+  // Construct an out parameter pointing to the given concrete value.
   out(T & ref) : ref_(&ref) {}
 
+  /**
+   * @brief On rare occasions, we still need to perform checks
+   * that an out-param holds a value.
+   */
   explicit operator bool() const { return ref_; }
 
+  /**
+   * @brief Update the value of this out parameter, if it holds a value. By
+   * convention, we assume that this function will only be called once, but
+   * there is no requirement for that.
+   *
+   * @tparam U Any type that can be used to construct the held type T
+   *
+   * @param val The new value being passed up to the caller
+   *
+   * @returns Nothing - this object does not behave like a normal object where
+   * you can do things like `if ((A = B).foo())` - since this object represents
+   * exclusively a way to pass an optional value back to the caller without
+   * returning a tuple.
+   */
   template <typename U>
     requires std::is_constructible_v<T, U>
   void operator=(U && val) {
@@ -31,6 +70,14 @@ public:
   }
 };
 
+/**
+ * @brief A non-optional out-parameter to a function, similar to a function that
+ * takes a `T& out_param` argument. Unlike the standard form, this type allows
+ * passing an rvalue (temporary) or an lvalue (persistant) element, and will
+ * properly handle the assigment and updating of the object as appropriate.
+ *
+ * @tparam T The type being returned
+ */
 template <typename T>
   requires(std::is_same_v<T, std::decay_t<T>>)
 class inout {
@@ -38,9 +85,19 @@ private:
   std::variant<T, T *> ref_;
 
 public:
+  // Constructs an inout parameter from an rvalue type - whose modification will
+  // not affect the calling scope.
   inout(T && value) : ref_(std::move(value)) {}
+  // Constructs an inout parameter from an lvalue type - whose modification will
+  // propagate upwards to the caller.
   inout(T & ref) : ref_(&ref) {}
 
+  /**
+   * @brief Convert this object back into its underlying type for use.
+   *
+   * @returns A reference to the contained value/reference, to avoid the cost
+   * of performing a copy-operation if the contained object is non-trivial.
+   */
   operator T const &() const {
     struct {
       T const & operator()(T const & in) const { return in; }
@@ -49,6 +106,17 @@ public:
     return std::visit(visitor, ref_);
   }
 
+  /**
+   * @brief Update the value of this out parameter. Depending on the variation
+   * contained in this type, this will either propagate up to the caller's level
+   * or will update future uses of this object.
+   *
+   * @tparam U Any type that can be used to construct the held type T
+   *
+   * @param val The new value being set
+   *
+   * @returns The updated value
+   */
   template <typename U>
     requires std::is_constructible_v<T, U>
   T const & operator=(U && val) {

+ 93 - 0
include/jvalidate/detail/pointer.h

@@ -13,6 +13,20 @@
 #include <jvalidate/forward.h>
 
 namespace jvalidate::detail {
+/**
+ * @brief A helper struct for use in appending elements to a json Pointer object
+ * in a way that allows it to be used as a template parameter - similar to how
+ * ostream allows operator<<(void(*)(ostream&)) to pass in a function callback
+ * for implementing various iomanip functions as piped (read:fluent) values.
+ *
+ * However, the primary usecase for this is in a template context, where I want
+ * to add 0-or-more path components to a JSON-Pointer of any type, and also want
+ * to support neighbor Pointers, instead of only child Pointers.
+ *
+ * For example, @see ValidationVisitor::visit(constraint::ConditionalConstraint)
+ * where we use parent to rewind the path back to the owning scope for
+ * if-then-else processing.
+ */
 struct parent_t {};
 constexpr parent_t parent;
 
@@ -20,23 +34,45 @@ class Pointer {
 public:
   Pointer() = default;
   Pointer(std::vector<std::variant<std::string, size_t>> const & tokens) : tokens_(tokens) {}
+
+  /**
+   * @brief Parse a JSON-Pointer from a serialized JSON-Pointer-String. In
+   * principle, this should either be a factory function returning an optional/
+   * throwing on error - but we'll generously assume that all JSON-Pointers are
+   * valid - and therefore that an invalidly formatted pointer string will
+   * point to somewhere non-existent (since it will be used in schema handling)
+   */
   Pointer(std::string_view path) {
     if (path.empty()) {
       return;
     }
 
     auto append_with_parse = [this](std::string in) {
+      // Best-guess that the input token text represents a numeric value.
+      // Technically - this could mean that we have an object key that is also
+      // a number (e.g. the jsonized form of map<int, T>), but we can generally
+      // assume that we are not going to use those kinds of paths in a reference
+      // field. Therefore we don't need to include any clever tricks for storage
       if (not in.empty() && in.find_first_not_of("0123456789") == std::string::npos) {
         return tokens_.push_back(std::stoull(in));
       }
 
       for (size_t i = 0; i < in.size(); ++i) {
+        // Allow URL-Escaped characters (%\x\x) to be turned into their
+        // matching ASCII characters. This allows passing abnormal chars other
+        // than '/' and '~' to be handled in all contexts.
+        // TODO(samjaffe): Only do this if enc is hex-like (currently throws?)
         if (in[i] == '%') {
           char const enc[3] = {in[i + 1], in[i + 2]};
           in.replace(i, 3, 1, char(std::stoi(enc, nullptr, 16)));
         } else if (in[i] != '~') {
+          // Not a special char-sequence, does not need massaging
           continue;
         }
+        // In order to properly support '/' inside the property name of an
+        // object, we must escape it. The designers of the JSON-Pointer RFC
+        // chose to use '~' as a special signifier. Mapping '~0' to '~', and
+        // '~1' to '/'.
         if (in[i + 1] == '0') {
           in.replace(i, 2, 1, '~');
         } else if (in[i + 1] == '1') {
@@ -46,6 +82,8 @@ public:
       tokens_.push_back(std::move(in));
     };
 
+    // JSON-Pointers are required to start with a '/' although we only enforce
+    // that rule in Reference.
     path.remove_prefix(1);
     // The rules of JSON-Pointer is that if a token were to contain a '/' as a
     // strict character: then that character would be escaped, using the above
@@ -59,6 +97,17 @@ public:
     append_with_parse(std::string(path));
   }
 
+  /**
+   * @brief Dive into a JSON object through the entire path of this object
+   *
+   * @param document A JSON Adapter document - conforming to the following spec:
+   * 1. Is indexable by size_t, returning its own type
+   * 2. Is indexable by std::string, returning its own type
+   * 3. Indexing into a null/incorrect json type, or for an absent child is safe
+   *
+   * @returns A new JSON Adapter at the pointed to location, or a generic null
+   * JSON object.
+   */
   auto walk(Adapter auto document) const {
     for (auto const & token : tokens_) {
       document = std::visit([&document](auto const & next) { return document[next]; }, token);
@@ -66,6 +115,12 @@ public:
     return document;
   }
 
+  /**
+   * @brief Fetch the last item in this pointer as a string (for easy
+   * formatting). This function is used more-or-less exclusively to support the
+   * improved annotation/error listing concepts described in the article:
+   * https://json-schema.org/blog/posts/fixing-json-schema-output
+   */
   std::string back() const {
     struct {
       std::string operator()(std::string const & in) const { return in; }
@@ -76,11 +131,49 @@ public:
 
   bool empty() const { return tokens_.empty(); }
 
+  /**
+   * @brief Determines if this JSON-Pointer is prefixed by the other
+   * JSON-Pointer. For example: `"/A/B/C"_jsptr.starts_with("/A/B") == true`
+   *
+   * This is an important thing to know when dealing with schemas that use
+   * Anchors or nest $id tags in a singular document. Consider the schema below:
+   * @code{.json}
+   *  {
+   *    "$id": "A",
+   *    "$defs": {
+   *      "B": {
+   *        "$anchor": "B",
+   *        "$defs": {
+   *          "C": {
+   *            "$anchor": "C"
+   *          }
+   *        }
+   *      }
+   *    }
+   *  }
+   * @endcode
+   *
+   * How can we deduce that "A#B" and "A#C" are related to one-another as parent
+   * and child nodes? First we translate them both into absolute (no-anchor)
+   * forms "A#/$defs/B" and "A#/$defs/B/$defs/C". Visually - these are now
+   * obviously related - but we need to expose the functionality to make that
+   * check happen (that "/$defs/B/$defs/C" starts with "/$defs/B").
+   */
   bool starts_with(Pointer const & other) const {
     // A longer pointer can never be a prefix of a shorter one.
     if (other.tokens_.size() > tokens_.size()) {
       return false;
     }
     // Compare token-by-token over the length of the candidate prefix.
     return std::equal(other.tokens_.begin(), other.tokens_.end(), tokens_.begin());
   }
 
+  /**
+   * @brief A corollary function to starts_with, create a "relative"
+   * JSON-Pointer to some parent. Relative pointers are only partially supported
+   * (e.g. if you tried to print it it would still emit the leading slash), so
+   * the standard use case of this function is to either use it when choosing
+   * a URI or Anchor that is a closer parent:
+   * `Reference(uri, anchor, ptr.relative_to(other))`
+   * or immediately concatenating it onto another absolute pointer:
+   * `abs /= ptr.relative_to(other)`
+   */
   Pointer relative_to(Pointer const & other) const {
     assert(starts_with(other));
     return Pointer(std::vector(tokens_.begin() + other.tokens_.size(), tokens_.end()));

+ 49 - 0
include/jvalidate/detail/reference.h

@@ -12,6 +12,19 @@
 namespace jvalidate::detail {
 class Reference;
 
+/**
+ * @brief A class describing a "Reference without a JSON-Pointer" object.
+ * For the sake of avoiding code-duplication, we implement References in terms
+ * of this class, which makes this comment awkward.
+ *
+ * A RootReference refers to a URI and/or an Anchor object - and is specifically
+ * meant for use with {@see ReferenceCache} and {@see ReferenceManager} to
+ * create bindings between the Anchor/URI points of $id/$anchor tags with their
+ * absolute reference parents.
+ *
+ * Because of this, there is an intrinsic link between RootReference and
+ * Reference objects, although it is not a 1-1 relationship.
+ */
 class RootReference {
 private:
   friend class Reference;
@@ -21,9 +34,13 @@ private:
 public:
   RootReference() = default;
 
+  // Piecewise constructor for RootReference, supports (URI) and (URI, Anchor)
   explicit RootReference(URI const & uri, Anchor const & anchor = {})
       : uri_(uri), anchor_(anchor) {}
 
+  // Parser-ctor for RootReference, implemented in terms of
+  // {@see RootReference::RootReference(std::string_view, out<size_t>)}, which
+  // is also used by Reference's parser-ctor.
   explicit RootReference(std::string_view ref) : RootReference(ref, discard_out) {}
 
   bool is_relative() const { return uri_.is_relative(); }
@@ -36,26 +53,55 @@ public:
   auto operator<=>(RootReference const &) const = default;
 
 private:
+  /**
+   * @brief Parse a RootReference out from a given textual representation.
+   *
+   * @param ref A string containing a URI and/or anchor. By convention - this
+   * parameter should be "#" if it is referring to an empty RootReference.
+   *
+   * @param[out] end An output variable that tracks the end-position of the
+   * anchor. When calling {@see Reference::Reference(std::string_view)}, this
+   * lets us properly offset the view for the JSON-Pointer component without
+   * needing to re-implement the code that scans for it.
+   */
   RootReference(std::string_view ref, out<size_t> end) {
+    // By default, RootReference will consume the entire input
     end = std::string::npos;
 
+    // As also mentioned in URI, the fragment identifier is used in a
+    // JSON-Reference to separate the URI from the Anchor/Pointer component(s)
     size_t end_of_uri = ref.find('#');
     uri_ = URI(ref.substr(0, end_of_uri));
+    // If there is not a fragment-separator, then this RootReference is all URI
+    // There will be no Anchor or JSON-Pointer components to extract.
     if (end_of_uri == std::string::npos) {
       return;
     }
 
+    // Skip past the "#"
     ref.remove_prefix(end_of_uri + 1);
+    // Anchors prohibit most characters, so we can be sure that the first "/"
+    // past the URI is the endpoint of the Anchor
     size_t const pointer_start = ref.find('/');
     anchor_ = Anchor(ref.substr(0, pointer_start));
 
+    // Prohibit a trailing JSON-Pointer unless the caller provided the out-param
     EXPECT_M(end || pointer_start == std::string::npos, "JSON-Pointer is illegal in this context");
+    // Ensure proper offset is applied: add pointer_start to end_of_uri because
+    // we called remove_prefix on ref. Add an additional +1 because of the "#"
     if (pointer_start != std::string::npos) {
       end = pointer_start + end_of_uri + 1;
     }
   }
 };
 
+/**
+ * @brief A Reference is a class describing a location of a JSON value. This may
+ * describe an external document, an anchor within a document (jump to
+ * location), and/or a path to a value from a parent location. References allow
+ * us to combine all three of these properties together - although in practice
+ * the Anchor field should be empty before it escapes the detail namespace.
+ */
 class Reference {
 private:
   RootReference root_;
@@ -64,12 +110,15 @@ private:
 public:
   Reference() = default;
 
+  // Piecewise constructor for References (URI) and (URI, Pointer)
   explicit Reference(URI const & uri, Pointer const & pointer = {})
       : root_(uri), pointer_(pointer) {}
 
+  // Piecewise constructor for References (URI, Anchor) and (URI, Anchor, Pointer)
   explicit Reference(URI const & uri, Anchor const & anchor, Pointer const & pointer = {})
       : root_(uri, anchor), pointer_(pointer) {}
 
+  // Piecewise constructor for References using RootReference
   explicit Reference(RootReference const & root, Pointer const & pointer = {})
       : root_(root), pointer_(pointer) {}
 

+ 93 - 0
include/jvalidate/detail/reference_cache.h

@@ -8,17 +8,69 @@
 #include <jvalidate/detail/reference.h>
 
 namespace jvalidate::detail {
+/**
+ * @brief A bidirectional cache of absolute references
+ * (URI + Anchor + JSON-Pointer) to root references (URI + Anchor). An object of
+ * this sort is necessary for a couple of reasons:
+ *
+ * 1. It is possible to have more than one absolute reference map to the same
+ *    root reference.
+ * 2. We need to employ some special handling to properly identify the nearest
+ *    RootReference that owns a given absolute Reference - given that that ref
+ *    may not actually be anchored to the RootReference at the time.
+ */
 class ReferenceCache {
 private:
   std::map<Reference, RootReference, std::greater<>> to_anchor_;
   std::map<RootReference, Reference, std::greater<>> to_absolute_;
 
 public:
+  /**
+   * @brief Register the entry-point of a given schema-document. This function
+   * should be called exactly one time for each "$id" tag in a given schema that
+   * is being loaded.
+   *
+   * In principle, we should also perform a uniqueness check when calling
+   * to_absolute_.emplace.
+   */
   void emplace(URI const & root) {
     to_absolute_.emplace(root, root);
     to_anchor_.emplace(root, root);
   }
 
+  /**
+   * @brief Link together the absolute and root references of a document, as
+   * well as recursively walk through all possible parent URIs that are already
+   * stored in this cache.
+   *
+   * Therefore, this function will add "exactly 1" mapping to the to_absolute_
+   * map, and "at least 1, but no more than to_absolute_.size()" mappings to
+   * to_anchor_, representing all of the parent reference paths that link to the
+   * newly added RootReference
+   *
+   * @param absolute An absolute JSON Reference, which either contains no
+   * RootReference component, or contains the previous traversed RootReference,
+   * as defined by the $id, $anchor, $recursiveAnchor, or $dynamicAnchor tags.
+   *
+   * @param canonical The current RootReference being operated on from the
+   * tags listed above.
+   *
+   * For example, if we have a json document like:
+   * @code{.json}
+   * {
+   *   "$id": "A",
+   *   "$defs": {
+   *     "Example": {
+   *       "$id": "B"
+   *     }
+   *   }
+   * }
+   * @endcode
+   *
+   * then we would end up calling this function with the arguments:
+   *    absolute="#",                canonical="A#"
+   *    absolute="A#/$defs/Example", canonical="B#"
+   */
   Reference emplace(Reference const & absolute, RootReference const & canonical) {
     for (Reference where = absolute; not where.pointer().empty();) {
       // Recursively add all possible alternative paths that are equivalent to
@@ -42,6 +94,30 @@ public:
     return Reference(canonical);
   }
 
+  /**
+   * @brief Identifies the nearest RootReference that is associated with the
+   * input.
+   *
+   * @param ref An arbitrary reference that we want to locate the nearest root
+   * for.
+   *
+   * @param for_parent_reference A flag indicating if we should prohibit exact
+   * matches of reference. For example, suppose that we have the same bindings
+   * as in the above method comment:
+   *    absolute="#",                canonical="A#"
+   *    absolute="A#/$defs/Example", canonical="B#"
+   * If I request `relative_to_nearest_anchor("A#/$defs/Example", false)` then
+   * it will return `B#` as the associated RootReference, because we have stored
+   * that exact mapping in our absolute path to anchor cache.
+   *
+   * On the other hand - suppose that we want to ensure that we've acquired
+   * a strict parent of the current reference.
+   * `relative_to_nearest_anchor("A#/$defs/Example", true)` would say "an anchor
+   * cannot be its own parent, therefore we cannot resolve to B#".
+   *
+   * @returns ref, recalculated to be relative_to its nearest parent root, if
+   * one is available.
+   */
   Reference relative_to_nearest_anchor(Reference const & ref,
                                        bool for_parent_reference = false) const {
     auto it = for_parent_reference ? to_anchor_.upper_bound(ref) : to_anchor_.lower_bound(ref);
@@ -51,12 +127,24 @@ public:
 
     auto const & [absolute, anchor] = *it;
     if (not ref.pointer().starts_with(absolute.pointer())) {
+      // We've undershot our reference and landed at a cousin/neighbor node
       return ref;
     }
 
     return Reference(anchor, ref.pointer().relative_to(absolute.pointer()));
   }
 
+  /**
+   * @brief Deduces the URI part of the actual parent of this node, utilizing
+   * the "an anchor cannot be its own parent" rule described above.
+   *
+   * @param parent An arbitrarily constructed reference to the parent context
+   * of some other reference we are operating on.
+   *
+   * @returns The URI of the nearest non-equal parent if it exists and/or there
+   * is a URI part in parent. If there is no URI part, we check if there is a
+   * URI for the root (input) schema.
+   */
   URI actual_parent_uri(detail::Reference const & parent) const {
     // TODO(samjaffe): Think about this some more - there's something awkward here
     URI uri = relative_to_nearest_anchor(parent, true).uri();
@@ -65,6 +153,11 @@ public:
       return uri;
     }
 
+    // This is a special case because we prohibit exact matches in the above
+    // relative_to_nearest_anchor call. Since we can only reach this line if
+    // BOTH uri and parent.uri() are empty - that means that the appropriate
+    // parent is the root document, which might have marked its URI with an
+    // $id tag.
     if (auto it = to_anchor_.find(Reference()); it != to_anchor_.end()) {
       return it->second.uri();
     }

+ 180 - 2
include/jvalidate/detail/reference_manager.h

@@ -23,6 +23,25 @@
 #include <jvalidate/uri.h>
 
 namespace jvalidate::detail {
+/**
+ * @brief An object responsible for owning/managing the various documents,
+ * references, and related functionality for ensuring that we properly construct
+ * things.
+ *
+ * In order to support this we store information on:
+ * - A {@see jvalidate::detail::ReferenceCache} that maps various absolute
+ *   Reference paths to their Canonical forms.
+ * - "Vocabularies", which describe the set of legal keywords for
+ *   constraint parsing.
+ * - "Anchor Locations", a non-owning store of the Adapters associated with
+ *   "$id"/"$anchor" tags to allow quick lookups without having to re-walk the
+ *   document.
+ * - "Dynamic Anchors", a list of all of the "$dynamicAnchor" tags that exist
+ *   under a given "$id" tag, and those bindings which are active in the current
+ *   scope.
+ *
+ * @tparam A The adapter type being operated upon
+ */
 template <Adapter A> class ReferenceManager {
 private:
   static inline std::map<std::string_view, schema::Version> const g_schema_ids{
@@ -48,12 +67,36 @@ private:
   DynamicReferenceContext active_dynamic_anchors_;
 
 public:
+  /**
+   * @brief Construct a new ReferenceManager around a given root schema
+   *
+   * @param external A cache/loader of external documents. Due to the way that
+   * {@see jvalidate::Schema} is implemented, the cache may have the same
+   * lifetime as this object, despite being owned by mutable reference.
+   *
+   * @param root The root schema being operated on.
+   *
+   * @param version The version of the schema being used for determining the
+   * base vocabulary to work with (see the definition of schema::Version for
+   * more details on how the base vocabulary changes).
+   *
+   * @param constraints A factory for turning JSON schema information into
+   * constraints.
+   */
   ReferenceManager(DocumentCache<A> & external, A const & root, schema::Version version,
                    ConstraintFactory<A> const & constraints)
       : external_(external), constraints_(constraints), roots_{{{}, root}} {
     prime(root, {}, &vocab(version));
   }
 
+  /**
+   * @brief Turn a schema version into a vocabulary, ignoring user-defined
+   * vocabularies
+   *
+   * @param version The schema version
+   *
+   * @returns The default vocabulary for a given draft version
+   */
   Vocabulary<A> const & vocab(schema::Version version) {
     if (not vocabularies_.contains(version)) {
       vocabularies_.emplace(version, constraints_.keywords(version));
@@ -61,6 +104,18 @@ public:
     return vocabularies_.at(version);
   }
 
+  /**
+   * @brief Fetch the vocabulary information associated with a given "$schema"
+   * tag. Unlike the enum version of this function, we can also load
+   * user-defined schemas using the ReferenceCache object, if supported. This
+   * allows us to define custom constraints or remove some that we want to
+   * forbid.
+   *
+   * @param schema The location of the schema being fetched
+   *
+   * @returns If schema is a draft version - then one of the default
+   * vocabularies, else a user-schema is loaded.
+   */
   Vocabulary<A> const & vocab(URI schema) {
     if (auto it = g_schema_ids.find(schema.resource()); it != g_schema_ids.end()) {
       return vocab(it->second);
@@ -75,6 +130,12 @@ public:
     EXPECT_M(external->type() == adapter::Type::Object, "meta-schema must be an object");
 
     auto metaschema = external->as_object();
+    // All user-defined schemas MUST have a parent schema they point to
+    // Furthermore - in order to be well-formed, the schema chain must
+    // eventually point to one of the draft schemas. However - if a metaschema
+    // ends up in a recursive situation (e.g. A -> B -> A), it will not fail in
+    // the parsing step, but instead produce a malformed Schema object for
+    // validation.
     EXPECT_M(metaschema.contains("$schema"),
              "user-defined meta-schema must reference a base schema");
 
@@ -83,6 +144,10 @@ public:
     parent = vocab(URI(metaschema["$schema"].as_string()));
 
     if (metaschema.contains("$vocabulary")) {
+      // This is a silly thing we have to do because rather than have some kind
+      // of annotation/assertion divide marker for the format constraint, we
+      // instead use true/false in Draft2019-09, and have format-assertion/
+      // format-annotation vocabularies in Draft2020-12.
       auto [keywords, vocabularies] = extract_keywords(metaschema["$vocabulary"].as_object());
       parent.restrict(keywords, vocabularies);
     }
@@ -90,12 +155,39 @@ public:
     return parent;
   }
 
+  /**
+   * @brief Load the current location into the stack of dynamic ref/anchors so
+   * that we are able to properly resolve them (e.g. because an anchor got
+   * disabled).
+   *
+   * @param ref The current parsing location in the schema, which should
+   * correspond with an "$id" tag.
+   *
+   * @returns A scope object that will remove this set of dynamic ref/anchor
+   * resolutions from the stack when it exits scope.
+   */
   auto dynamic_scope(Reference const & ref) {
     URI const uri =
         ref.pointer().empty() ? ref.uri() : references_.relative_to_nearest_anchor(ref).uri();
     return active_dynamic_anchors_.scope(uri, dynamic_anchors_[uri]);
   }
 
+  /**
+   * @brief "Load" a requested document reference, which may exist in the
+   * current document, or in an external one.
+   *
+   * @param ref The location to load. Since there is no guarantee of direct
+   * relation between the current scope and this reference, we treat this like a
+   * jump.
+   *
+   * @param vocab The current vocabulary being used for parsing. It may be
+   * changed when loading the new reference if there is a "$schema" tag at the
+   * root.
+   *
+   * @returns The schema corresponding to the reference, if it can be located.
+   * As long as ref contains a valid URI/Anchor, we will return an Adapter, even
+   * if that adapter might point to a null JSON.
+   */
   std::optional<A> load(Reference const & ref, Vocabulary<A> const * vocab) {
     if (auto it = roots_.find(ref.root()); it != roots_.end()) {
       return ref.pointer().walk(it->second);
@@ -114,22 +206,47 @@ public:
       return ref.pointer().walk(it->second);
     }
 
-    // Will get called if the external schema does not declare a root document id?
+    // Will get called if the external schema does not declare a root id?
     return ref.pointer().walk(*external);
   }
 
+  /**
+   * @brief Transform a reference into its "canonical" form, in the context of
+   * the calling context (parent).
+   *
+   * @param ref The value of a "$ref" or "$dynamicRef" token, that is being
+   * looked up.
+   *
+   * @param parent The current lexical scope being operated in.
+   *
+   * @param dynamic_reference As an input, indicates that we are requesting a
+   * dynamic reference instead of a normal $ref.
+   * As an output, indicates that we effectively did resolve a dynamicRef and
+   * therefore should alter the dynamic scope in order to prevent infinite
+   * recursions in schema parsing.
+   *
+   * @returns ref, but in its canonical/lexical form.
+   */
   Reference canonicalize(Reference const & ref, Reference const & parent,
                          inout<bool> dynamic_reference) {
     URI const uri = [this, &ref, &parent]() {
+      // If there are no URIs involved (root schema does not set "$id")
+      // then we don't need to do anything clever
       if (ref.uri().empty() && parent.uri().empty()) {
         return references_.actual_parent_uri(parent);
       }
 
+      // At least one of ref and parent has a real URI/"$id" value. If it has a
+      // "root" (e.g. file:// or http://), then we don't need to do any clever
+      // alterations to identify the root.
       URI uri = ref.uri().empty() ? parent.uri() : ref.uri();
       if (not uri.is_rootless()) {
         return uri;
       }
 
+      // Now we need to compute that URI into the context of its parent, such
+      // as if ref := "file.json" and
+      // parent := "http://localhost:8000/schemas/root.json"
       URI base = references_.actual_parent_uri(parent);
       EXPECT_M(base.resource().rfind('/') != std::string::npos,
                "Unable to deduce root for relative uri " << uri << " (" << base << ")");
@@ -161,6 +278,25 @@ public:
   }
 
 private:
+  /**
+   * @brief Locate the dynamic reference being requested (if it is being
+   * requested).
+   *
+   * @param uri The dynamic reference uri being requested, generally empty.
+   *
+   * @param ref The value of a "$ref" or "$dynamicRef" token, that is being
+   * looked up. Primarily used for the anchor value, which is relevant for
+   * $dynamicRef/$dynamicAnchor.
+   *
+   * @param dynamic_reference As an input, indicates that we are requesting a
+   * dynamic reference instead of a normal $ref.
+   * As an output, indicates that we effectively did resolve a dynamicRef and
+   * therefore should alter the dynamic scope in order to prevent infinite
+   * recursions in schema parsing.
+   *
+   * @returns If there is a dynamic reference for the requested anchor, we
+   * return it.
+   */
   std::optional<Reference> dynamic(URI const & uri, Reference const & ref,
                                    inout<bool> dynamic_reference) {
     bool const anchor_is_dynamic = active_dynamic_anchors_.contains(ref.anchor());
@@ -184,12 +320,22 @@ private:
     return active_dynamic_anchors_.lookup(uri, ref.anchor());
   }
 
+  /**
+   * @brief Prepare a newly loaded document, importing schema information,
+   * ids, anchors, and dynamic anchors recursively.
+   *
+   * @param json The document being loaded
+   *
+   * @param vocab The vocabulary of legitimate keywords to iterate through to
+   * locate ids etc.
+   */
   void prime(Adapter auto const & json, Reference where, Vocabulary<A> const * vocab) {
     if (json.type() != adapter::Type::Object) {
       return;
     }
 
     auto schema = json.as_object();
+    // Update vocabulary to the latest form
     if (schema.contains("$schema")) {
       vocab = &this->vocab(URI(schema["$schema"].as_string()));
     }
@@ -204,16 +350,24 @@ private:
       }
       switch (value.type()) {
       case adapter::Type::Array: {
+        // Recurse through array-type schemas, such as anyOf, allOf, and oneOf
+        // we don't actually check that the key is one of those, because if we
+        // do something stupid like "not": [] then the parsing phase will return
+        // an error.
         for (auto const & [index, elem] : detail::enumerate(value.as_array())) {
           prime(elem, where / key / index, vocab);
         }
         break;
       }
       case adapter::Type::Object:
+        // Normal schema-type data such as not, additionalItems, etc. hold a
+        // schema as their immediate child.
         if (not vocab->is_property_keyword(key)) {
           prime(value, where / key, vocab);
           break;
         }
+        // Special schemas are key-value stores, where the key is arbitrary and
+        // the value is the schema. Therefore we need to skip over the props.
         for (auto const & [prop, elem] : value.as_object()) {
           prime(elem, where / key / prop, vocab);
         }
@@ -223,6 +377,19 @@ private:
     }
   }
 
+  /**
+   * @brief Optionally register any root document at this location, as
+   * designated by things like the "$id" and "$anchor" tags.
+   *
+   * @param where The current lexical location in the schema - if there is an
+   * id/anchor tag, then we overwrite this value with the newly indicated root.
+   *
+   * @param version The current schema version - used to denote the name of the
+   * id tag, whether anchors are available, and how dynamic anchors function
+   * (Draft2019-09's recursiveAnchor vs. Draft2020-12's dynamicAnchor).
+   *
+   * @param json The document being primed.
+   */
   void prime_roots(Reference & where, schema::Version version, A const & json) {
     std::string const id = version <= schema::Version::Draft04 ? "id" : "$id";
     auto const schema = json.as_object();
@@ -233,7 +400,7 @@ private:
       if (root.uri().empty()) {
         root = RootReference(where.uri(), root.anchor());
       } else if (not root.uri().is_rootless() || where.uri().empty()) {
-        // By definition - rootless URIs cannot be relative
+        // By definition - rooted URIs cannot be relative
       } else if (root.uri().is_relative()) {
         root = RootReference(where.uri().parent() / root.uri(), root.anchor());
       } else {
@@ -255,6 +422,8 @@ private:
       where = references_.emplace(where, root);
     }
 
+    // Unfortunately - $recursiveAnchor and $dynamicAnchor use very different
+    // handling mechanisms, so it is not convenient to merge together
     if (version == schema::Version::Draft2019_09 && schema.contains("$recursiveAnchor") &&
         schema["$recursiveAnchor"].as_boolean()) {
       Anchor anchor;
@@ -283,6 +452,15 @@ private:
     }
   }
 
+  /**
+   * @brief Extract the supported keywords of a given selection of vocabularies
+   *
+   * @param vocabularies A map of the form (VocabularyURI => Enabled)
+   *
+   * @returns A pair containing:
+   * - All of the enabled keywords in the vocabulary
+   * - The list of enabled vocabulary metaschema (used for is_format_assertion)
+   */
   auto extract_keywords(ObjectAdapter auto const & vocabularies) const
       -> std::pair<std::unordered_set<std::string>, std::unordered_set<std::string>> {
     std::unordered_set<std::string> keywords;

+ 63 - 0
include/jvalidate/detail/scoped_state.h

@@ -6,15 +6,68 @@
 #define JVALIDATE_CONCAT2(A, B) A##B
 #define JVALIDATE_CONCAT(A, B) JVALIDATE_CONCAT2(A, B)
 
+/**
+ * @brief Create an anonymous scoped state object, which represents a temporary
+ * change of value. Since we only need to give ScopedState a name to ensure that
+ * its lifetime isn't for only a single line, this macro allows us to be more
+ * appropriately terse.
+ *
+ * @code
+ * {
+ *   scoped_state(property_, value...);
+ *   // do some things...
+ * }
+ * @endcode
+ *
+ * but this one provides exit guards in the same way that @see OnBlockExit does.
+ *
+ * @param prop A reference to a property that should be altered in the current
+ * function-scope. Is immediately modified to {@see value}, and will be returned
+ * to its original value when the current scope exits.
+ *
+ * @param value The new value to be set into prop.
+ */
 #define scoped_state(prop, value)                                                                  \
   auto JVALIDATE_CONCAT(scoped_state_, __LINE__) = detail::ScopedState(prop, value)
 
 namespace jvalidate::detail {
+/**
+ * @brief An object that alters a given value to a provided temporary, and then
+ * restores it to the original value upon being destructed. Because of this
+ * characteristic, the following two pieces of code are equivalent:
+ *
+ * @code
+ * T tmp = value...;
+ * std::swap(property_, tmp);
+ * // do some things...
+ * std::swap(property_, tmp);
+ * @endcode
+ *
+ * @code
+ * {
+ *   ScopedState tmp(property_, value...);
+ *   // do some things...
+ * }
+ * @endcode
+ */
 class ScopedState {
 private:
   std::function<void()> reset_;
 
 public:
+  /**
+   * @brief Initialize a scoped change-in-value to a property, properly guarded
+   * against early-returns, exceptions, and forgetting to reset the property.
+   *
+   * @tparam T The type of the value being updated
+   * @tparam S Any type that is compatible with T
+   *
+   * @param prop A reference to a property that should be altered in the current
+   * function-scope. Is immediately modified to {@see value}, and will be returned
+   * to its original value when the current scope exits.
+   *
+   * @param value The new value to be set into prop.
+   */
   template <typename T, typename S>
     requires(std::is_constructible_v<T, S>)
   ScopedState(T & prop, S value) : reset_([reset = prop, &prop]() { prop = reset; }) {
@@ -23,6 +76,16 @@ public:
 
   ~ScopedState() { reset_(); }
 
+  /**
+   * @brief By providing an explicit operator bool, it is possible to use
+   * ScopedState in an if statement, allowing you to write something like:
+   *
+   * @code
+   * if (scoped_state(property_, value...)) {
+   *   // do some things...
+   * }
+   * @endcode
+   */
   explicit operator bool() const { return true; }
 };
 }

+ 36 - 0
include/jvalidate/detail/string.h

@@ -1,3 +1,7 @@
+/**
+ * Utility functions for managing strings, specifically because C++'s
+ * std::string/std::regex is not well suited for UTF8 comprehensions.
+ */
 #pragma once
 
 #if __has_include(<unicode/std_string.h>)
@@ -7,6 +11,16 @@
 #endif
 
 namespace jvalidate::detail {
+/**
+ * @brief Calculates the string-length of the argument, treating multi-byte
+ * characters and unicode graphemes as single characters (which std::string
+ * cannot do).
+ *
+ * @param arg Any UTF8 compatible string (including a standard ASCII string)
+ *
+ * @returns A number no greater than arg.size(), depending on the number of
+ * graphemes/codepoints in the string.
+ */
 inline size_t length(std::string_view arg) {
 #ifdef JVALIDATE_HAS_ICU
   icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
@@ -16,17 +30,37 @@ inline size_t length(std::string_view arg) {
 #endif
 }
 
+/**
+ * @brief Ensures that any codepoints/graphemes in the given regular expression
+ * are wrapped in parentheses in order to ensure that e.g. <PIRATE-EMOJI>*
+ * properly matches the entire emoji multiple times, instead of just the last
+ * byte of the string.
+ *
+ * Because we are only performing a regex search, and not matching/capturing
+ * groups - we don't care that all of these extra parentheses cause us to
+ * generate new capture-groups or push some of the groups to a later point.
+ *
+ * @param arg A regular expression string, to be sanitized for UTF8 pattern-
+ * matching.
+ *
+ * @returns The regular expression, with some more parentheses added.
+ */
 inline std::string regex_escape(std::string_view arg) {
 #ifdef JVALIDATE_HAS_ICU
   icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
+  // Short-circuit if there are no multi-byte codepoints or graphemes, since
+  // C++ regexes don't have any problems with those.
   if (ucs.countChar32() == arg.size()) {
     return std::string(arg);
   }
 
   UErrorCode status = U_ZERO_ERROR;
+  // createCharacterInstance directly uses new - without any special allocation
+  // rules or cleanup, since the first argument is NULL.
   std::unique_ptr<icu::BreakIterator> iter(
       icu::BreakIterator::createCharacterInstance(NULL, status));
 
+  // This should never occur - unless there's like an alloc error
   if (U_FAILURE(status)) {
     return std::string(arg);
   }
@@ -36,6 +70,8 @@ inline std::string regex_escape(std::string_view arg) {
   int32_t start = iter->first();
   int32_t end = iter->next();
   while (end != icu::BreakIterator::DONE) {
+    // 0-or-1, 1-or-more, 0-or-more markings
+    // This could be optimized to only operate when on a multibyte character
     if (std::strchr("?*+", ucs.charAt(end))) {
       rval.append('(');
       rval.append(ucs, start, end - 1);

+ 42 - 0
include/jvalidate/detail/tribool.h

@@ -2,6 +2,42 @@
 
 #include <compare>
 
+/**
+ * @brief Generator-macro for creating instant tri-bools, a boolean-like type
+ * that has a "True" state, a "False" state, and a "Maybe"/"Indeterminate"
+ * state. {@see boost::tribool} for an example of this functionality.
+ *
+ * TriBool types obey the following rules of behavior (with T := True,
+ * F := False, M := Maybe):
+ *
+ * Unary operators operate as follows:
+ * | op \ in | T | F | M |
+ * |---------|---|---|---|
+ * |  bool() | T | F | T |
+ * |       ! | F | T | M |
+ * |---------|---|---|---|
+ *
+ * AND operates as follows:
+ * |   | T | F | M |
+ * |---|---|---|---|
+ * | T | T | F | T |
+ * | F | F | F | F |
+ * | M | T | F | M |
+ * |---|---|---|---|
+ *
+ * OR operates as follows:
+ * |   | T | F | M |
+ * |---|---|---|---|
+ * | T | T | T | T |
+ * | F | T | F | M |
+ * | M | T | M | M |
+ * |---|---|---|---|
+ *
+ * @param TypeName the name of the class being declared
+ * @param True the name of the truthy enumeration
+ * @param False the name of the falsy enumeration
+ * @param Maybe the name of the indeterminate enumeration
+ */
 #define JVALIDATE_TRIBOOL_TYPE(TypeName, True, False, Maybe)                                       \
   class TypeName {                                                                                 \
   public:                                                                                          \
@@ -11,12 +47,16 @@
     Enum state_;                                                                                   \
                                                                                                    \
   public:                                                                                          \
+    /* Translate a boolean into a tribool value, will never be Maybe */                            \
     TypeName(bool state) : state_(state ? True : False) {}                                         \
     TypeName(Enum state) : state_(state) {}                                                        \
                                                                                                    \
+    /* Convert to enum for use in switch() statements */                                           \
     operator Enum() const { return state_; }                                                       \
+    /* Convert to bool for use in if()/while() statements, requires static_cast otherwise */       \
     explicit operator bool() const { return state_ != False; }                                     \
                                                                                                    \
+    /* Inverts the tribool's value if it is already a concrete boolean type */                     \
     friend TypeName operator!(TypeName val) {                                                      \
       if (val.state_ == Maybe) {                                                                   \
         return Maybe;                                                                              \
@@ -24,6 +64,7 @@
       return val.state_ == False ? True : False;                                                   \
     }                                                                                              \
                                                                                                    \
+    /* Combines two tribools as if performing boolean-OR */                                        \
     friend TypeName operator|(TypeName lhs, TypeName rhs) {                                        \
       if (lhs.state_ == True || rhs.state_ == True) {                                              \
         return True;                                                                               \
@@ -34,6 +75,7 @@
       return False;                                                                                \
     }                                                                                              \
                                                                                                    \
+    /* Combines two tribools as if performing boolean-AND */                                       \
     friend TypeName operator&(TypeName lhs, TypeName rhs) {                                        \
       if (lhs.state_ == False || rhs.state_ == False) {                                            \
         return False;                                                                              \

+ 42 - 0
include/jvalidate/document_cache.h

@@ -8,6 +8,22 @@
 #include <jvalidate/uri.h>
 
 namespace jvalidate {
+/**
+ * @brief An Adapter-specific owning cache of documents that we need to load
+ * from an external resource. Because Adapter objects do not actually own the
+ * JSON objects that they wrap, we need some method of holding them in cache
+ * to prevent any use-after-free issues.
+ *
+ * As you can see from the constructor chain of {@see jvalidate::Schema},
+ * the user can either provide their own DocumentCache, which can then be shared
+ * between multiple root Schemas. Alternatively, they can provide a URIResolver,
+ * which is a function that takes a URI as input, a JSON as an out-parameter,
+ * and returns a boolean indicating success.
+ * If the URIResolver is provided, then we automatically construct a temporary
+ * DocumentCache around it for use in building the Schema. If neither the
+ * URIResolver nor a DocumentCache are provided, then we will be unable to
+ * resolve any external documents (even those on-disk).
+ */
 template <Adapter A> class DocumentCache {
 public:
   using value_type = typename A::value_type;
@@ -17,18 +33,44 @@ private:
   std::map<URI, value_type> cache_;
 
 public:
+  /**
+   * @brief Constructs an empty (read: cannot resolve anything) cache for
+   * external documents. Because there is no URIResolver, this object will
+   * always return a nullopt when trying to load anything.
+   */
   DocumentCache() = default;
+  /**
+   * @brief Construct a new DocumentCache from the given URIResolver function/
+   * function-object.
+   *
+   * @param resolve A function that consumes a URI and returns a boolean status
+   * code and concrete JSON object that can be stored in the Adapter type A as
+   * an out-parameter.
+   * This function is under no obligation to load any specific schemes from
+   * input URIs, so it is necessary to think carefully about the domain of
+   * schema references that you will be working on when implementing it.
+   * @see tests/selfvalidate_test.cxx#load_external_for_test for an example
+   * supporting http requests with libcurl and file requests with fstreams.
+   */
   DocumentCache(URIResolver<A> const & resolve) : resolve_(resolve) {}
 
   operator bool() const { return resolve_; }
 
   std::optional<A> try_load(URI const & uri) {
+    // Short circuit - without a URIResolver, we can always return nullopt,
+    // because this library doesn't promise to know how to load external
+    // schemas from any source (including files).
     if (not resolve_) {
       return std::nullopt;
     }
 
     auto [it, created] = cache_.try_emplace(uri);
     if (created && not resolve_(uri, it->second)) {
+      // Doing it this way skips out on a move operation for the JSON object,
+      // which could be useful if someone is using a legacy JSON object type.
+      // Since std::map promises stability we don't need to concern ourselves
+      // with reference invalidation even in a multi-threaded context - although
+      // this code is not threadsafe.
       cache_.erase(it);
       return std::nullopt;
     }

+ 104 - 0
include/jvalidate/schema.h

@@ -19,19 +19,48 @@
 #include <jvalidate/forward.h>
 
 namespace jvalidate::schema {
+/**
+ * @brief The real "Schema" class, representing a resolved node in a schema
+ * object. Each node is analogous to one layer of the schema json, and can
+ * represent either a "rejects all" schema, an "accepts all" schema, or a
+ * schema that has some selection of constraints and other features.
+ */
 class Node {
 private:
+  // Annotations for this schema...
   std::string description_;
+
+  // The default value to apply to an object if it does not exist - is invoked
+  // by the parent schema node, rather than this node itself.
   std::unique_ptr<adapter::Const const> default_{nullptr};
 
+  // Rejects-all can provide a custom reason under some circumstances.
   std::optional<std::string> rejects_all_;
+
+  // Actual constraint information
   std::optional<schema::Node const *> reference_{};
   std::unordered_map<std::string, std::unique_ptr<constraint::Constraint>> constraints_{};
   std::unordered_map<std::string, std::unique_ptr<constraint::Constraint>> post_constraints_{};
 
 public:
   Node() = default;
+  /**
+   * @brief Construct a schema that rejects all values, with a custom reason
+   *
+   * @param rejection_reason A user-safe justification of why this schema
+   * rejects everything. Depending on the compiler settings, this might be used
+   * to indicate things such as attempting to load a non-existent schema.
+   */
   Node(std::string const & rejection_reason) : rejects_all_(rejection_reason) {}
+
+  /**
+   * @brief Actually initialize this schema node. Unfortunately, we cannot use
+   * RAII for initializing this object because certain optimizations and
+   * guardrails make reference captures breakable.
+   *
+   * @param context The currently operating context, including the actual JSON
+   * document being parsed at this moment.
+   */
   template <Adapter A> void construct(detail::ParserContext<A> context);
 
   bool is_pure_reference() const {
@@ -50,7 +79,30 @@ public:
   adapter::Const const * default_value() const { return default_.get(); }
 
 private:
+  /**
+   * @brief Resolve any dynamic anchors that are children of the current schema
+   * (if this is the root node of a schema). If it is not a root node (does not
+   * define "$id"), then this function does nothing.
+   *
+   * @tparam A The Adapter type for the JSON being worked with.
+   *
+   * @param context The currently operating context, including the actual JSON
+   * document being parsed at this moment.
+   *
+   * @returns If this is a root schema - a scope object to pop the dynamic scope
+   */
   template <Adapter A> detail::OnBlockExit resolve_anchor(detail::ParserContext<A> const & context);
+
+  /**
+   * @brief Resolves/embeds referenced schema information into this schema node.
+   *
+   * @tparam A The Adapter type for the JSON being worked with.
+   *
+   * @param context The currently operating context, including the actual JSON
+   * document being parsed at this moment.
+   *
+   * @returns true iff there was a reference tag to follow
+   */
   template <Adapter A> bool resolve_reference(detail::ParserContext<A> const & context);
 };
 }
@@ -162,11 +214,27 @@ public:
       : Schema(adapter::AdapterFor<JSON const>(json), std::forward<Args>(args)...) {}
 
 private:
+  /**
+   * @brief Cache an alias to a given schema, without ownership. alias_cache_ is
+   * a many-to-one association.
+   * Syntactic sugar for "add pointer to map and return".
+   *
+   * @param where The key aliasing the schema, which may also be the original
+   * lexical key.
+   *
+   * @param schema The pointer to a schema being stored
+   */
   schema::Node const * alias(detail::Reference const & where, schema::Node const * schema) {
     alias_cache_.emplace(where, schema);
     return schema;
   }
 
+  /**
+   * @brief Syntactic sugar for finding a map value as an optional instead of an
+   * iterator that may be "end".
+   *
+   * @param ref The key being looked up
+   */
   std::optional<schema::Node const *> from_cache(detail::Reference const & ref) {
     if (auto it = alias_cache_.find(ref); it != alias_cache_.end()) {
       return it->second;
@@ -175,6 +243,25 @@ private:
     return std::nullopt;
   }
 
+  /**
+   * @brief Resolve a $ref/$dynamicRef tag and construct or load from cache the
+   * schema that is being pointed to.
+   *
+   * @param ref The reference being resolved.
+   *
+   * @param context All of the context information about the schema, importantly
+   * the location information, {@see jvalidate::detail::ReferenceManager}, and
+   * {@see jvalidate::detail::Vocabulary}.
+   *
+   * @param dynamic_reference Is this request coming from a "$dynamicRef"/
+   * "$recursiveRef" tag, or a regular "$ref" tag.
+   *
+   * @returns A schema node, that will also be stored in a local cache.
+   *
+   * @throws std::runtime_error if the reference is to an unloaded URI, and we
+   * fail to load it. If the preprocessor definition
+   * JVALIDATE_LOAD_FAILURE_AS_FALSE_SCHEMA is set, then we instead return an
+   * always-false schema with a custom error message. This is primarily for use
+   * in writing tests for JSON-Schema's selfvalidation test cases.
+   */
   template <Adapter A>
   schema::Node const * resolve(detail::Reference const & ref,
                                detail::ParserContext<A> const & context, bool dynamic_reference) {
@@ -197,6 +284,13 @@ private:
 #endif
   }
 
+  /**
+   * @brief Fetch from cache or create a new schema node from the given context,
+   * which may be the result of resolving a reference {@see Schema::resolve}, or
+   * simply loading a child schema via {@see ParserContext::node}.
+   *
+   * @param context The current operating context of the schema
+   */
   template <Adapter A> schema::Node const * fetch_schema(detail::ParserContext<A> const & context) {
     // TODO(samjaffe): No longer promises uniqueness - instead track unique URI's
     if (std::optional cached = from_cache(context.dynamic_where)) {
@@ -204,15 +298,23 @@ private:
     }
 
     adapter::Type const type = context.schema.type();
+    // Boolean schemas were made universally permitted in Draft06. Before then,
+    // you could only use them for specific keywords, like additionalProperties.
     if (type == adapter::Type::Boolean && context.vocab->version() >= schema::Version::Draft06) {
       return alias(context.dynamic_where, context.schema.as_boolean() ? &accept_ : &reject_);
     }
 
+    // If the schema is not universal accept/reject, then it MUST be an object
     EXPECT_M(type == adapter::Type::Object, "invalid schema at " << context.dynamic_where);
+    // The empty object is equivalent to true, but is permitted in prior drafts
     if (context.schema.object_size() == 0) {
       return alias(context.dynamic_where, &accept_);
     }
 
+    // Because of the below alias() expression, and the above from_cache
+    // expression, it shouldn't be possible for try_emplace to not create a new
+    // schema node. We keep the check in anyway just in case somehow things have
+    // gotten into a malformed state.
     auto [it, created] = cache_.try_emplace(context.dynamic_where);
     EXPECT_M(created, "creating duplicate schema at... " << context.dynamic_where);
 
@@ -260,6 +362,8 @@ template <Adapter A> bool Node::resolve_reference(detail::ParserContext<A> const
     return true;
   }
 
+  // Prior to Draft2019-09, "$ref" was the only way to reference another
+  // schema (ignoring Draft03's extends keyword, which was more like allOf)
   if (context.vocab->version() < Version::Draft2019_09) {
     return false;
   }

+ 63 - 0
include/jvalidate/uri.h

@@ -7,6 +7,21 @@
 #include <jvalidate/detail/expect.h>
 
 namespace jvalidate {
+/**
+ * @brief A subsection of the Uniform Resource Identifier (URI) syntax as per
+ * RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986).
+ *
+ * This URI structure supports file paths (either as relative paths or as
+ * file:// URIs), URNs (because they are covered by the JSON-Schema test suite),
+ * and HTTP/S urls (assigning the consumer the responsibility of handling e.g.
+ * params).
+ * Additionally - this URI implementation does not support fragment parts. This
+ * is because in the context of a JSON schema - a fragment part is treated as
+ * either an Anchor, or as a JSON-Pointer.
+ *
+ * Because of these limitations, it is safe to treat this URI type as a tuple of
+ * (scheme, resource) without the connecting "://" or ":" field.
+ */
 class URI {
 private:
   std::string uri_;
@@ -17,14 +32,23 @@ public:
   URI() = default;
 
   explicit URI(std::string_view uri) : uri_(uri) {
+    // Special handling for some parsing situations where we know that an object
+    // is a URI (and thus don't need to call Reference(text).uri()) - but that
+    // URI may or may not contain a trailing hash (fragment indicator). This is the
+    // case with the "$schema" field, for example. For any given draft, the schema
+    // writer can start with "http://" OR "https://", and might end with a "#".
     if (not uri_.empty() && uri_.back() == '#') {
       uri_.pop_back();
     }
 
+    // Locate file://, http://, and https:// schemes
     if (size_t n = uri_.find("://"); n != std::string::npos) {
       scheme_ = n;
       resource_ = n + 3;
     } else if (uri_.starts_with("urn:")) {
+      // Note that we skip over the first colon, because the format of a URN
+      // token is "urn:format:data" - and therefore we want the scheme to be
+      // "urn:format", with the resource element to be "data".
       n = uri_.find(':', 4);
       scheme_ = n;
       resource_ = scheme_ + 1;
@@ -34,13 +58,52 @@ public:
   URI parent() const { return URI(std::string_view(uri_).substr(0, uri_.rfind('/'))); }
   URI root() const { return URI(std::string_view(uri_).substr(0, uri_.find('/', resource_))); }
 
+  /**
+   * @brief "Concatenate" two URIs together. Most of the logic behind this
+   * is done in {@see ReferenceManager}, rather than this class/function.
+   * Included below are some example use-cases:
+   *
+   * "file://A/B/C" / "D.json" => "file://A/B/C/D.json"
+   * "http://example.com/foo" / "bar/baz.json" =>
+   *    "http://example.com/foo/bar/baz.json"
+   * "http://example.com/foo" / "/bar/baz.json" =>
+   *    "http://example.com/bar/baz.json" (notice that we lost foo)
+   *
+   * Note that example 3 is not achieved through this function, but through code
+   * in ReferenceManager that says something like:
+   * @code{.cpp}
+   * if (not relative.is_relative()) {
+   *    uri = uri.root() / relative;
+   * }
+   * @endcode
+   *
+   * @param relative The "relative" URI to append to this one. Even though I say
+   * relative, this URI may start with a leading "/", as long as it is rootless.
+   * In that case, this URI is expected to be an HTTP/S URI - and we are going
+   * to replace everything after the hostname with the contents of relative.
+   */
   URI operator/(URI const & relative) const {
     std::string div = uri_.ends_with("/") || relative.uri_.starts_with("/") ? "" : "/";
     return URI(uri_ + div + relative.uri_);
   }
 
+  /**
+   * @brief Synonym for "does not have a scheme", used for short-circuiting
+   * relative URI handling.
+   */
   bool is_rootless() const { return scheme_ == 0; }
+  /**
+   * @brief Even if a URI does not have a scheme, it could still be a
+   * non-relative item, such as the URI "/dev/null" - which unambiguously
+   * refers to the root directory (in a *nix type filesystem) - as opposed to
+   * "dev/null", which could mean different resources in different parent
+   * contexts.
+   *
+   * Given that the "$id" that we set acts similar to `cd` in a shell, knowing
+   * this lets us know if we're looking at "an entirely separate location", or
+   * a "child/sibling location".
+   */
   bool is_relative() const { return is_rootless() && uri_[resource_] != '/'; }
+
   std::string_view scheme() const { return std::string_view(uri_).substr(0, scheme_); }
   std::string_view resource() const { return std::string_view(uri_).substr(resource_); }
 

+ 70 - 0
include/jvalidate/validator.h

@@ -9,6 +9,19 @@
 #include <jvalidate/validation_visitor.h>
 
 namespace jvalidate::detail {
+/**
+ * @brief An implementation of a regular expression "engine", for use with
+ * constraints like "pattern" and "patternProperties".
+ * Uses std::regex as its underlying implementation.
+ *
+ * While being part of the standard library makes std::regex the most sensible
+ * choice for a default RegexEngine, its performance is generally the worst
+ * among C++ regex utilities, and it struggles to compile several patterns.
+ * See https://stackoverflow.com/questions/70583395/ for an explanation.
+ *
+ * If you need to use complicated patterns in your json schema, provide a
+ * RegexEngine compatible wrapper for a different library, such as re2.
+ */
 class StdRegexEngine {
 public:
   std::regex regex_;
@@ -20,6 +33,12 @@ public:
 }
 
 namespace jvalidate {
+/**
+ * @brief A validator is the tool by which a JSON object is actually validated
+ * against a schema.
+ *
+ * @tparam RE A type that can be used to solve regular expressions
+ */
 template <RegexEngine RE = detail::StdRegexEngine> class ValidatorT {
 private:
   schema::Node const & schema_;
@@ -27,9 +46,30 @@ private:
   std::unordered_map<std::string, RE> regex_cache_;
 
 public:
+  /**
+   * @brief Construct a Validator
+   *
+   * @param schema The root schema being validated against. Must outlive this.
+   *
+   * @param cfg Any special (runtime) configuration rules being applied to the
+   * validator.
+   */
   ValidatorT(schema::Node const & schema, ValidationConfig const & cfg = {})
       : schema_(schema), cfg_(cfg) {}
 
+  /**
+   * @brief Run validation on the given JSON
+   *
+   * @tparam A Any Adapter type, in principle a subclass of adapter::Adapter.
+   * Disallows mutation via ValidationConfig.construct_default_values
+   *
+   * @param json The value being validated
+   *
+   * @param result An optional out-param of detailed information about
+   * validation failures. If result is not provided, then the validator will
+   * terminate on the first error. Otherwise it will run through the entire
+   * schema to provide a record of all of the failures.
+   */
   template <Adapter A>
     requires(not MutableAdapter<A>)
   bool validate(A const & json, ValidationResult * result = nullptr) {
@@ -39,11 +79,38 @@ public:
         ValidationVisitor<A, RE>(json, schema_, cfg_, regex_cache_, result).validate());
   }
 
+  /**
+   * @brief Run validation on the given JSON
+   *
+   * @tparam A Any Adapter type that supports assignment, in principle a
+   * subclass of adapter::Adapter.
+   *
+   * @param json The value being validated. Because A is a reference-wrapper,
+   * the underlying value may be mutated.
+   *
+   * @param result An optional out-param of detailed information about
+   * validation failures. If result is not provided, then the validator will
+   * terminate on the first error. Otherwise it will run through the entire
+   * schema to provide a record of all of the failures.
+   */
   template <MutableAdapter A> bool validate(A const & json, ValidationResult * result = nullptr) {
     return static_cast<bool>(
         ValidationVisitor<A, RE>(json, schema_, cfg_, regex_cache_, result).validate());
   }
 
+  /**
+   * @brief Run validation on the given JSON
+   *
+   * @tparam JSON A concrete JSON type. Will be turned into an Adapter, or a
+   * MutableAdapter (if json is non-const and exists).
+   *
+   * @param json The value being validated.
+   *
+   * @param result An optional out-param of detailed information about
+   * validation failures. If result is not provided, then the validator will
+   * terminate on the first error. Otherwise it will run through the entire
+   * schema to provide a record of all of the failures.
+   */
   template <typename JSON>
     requires(not Adapter<JSON>)
   bool validate(JSON & json, ValidationResult * result = nullptr) {
@@ -51,6 +118,9 @@ public:
   }
 };
 
+/**
+ * @brief Syntactic sugar for ValidatorT<>.
+ */
 class Validator : public ValidatorT<> {
 public:
   using Validator::ValidatorT::ValidatorT;