| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118 |
- #pragma once
- #include <string>
- #include <string_view>
- #include <jvalidate/compat/compare.h>
- #include <jvalidate/detail/expect.h>
- namespace jvalidate {
- /**
- * @brief A subsection of the Uniform Resource Identifier (URI) syntax as per
- * RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986).
- *
- * This URI structure supports file paths (either as relative paths or as
- * file:// URIs), URNs (because they are covered by the JSON-Schema test suite),
- * and HTTP/S urls (assigning the consumer the responsibility of handling e.g.
- * params).
- * Additionally - this URI implementation does not support fragment parts. This
- * is because in the context of a JSON schema - a fragment part is treated as
- * either an Anchor, or as a JSON-Pointer.
- *
- * Because of these limitations, it is safe to treat this URI type as a tuple of
- * (scheme, resource) without the connecting "://" or ":" field.
- */
- class URI {
- private:
- std::string uri_;
- size_t scheme_{0};
- size_t resource_{0};
- public:
- URI() = default;
- explicit URI(std::string_view uri) : uri_(uri) {
- // Special handling for some parsing situations where we know that an object
- // is a URI (and thus don't need to call Reference(text).uri()) - but that
- // URI may or may not contain a trailing hash (fragment indicator). This is the
- // case with the "$schema" field, for example. For any given draft, the schema
- // writer can start with "http://" OR "https://", and might end with a "#".
- if (not uri_.empty() && uri_.back() == '#') {
- uri_.pop_back();
- }
- // Locate file://, http://, and https:// schemes
- if (size_t n = uri_.find("://"); n != std::string::npos) {
- scheme_ = n;
- resource_ = n + 3;
- } else if (uri_.starts_with("urn:")) {
- // Note that we skip over the first colon, because the format of a URN
- // token is "urn:format:data" - and therefore we want the scheme to be
- // "urn:format", with the resource element to be "data".
- n = uri_.find(':', 4);
- scheme_ = n;
- resource_ = scheme_ + 1;
- }
- }
- URI parent() const { return URI(std::string_view(uri_).substr(0, uri_.rfind('/'))); }
- URI root() const { return URI(std::string_view(uri_).substr(0, uri_.find('/', resource_))); }
- /**
- * @brief "Concatenate" two URIs together. Most of the logic behind this
- * is done in {@see ReferenceManager}, rather than this class/function.
- * Included below are some example use-cases:
- *
- * "file://A/B/C" / "D.json" => "file:/A/B/C/D.json"
- * "http://example.com/foo" / "bar/baz.json" =>
- * "http://example.com/foo/bar/baz.json"
- * "http://example.com/foo" / "/bar/baz.json" =>
- * "http://example.com/bar/baz.json" (notice that we lost foo)
- *
- * Note that example 3 is not achieved through this function, but through code
- * in ReferenceManager that says something like:
- * @code{.cpp}
- * if (not relative.is_relative()) {
- * uri = uri.root() / relative;
- * }
- * @endcode
- *
- * @param relative The "relative" URI to append to this one. Even though I say
- * relative, this URI may start with a leading "/", as long as it is rootless.
- * In that case, this URI is expected to be an HTTP/S URI - and we are going
- * to replace everything after the hostname with the contents of relative.
- */
- URI operator/(URI const & relative) const {
- std::string div = uri_.ends_with("/") || relative.uri_.starts_with("/") ? "" : "/";
- return URI(uri_ + div + relative.uri_);
- }
- /**
- * @brief Synonym for "does not have a scheme", used for short-circuiting
- * relative URI handling.
- */
- bool is_rootless() const { return scheme_ == 0; }
- /**
- * @brief Even if a URI does not have a scheme, it could still be non-relative
- * item, such as the URI "/dev/null" - which unambiguously refers to to root
- * directory (in a *nix type filesystem) - as opposed to "dev/null", which
- * could mean different resources in different parent contexts.
- *
- * Given that the "$id" that we set acts similar to `cd` in a shell, knowing
- * this let's us know if we're looking at "an entirely separate location", or
- * a "child/sibling location".
- */
- bool is_relative() const { return is_rootless() && uri_[resource_] != '/'; }
- std::string_view scheme() const { return std::string_view(uri_).substr(0, scheme_); }
- std::string_view resource() const { return std::string_view(uri_).substr(resource_); }
- explicit operator std::string const &() const { return uri_; }
- char const * c_str() const { return uri_.c_str(); }
- bool empty() const { return uri_.empty(); }
- friend std::ostream & operator<<(std::ostream & os, URI const & self) { return os << self.uri_; }
- auto operator<=>(URI const & lhs) const = default;
- };
- }
|