#pragma once #include #include #include #include namespace jvalidate { /** * @brief A subsection of the Uniform Resource Identifier (URI) syntax as per * RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986). * * This URI structure supports file paths (either as relative paths or as * file:// URIs), URNs (because they are covered by the JSON-Schema test suite), * and HTTP/S urls (assigning the consumer the responsibility of handling e.g. * params). * Additionally - this URI implementation does not support fragment parts. This * is because in the context of a JSON schema - a fragment part is treated as * either an Anchor, or as a JSON-Pointer. * * Because of these limitations, it is safe to treat this URI type as a tuple of * (scheme, resource) without the connecting "://" or ":" field. */ class URI { private: std::string uri_; size_t scheme_{0}; size_t resource_{0}; public: URI() = default; explicit URI(std::string_view uri) : uri_(uri) { // Special handling for some parsing situations where we know that an object // is a URI (and thus don't need to call Reference(text).uri()) - but that // URI may or may not contain a trailing hash (fragment indicator). This is the // case with the "$schema" field, for example. For any given draft, the schema // writer can start with "http://" OR "https://", and might end with a "#". if (not uri_.empty() && uri_.back() == '#') { uri_.pop_back(); } // Locate file://, http://, and https:// schemes if (size_t n = uri_.find("://"); n != std::string::npos) { scheme_ = n; resource_ = n + 3; } else if (uri_.starts_with("urn:")) { // Note that we skip over the first colon, because the format of a URN // token is "urn:format:data" - and therefore we want the scheme to be // "urn:format", with the resource element to be "data". n = uri_.find(':', 4); scheme_ = n; resource_ = scheme_ + 1; } } URI parent() const { return URI(std::string_view(uri_).substr(0, uri_.rfind('/'))); } URI root() const { return URI(std::string_view(uri_).substr(0, uri_.find('/', resource_))); } /** * @brief "Concatenate" two URIs together. Most of the logic behind this * is done in {@see ReferenceManager}, rather than this class/function. * Included below are some example use-cases: * * "file://A/B/C" / "D.json" => "file:/A/B/C/D.json" * "http://example.com/foo" / "bar/baz.json" => * "http://example.com/foo/bar/baz.json" * "http://example.com/foo" / "/bar/baz.json" => * "http://example.com/bar/baz.json" (notice that we lost foo) * * Note that example 3 is not achieved through this function, but through code * in ReferenceManager that says something like: * @code{.cpp} * if (not relative.is_relative()) { * uri = uri.root() / relative; * } * @endcode * * @param relative The "relative" URI to append to this one. Even though I say * relative, this URI may start with a leading "/", as long as it is rootless. * In that case, this URI is expected to be an HTTP/S URI - and we are going * to replace everything after the hostname with the contents of relative. */ URI operator/(URI const & relative) const { std::string div = uri_.ends_with("/") || relative.uri_.starts_with("/") ? "" : "/"; return URI(uri_ + div + relative.uri_); } /** * @brief Synonym for "does not have a scheme", used for short-circuiting * relative URI handling. */ bool is_rootless() const { return scheme_ == 0; } /** * @brief Even if a URI does not have a scheme, it could still be non-relative * item, such as the URI "/dev/null" - which unambiguously refers to to root * directory (in a *nix type filesystem) - as opposed to "dev/null", which * could mean different resources in different parent contexts. * * Given that the "$id" that we set acts similar to `cd` in a shell, knowing * this let's us know if we're looking at "an entirely separate location", or * a "child/sibling location". */ bool is_relative() const { return is_rootless() && uri_[resource_] != '/'; } std::string_view scheme() const { return std::string_view(uri_).substr(0, scheme_); } std::string_view resource() const { return std::string_view(uri_).substr(resource_); } explicit operator std::string const &() const { return uri_; } char const * c_str() const { return uri_.c_str(); } bool empty() const { return uri_.empty(); } friend std::ostream & operator<<(std::ostream & os, URI const & self) { return os << self.uri_; } auto operator<=>(URI const & lhs) const = default; }; }