uri.h 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. #pragma once
  2. #include <cstdlib>
  3. #include <ostream>
  4. #include <string>
  5. #include <string_view>
  6. #include <jvalidate/compat/compare.h> // IWYU pragma: keep
  7. namespace jvalidate {
  8. /**
  9. * @brief A subsection of the Uniform Resource Identifier (URI) syntax as per
  10. * RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986).
  11. *
  12. * This URI structure supports file paths (either as relative paths or as
  13. * file:// URIs), URNs (because they are covered by the JSON-Schema test suite),
  14. * and HTTP/S urls (assigning the consumer the responsibility of handling e.g.
  15. * params).
  16. * Additionally - this URI implementation does not support fragment parts. This
  17. * is because in the context of a JSON schema - a fragment part is treated as
  18. * either an Anchor, or as a JSON-Pointer.
  19. *
  20. * Because of these limitations, it is safe to treat this URI type as a tuple of
  21. * (scheme, resource) without the connecting "://" or ":" field.
  22. */
  23. class URI {
  24. private:
  25. std::string uri_;
  26. size_t scheme_{0};
  27. size_t resource_{0};
  28. public:
  29. URI() = default;
  30. explicit URI(std::string_view uri) : uri_(uri) {
  31. // Special handling for some parsing situations where we know that an object
  32. // is a URI (and thus don't need to call Reference(text).uri()) - but that
  33. // URI may or may not contain a trailing hash (fragment indicator). This is the
  34. // case with the "$schema" field, for example. For any given draft, the schema
  35. // writer can start with "http://" OR "https://", and might end with a "#".
  36. if (not uri_.empty() && uri_.back() == '#') {
  37. uri_.pop_back();
  38. }
  39. // Locate file://, http://, and https:// schemes
  40. if (size_t pos = uri_.find("://"); pos != std::string::npos) {
  41. scheme_ = pos;
  42. resource_ = pos + 3;
  43. } else if (uri_.starts_with("urn:")) {
  44. // Note that we skip over the first colon, because the format of a URN
  45. // token is "urn:format:data" - and therefore we want the scheme to be
  46. // "urn:format", with the resource element to be "data".
  47. pos = uri_.find(':', 4);
  48. scheme_ = pos;
  49. resource_ = scheme_ + 1;
  50. }
  51. }
  52. URI parent() const { return URI(std::string_view(uri_).substr(0, uri_.rfind('/'))); }
  53. URI root() const { return URI(std::string_view(uri_).substr(0, uri_.find('/', resource_))); }
  54. /**
  55. * @brief "Concatenate" two URIs together. Most of the logic behind this
  56. * is done in {@see ReferenceManager}, rather than this class/function.
  57. * Included below are some example use-cases:
  58. *
  59. * "file://A/B/C" / "D.json" => "file:/A/B/C/D.json"
  60. * "http://example.com/foo" / "bar/baz.json" =>
  61. * "http://example.com/foo/bar/baz.json"
  62. * "http://example.com/foo" / "/bar/baz.json" =>
  63. * "http://example.com/bar/baz.json" (notice that we lost foo)
  64. *
  65. * Note that example 3 is not achieved through this function, but through code
  66. * in ReferenceManager that says something like:
  67. * @code{.cpp}
  68. * if (not relative.is_relative()) {
  69. * uri = uri.root() / relative;
  70. * }
  71. * @endcode
  72. *
  73. * @param relative The "relative" URI to append to this one. Even though I say
  74. * relative, this URI may start with a leading "/", as long as it is rootless.
  75. * In that case, this URI is expected to be an HTTP/S URI - and we are going
  76. * to replace everything after the hostname with the contents of relative.
  77. */
  78. URI operator/(URI const & relative) const {
  79. if (relative.uri_.starts_with("/")) {
  80. return URI(uri_ + relative.uri_);
  81. }
  82. if (relative.uri_.starts_with("./")) {
  83. return URI(uri_ + relative.uri_.substr(1));
  84. }
  85. return URI(uri_ + "/" + relative.uri_);
  86. }
  87. /**
  88. * @brief Synonym for "does not have a scheme", used for short-circuiting
  89. * relative URI handling.
  90. */
  91. bool is_rootless() const { return scheme_ == 0; }
  92. /**
  93. * @brief Even if a URI does not have a scheme, it could still be non-relative
  94. * item, such as the URI "/dev/null" - which unambiguously refers to to root
  95. * directory (in a *nix type filesystem) - as opposed to "dev/null", which
  96. * could mean different resources in different parent contexts.
  97. *
  98. * Given that the "$id" that we set acts similar to `cd` in a shell, knowing
  99. * this let's us know if we're looking at "an entirely separate location", or
  100. * a "child/sibling location".
  101. */
  102. bool is_relative() const { return is_rootless() && uri_[resource_] != '/'; }
  103. std::string_view scheme() const { return std::string_view(uri_).substr(0, scheme_); }
  104. std::string_view resource() const { return std::string_view(uri_).substr(resource_); }
  105. explicit operator std::string const &() const { return uri_; }
  106. char const * c_str() const { return uri_.c_str(); }
  107. bool empty() const { return uri_.empty(); }
  108. friend std::ostream & operator<<(std::ostream & os, URI const & self) { return os << self.uri_; }
  109. auto operator<=>(URI const & lhs) const = default;
  110. };
  111. }