uri.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. #pragma once
  2. #include <string>
  3. #include <string_view>
  4. #include <jvalidate/compat/compare.h>
  5. #include <jvalidate/detail/expect.h>
  6. namespace jvalidate {
  7. /**
  8. * @brief A subsection of the Uniform Resource Identifier (URI) syntax as per
  9. * RFC 3986 (https://datatracker.ietf.org/doc/html/rfc3986).
  10. *
  11. * This URI structure supports file paths (either as relative paths or as
  12. * file:// URIs), URNs (because they are covered by the JSON-Schema test suite),
  13. * and HTTP/S urls (assigning the consumer the responsibility of handling e.g.
  14. * params).
  15. * Additionally - this URI implementation does not support fragment parts. This
  16. * is because in the context of a JSON schema - a fragment part is treated as
  17. * either an Anchor, or as a JSON-Pointer.
  18. *
  19. * Because of these limitations, it is safe to treat this URI type as a tuple of
  20. * (scheme, resource) without the connecting "://" or ":" field.
  21. */
  22. class URI {
  23. private:
  24. std::string uri_;
  25. size_t scheme_{0};
  26. size_t resource_{0};
  27. public:
  28. URI() = default;
  29. explicit URI(std::string_view uri) : uri_(uri) {
  30. // Special handling for some parsing situations where we know that an object
  31. // is a URI (and thus don't need to call Reference(text).uri()) - but that
  32. // URI may or may not contain a trailing hash (fragment indicator). This is the
  33. // case with the "$schema" field, for example. For any given draft, the schema
  34. // writer can start with "http://" OR "https://", and might end with a "#".
  35. if (not uri_.empty() && uri_.back() == '#') {
  36. uri_.pop_back();
  37. }
  38. // Locate file://, http://, and https:// schemes
  39. if (size_t n = uri_.find("://"); n != std::string::npos) {
  40. scheme_ = n;
  41. resource_ = n + 3;
  42. } else if (uri_.starts_with("urn:")) {
  43. // Note that we skip over the first colon, because the format of a URN
  44. // token is "urn:format:data" - and therefore we want the scheme to be
  45. // "urn:format", with the resource element to be "data".
  46. n = uri_.find(':', 4);
  47. scheme_ = n;
  48. resource_ = scheme_ + 1;
  49. }
  50. }
  51. URI parent() const { return URI(std::string_view(uri_).substr(0, uri_.rfind('/'))); }
  52. URI root() const { return URI(std::string_view(uri_).substr(0, uri_.find('/', resource_))); }
  53. /**
  54. * @brief "Concatenate" two URIs together. Most of the logic behind this
  55. * is done in {@see ReferenceManager}, rather than this class/function.
  56. * Included below are some example use-cases:
  57. *
  58. * "file://A/B/C" / "D.json" => "file:/A/B/C/D.json"
  59. * "http://example.com/foo" / "bar/baz.json" =>
  60. * "http://example.com/foo/bar/baz.json"
  61. * "http://example.com/foo" / "/bar/baz.json" =>
  62. * "http://example.com/bar/baz.json" (notice that we lost foo)
  63. *
  64. * Note that example 3 is not achieved through this function, but through code
  65. * in ReferenceManager that says something like:
  66. * @code{.cpp}
  67. * if (not relative.is_relative()) {
  68. * uri = uri.root() / relative;
  69. * }
  70. * @endcode
  71. *
  72. * @param relative The "relative" URI to append to this one. Even though I say
  73. * relative, this URI may start with a leading "/", as long as it is rootless.
  74. * In that case, this URI is expected to be an HTTP/S URI - and we are going
  75. * to replace everything after the hostname with the contents of relative.
  76. */
  77. URI operator/(URI const & relative) const {
  78. std::string div = uri_.ends_with("/") || relative.uri_.starts_with("/") ? "" : "/";
  79. return URI(uri_ + div + relative.uri_);
  80. }
  81. /**
  82. * @brief Synonym for "does not have a scheme", used for short-circuiting
  83. * relative URI handling.
  84. */
  85. bool is_rootless() const { return scheme_ == 0; }
  86. /**
  87. * @brief Even if a URI does not have a scheme, it could still be non-relative
  88. * item, such as the URI "/dev/null" - which unambiguously refers to to root
  89. * directory (in a *nix type filesystem) - as opposed to "dev/null", which
  90. * could mean different resources in different parent contexts.
  91. *
  92. * Given that the "$id" that we set acts similar to `cd` in a shell, knowing
  93. * this let's us know if we're looking at "an entirely separate location", or
  94. * a "child/sibling location".
  95. */
  96. bool is_relative() const { return is_rootless() && uri_[resource_] != '/'; }
  97. std::string_view scheme() const { return std::string_view(uri_).substr(0, scheme_); }
  98. std::string_view resource() const { return std::string_view(uri_).substr(resource_); }
  99. explicit operator std::string const &() const { return uri_; }
  100. char const * c_str() const { return uri_.c_str(); }
  101. bool empty() const { return uri_.empty(); }
  102. friend std::ostream & operator<<(std::ostream & os, URI const & self) { return os << self.uri_; }
  103. auto operator<=>(URI const & lhs) const = default;
  104. };
  105. }