Pārlūkot izejas kodu

refactor: convert Pointer to use a sliding-window iterator instead of a token vector

Sam Jaffe 1 mēnesi atpakaļ
vecāks
revīzija
868a5222f5
1 mainītis faili ar 154 papildinājumiem un 96 dzēšanām
  1. 154 96
      include/jvalidate/detail/pointer.h

+ 154 - 96
include/jvalidate/detail/pointer.h

@@ -1,6 +1,5 @@
 #pragma once
 
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdlib>
@@ -8,13 +7,11 @@
 #include <stdexcept> // IWYU pragma: keep
 #include <string>
 #include <string_view>
-#include <utility>
-#include <variant>
-#include <vector>
 
 #include <jvalidate/compat/compare.h> // IWYU pragma: keep
 #include <jvalidate/detail/expect.h>
 #include <jvalidate/detail/number.h>
+#include <jvalidate/enum.h>
 #include <jvalidate/forward.h>
 
 namespace jvalidate::detail {
@@ -36,10 +33,10 @@ struct parent_t {};        // NOLINT(readability-identifier-naming)
 constexpr parent_t parent; // NOLINT(readability-identifier-naming)
 
 class Pointer {
+private:
+  class iterator; // NOLINT(readability-identifier-naming)
 public:
   Pointer() = default;
-  explicit(false) Pointer(std::vector<std::variant<std::string, size_t>> const & tokens)
-      : tokens_(tokens) {}
 
   /**
    * @brief Parse a JSON-Pointer from a serialized JSON-Pointer-String. In
@@ -48,65 +45,38 @@ public:
    * valid - and therefore that an invalidly formatted pointer string will
    * point to somewhere non-existent (since it will be used in schema handling)
    */
-  explicit(false) Pointer(std::string_view path) {
-    if (path.empty()) {
-      return;
-    }
+  explicit(false) Pointer(std::string_view path);
 
-    auto append_with_parse = [this](std::string in) {
-      // Best-guess that the input token text represents a numeric value.
-      // Technically - this could mean that we have an object key that is also
-      // a number (e.g. the jsonized form of map<int, T>), but we can generally
-      // assume that we are not going to use those kinds of paths in a reference
-      // field. Therefore we don't need to include any clever tricks for storage
-      if (not in.empty() && in.find_first_not_of("0123456789") == std::string::npos) {
-        tokens_.emplace_back(from_str<size_t>(in));
-        return;
+  static std::string deserialize(std::string_view view) {
+    std::string in(view);
+    for (size_t i = 0; i < in.size(); ++i) {
+      // Allow URL-Escaped characters (%\x\x) to be turned into their
+      // matching ASCII characters. This allows passing abnormal chars other
+      // than '/' and '~' to be handled in all contexts.
+      // TODO(samjaffe): Only do this if enc is hex-like (currently throws?)
+      if (in[i] == '%') {
+        std::string_view const enc = std::string_view(in).substr(i + 1, 2);
+        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
+        in.replace(i, 3, 1, from_str<char>(enc, 16));
+        continue;
       }
-
-      for (size_t i = 0; i < in.size(); ++i) {
-        // Allow URL-Escaped characters (%\x\x) to be turned into their
-        // matching ASCII characters. This allows passing abnormal chars other
-        // than '/' and '~' to be handled in all contexts.
-        // TODO(samjaffe): Only do this if enc is hex-like (currently throws?)
-        if (in[i] == '%') {
-          std::string_view const enc = std::string_view(in).substr(i + 1, 2);
-          // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
-          in.replace(i, 3, 1, from_str<char>(enc, 16));
-          continue;
-        }
-        if (in[i] != '~') {
-          // Not a special char-sequence, does not need massaging
-          continue;
-        }
-        // In order to properly support '/' inside the property name of an
-        // object, we must escape it. The designers of the JSON-Pointer RFC
-        // chose to use '~' as a special signifier. Mapping '~0' to '~', and
-        // '~1' to '/'.
-        if (in[i + 1] == '0') {
-          in.replace(i, 2, 1, '~');
-        } else if (in[i + 1] == '1') {
-          in.replace(i, 2, 1, '/');
-        } else {
-          JVALIDATE_THROW(std::runtime_error, "Illegal ~ code");
-        }
+      if (in[i] != '~') {
+        // Not a special char-sequence, does not need massaging
+        continue;
+      }
+      // In order to properly support '/' inside the property name of an
+      // object, we must escape it. The designers of the JSON-Pointer RFC
+      // chose to use '~' as a special signifier. Mapping '~0' to '~', and
+      // '~1' to '/'.
+      if (in[i + 1] == '0') {
+        in.replace(i, 2, 1, '~');
+      } else if (in[i + 1] == '1') {
+        in.replace(i, 2, 1, '/');
+      } else {
+        JVALIDATE_THROW(std::runtime_error, "Illegal ~ code");
       }
-      tokens_.emplace_back(std::move(in));
-    };
-
-    // JSON-Pointers are required to start with a '/'.
-    EXPECT_M(path.starts_with('/'), "Missing leading '/' in JSON Pointer: " << path);
-    path.remove_prefix(1);
-    // The rules of JSON-Pointer is that if a token were to contain a '/' as a
-    // strict character: then that character would be escaped, using the above
-    // rules. We take advantage of string_view's sliding view to make iteration
-    // easy.
-    for (size_t pos = path.find('/'); pos != std::string::npos;
-         path.remove_prefix(pos + 1), pos = path.find('/')) {
-      append_with_parse(std::string(path.substr(0, pos)));
     }
-
-    append_with_parse(std::string(path));
+    return in;
   }
 
   /**
@@ -120,12 +90,7 @@ public:
    * @returns A new JSON Adapter at the pointed to location, or a generic null
    * JSON object.
    */
-  auto walk(Adapter auto document) const {
-    for (auto const & token : tokens_) {
-      document = std::visit([&document](auto const & next) { return document[next]; }, token);
-    }
-    return document;
-  }
+  auto walk(Adapter auto document) const;
 
   /**
    * @brief Fetch the last item in this pointer as a string (for easy
@@ -133,15 +98,9 @@ public:
    * improved annotation/error listing concepts described in the article:
    * https://json-schema.org/blog/posts/fixing-json-schema-output
    */
-  std::string back() const {
-    struct {
-      std::string operator()(std::string const & in) const { return in; }
-      std::string operator()(size_t in) const { return std::to_string(in); }
-    } g_as_str;
-    return tokens_.empty() ? "" : std::visit(g_as_str, tokens_.back());
-  }
+  std::string back() const;
 
-  bool empty() const { return tokens_.empty(); }
+  bool empty() const { return value_.empty(); }
 
   /**
    * @brief Determines if this JSON-Pointer is prefixed by the other
@@ -171,10 +130,7 @@ public:
    * obviously related - but we need to expose the functionality to make that
    * check happen (that "/$defs/B/$defs/C" starts with "/$defs/B").
    */
-  bool starts_with(Pointer const & other) const {
-    return other.tokens_.size() <= tokens_.size() &&
-           std::equal(other.tokens_.begin(), other.tokens_.end(), tokens_.begin());
-  }
+  bool starts_with(Pointer const & other) const {
+    // A raw string-prefix match is necessary but not sufficient: "/foobar"
+    // has the string prefix "/foo" without sharing its first token. The match
+    // must also stop on a token boundary - either both pointers have the same
+    // length, or the next character of this pointer begins a new token.
+    size_t const length = other.value_.size();
+    return value_.starts_with(other.value_) &&
+           (value_.size() == length || value_[length] == '/');
+  }
 
   /**
    * @brief A corollary function to starts_with, create a "relative"
@@ -188,51 +144,153 @@ public:
    */
   Pointer relative_to(Pointer const & other) const {
     assert(starts_with(other));
-    return {
-        std::vector(tokens_.begin() + static_cast<ptrdiff_t>(other.tokens_.size()), tokens_.end())};
+    Pointer rval;
+    rval.value_ = value_.substr(other.value_.size());
+    return rval;
   }
 
-  Pointer parent(size_t levels = 1) const {
-    return {{tokens_.begin(), tokens_.end() - static_cast<ptrdiff_t>(levels)}};
-  }
+  Pointer parent(size_t levels = 1) const;
 
   Pointer & operator/=(Pointer const & relative) {
-    tokens_.insert(tokens_.end(), relative.tokens_.begin(), relative.tokens_.end());
+    value_ += relative.value_;
     return *this;
   }
 
   Pointer operator/(Pointer const & relative) const { return Pointer(*this) /= relative; }
 
-  Pointer & operator/=(parent_t) {
-    tokens_.pop_back();
-    return *this;
-  }
+  Pointer & operator/=(parent_t);
 
   Pointer operator/(parent_t) const { return parent(); }
 
   Pointer & operator/=(std::string_view key) {
-    tokens_.emplace_back(std::string(key));
+    value_ += '/';
+    value_ += std::string(key);
     return *this;
   }
 
   Pointer operator/(std::string_view key) const { return Pointer(*this) /= key; }
 
   Pointer & operator/=(size_t index) {
-    tokens_.emplace_back(index);
+    value_ += '/';
+    value_ += std::to_string(index);
     return *this;
   }
 
   Pointer operator/(size_t index) const { return Pointer(*this) /= index; }
 
+  iterator begin() const;
+  iterator end() const;
+
   friend std::ostream & operator<<(std::ostream & os, Pointer const & self) {
-    for (auto const & elem : self.tokens_) {
-      std::visit([&os](auto const & tok) { os << '/' << tok; }, elem);
-    }
-    return os;
+    return os << self.value_;
   }
   auto operator<=>(Pointer const &) const = default;
 
 private:
-  std::vector<std::variant<std::string, size_t>> tokens_;
+  std::string value_;
 };
+
+/**
+ * @brief Bidirectional iterator over the reference tokens of a Pointer.
+ *
+ * The iterator never copies the pointer text. It keeps a view of the whole
+ * serialized pointer plus two offsets into it:
+ *  - curr_ is the offset of the '/' that introduces the current token, or
+ *    npos when the iterator is at the end.
+ *  - next_ is the offset of the following '/', or npos when the current token
+ *    is the last one.
+ * Dereferencing yields the still-escaped text between those two offsets.
+ *
+ * NOTE(review): the view does not own its text, so an iterator presumably
+ * must not be used after the Pointer it came from is modified or destroyed.
+ */
+class Pointer::iterator {
+public:
+  using value_type = std::string_view;
+  using reference = std::string_view;
+  using pointer = void;
+  using difference_type = std::ptrdiff_t;
+  // NOTE(review): this tag lives in <iterator>; confirm the header is included.
+  using iterator_category = std::bidirectional_iterator_tag;
+
+  /// Creates an end iterator over an empty view.
+  iterator() = default;
+
+  /**
+   * @param view The serialized pointer text to walk over.
+   * @param position Offset of the '/' that starts the first token to visit. Any
+   * offset outside of view (including the default) creates the end iterator.
+   */
+  explicit iterator(std::string_view view, size_t position = std::string_view::npos) : view_(view) {
+    if (position < view.size()) {
+      curr_ = position;
+      next_ = view_.find('/', curr_ + 1);
+    }
+  }
+
+  /// Returns the current token without its leading '/'.
+  std::string_view operator*() const {
+    if (next_ == std::string_view::npos) {
+      // Last token: everything after the introducing '/'.
+      return view_.substr(curr_ + 1);
+    }
+    return view_.substr(curr_ + 1, next_ - curr_ - 1);
+  }
+
+  /// Moves to the following token, or to the end when there is none.
+  iterator & operator++() {
+    curr_ = next_;
+    if (curr_ != std::string_view::npos) {
+      next_ = view_.find('/', curr_ + 1);
+    }
+    return *this;
+  }
+
+  /// Post-increment: advances, returning the position held before the move.
+  iterator operator++(int) {
+    iterator const previous = *this;
+    ++*this;
+    return previous;
+  }
+
+  /**
+   * Moves to the preceding token. From the end iterator this lands on the last
+   * token. When the current token already starts at offset 0 there is nothing
+   * earlier to search, so curr_ is left at 0.
+   */
+  iterator & operator--() {
+    next_ = curr_;
+    if (next_ == std::string_view::npos) {
+      curr_ = view_.rfind('/');
+    } else if (next_ != 0) {
+      curr_ = view_.rfind('/', next_ - 1);
+    }
+    return *this;
+  }
+
+  /// Post-decrement: steps back, returning the position held before the move.
+  iterator operator--(int) {
+    iterator const previous = *this;
+    --*this;
+    return previous;
+  }
+
+  friend bool operator==(iterator const & lhs, iterator const & rhs) = default;
+
+private:
+  friend class Pointer;
+  std::string_view view_;
+  size_t curr_ = std::string_view::npos;
+  size_t next_ = std::string_view::npos;
+};
+
+/// Iterator on the token starting at offset 0; for an empty pointer this is
+/// the same position as end().
+inline Pointer::iterator Pointer::begin() const {
+  return iterator(value_, 0);
+}
+/// Past-the-end iterator over this pointer's text (no current token).
+inline Pointer::iterator Pointer::end() const {
+  return iterator(value_);
+}
+
+/// Copies out the final token exactly as stored; an empty pointer yields an
+/// empty string.
+/// NOTE(review): the token is returned without passing through deserialize(),
+/// so "~0"/"~1"/%XX sequences stay escaped - confirm that callers expect this.
+inline std::string Pointer::back() const {
+  iterator last = end();
+  --last;
+  return std::string(*last);
+}
+/**
+ * @brief Build the pointer that results from dropping the last `levels`
+ * tokens of this pointer.
+ *
+ * @param levels How many trailing tokens to drop. Zero returns a copy of this
+ * pointer; a count of at least the number of tokens returns an empty pointer.
+ * @returns The shortened pointer; this pointer is not modified.
+ */
+inline Pointer Pointer::parent(size_t levels) const {
+  if (levels == 0) {
+    return *this;
+  }
+
+  // Step backwards one token at a time instead of std::advance(it, -levels):
+  // negating an unsigned count only works through wrap-around conversion, and
+  // it keeps decrementing after the first token has been reached. Stop as soon
+  // as the front of the text is hit (offset 0) or no '/' remains (npos).
+  iterator it = end();
+  for (size_t dropped = 0; dropped < levels; ++dropped) {
+    --it;
+    if (it.curr_ == 0 || it.curr_ == std::string::npos) {
+      break;
+    }
+  }
+
+  // npos compares greater than any valid offset: there is nothing to keep.
+  if (it.curr_ > value_.size()) {
+    return {};
+  }
+
+  Pointer rval;
+  rval.value_ = value_.substr(0, it.curr_);
+  return rval;
+}
+
+/// Drops the last token in place. When the text holds no '/' at all, the
+/// pointer is emptied.
+inline Pointer & Pointer::operator/=(parent_t) {
+  iterator last = end();
+  --last;
+  // npos (no '/' found) compares greater than any valid offset.
+  size_t const keep = last.curr_ > value_.size() ? 0 : last.curr_;
+  value_.resize(keep);
+  return *this;
+}
+
+/// Stores the serialized pointer text as-is, then checks it: a non-empty path
+/// must begin with '/', and every token is run through deserialize() once.
+inline Pointer::Pointer(std::string_view path) : value_(path) {
+  if (!path.empty()) {
+    // JSON-Pointers are required to start with a '/'.
+    EXPECT_M(path.starts_with('/'), "Missing leading '/' in JSON Pointer: " << path);
+    // Tokens are separated by unescaped '/' characters, so the token iterator
+    // can slice the stored text directly. Each token is decoded here only so
+    // that deserialize() can reject a malformed escape up front; the decoded
+    // text itself is discarded.
+    for (iterator it = begin(), last = end(); it != last; ++it) {
+      static_cast<void>(deserialize(*it));
+    }
+  }
+}
+
+/// Follows this pointer through document one token at a time and returns the
+/// adapter that is reached. A node whose type() is Array is indexed with the
+/// token parsed as a size_t; any other node is indexed with the decoded token
+/// text.
+inline auto Pointer::walk(Adapter auto document) const {
+  for (std::string_view token : *this) {
+    if (document.type() != adapter::Type::Array) {
+      document = document[deserialize(token)];
+    } else {
+      document = document[from_str<size_t>(token)];
+    }
+  }
+  return document;
+}
 }