ソースを参照

Merge branch 'release/clean'

* release/clean:
  Improve docs
  Add a few missing integral types
  Test coverage
  Modify cast utils to support ADL
  Reimplement reverse
  Make tokenizer support string_view.
Sam Jaffe 2 年 前
コミット
0c4a42ca82

+ 97 - 0
README.md

@@ -0,0 +1,97 @@
+#  String Utilities in C++
+
+A couple of utilities for improving string usability
+
+## Join
+
+Concatenate the elements of a container with a joining token. Uses ostreams.
+
+## Tokenizer/Split
+
+Split a string into a vector of strings. There are two different versions of the tokenizer: normal (`Tokenizer`) and escapable (`EscapedTokenizer`). The EscapedTokenizer cannot return string\_views, because it may have to modify the contents of a token (for example, when unescaping quotes).
+
+Provides the following features:
+
+### Ignore Empty Tokens
+Discard any token which is the empty string, enabled by default.
+
+``` c++
+string_utils::Tokenizer split(",");
+std::string_view const input = "A,B,C,,D";
+
+split(input); // [ "A", "B", "C", "D" ]
+
+split.ignore_empty_tokens(false);
+split(input); // [ "A", "B", "C", "", "D" ]
+```
+
+
+### Max Outputs
+Limit the number of outputs returned. The default is _unlimited_ (`Tokenizer::UNLIMITED`).
+
+``` c++
+string_utils::Tokenizer split(",");
+std::string_view const input = "A,B,C,D";
+
+split(input).size(); // 4
+
+split.max_outputs(3);
+split(input).size(); // 3
+```
+
+### Truncate
+If the input contains more tokens than the maximum allowed, you can choose to either return the entire unsplit remainder of the input as the last element (the default), or truncate the result so that the last element is only the Nth token and the rest of the input is discarded.
+
+``` c++
+string_utils::Tokenizer split(",");
+split.max_outputs(3);
+
+std::string_view const input = "A,B,C,D";
+
+split(input); // [ "A", "B", "C,D" ]
+
+split.truncate(true);
+split(input); // [ "A", "B", "C" ]
+```
+
+### Reverse Search Order
+Instead of tokenizing the string from front-to-back, do it from back-to-front.
+
+``` c++
+string_utils::Tokenizer split(",");
+split.max_outputs(3);
+split.reverse_search(true);
+
+std::string_view const input = "A,B,C,D";
+
+split(input); // [ "A,B", "C", "D" ]
+
+split.truncate(true);
+split(input); // [ "B", "C", "D" ]
+```
+
+### Quotes
+By providing a special quote character (with an optional escape sequence), it is possible to parse more complicated expressions. This is useful for example with CSV data, as you may need to represent a comma inside one of the fields.
+
+In order to allow the regular Tokenizer to return a vector of string\_views, this is stored in a different class.
+
+``` c++
+string_utils::Tokenizer split(",");
+// CSVs use a quotation mark for quotes, and we'll define doubled quotes as an escaped quote
+string_utils::EscapedTokenizer esplit = split.escapable({'"', R"("")"});
+
+std::string_view const input = R"(A,B,"C,D",""E"",F)";
+
+esplit(input); // [ "A", "B", "C,D", "\"E\"", "F" ]
+```
+
+## Cast - Coercing types from strings
+
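+In GoogleMock, if you don't want to define an ostream operator for your type, you can define a function `PrintTo(T const &, std::ostream*)` in the same namespace as `T`. GoogleMock then uses ADL to find that function and use it to print out the formatted version. The cast utilities follow the same pattern: to make your own type `T` castable, define a `cast` function with one of the signatures below in the same namespace as `T`, and it will be found through ADL.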
+
+There are two different functions that are important: the singular token cast and the multi-token cast.
+
+```
+bool cast(T &out, std::string_view);
+bool cast(T &out, std::vector<std::string_view> const &);
+```

+ 90 - 111
include/string_utils/cast.h

@@ -21,156 +21,135 @@
 #include "string_utils/forwards.h"
 #include "string_utils/traits.h"
 
-#define SAFE_NUMBER_PARSE(func, ...) \
-  [](char const *in, char **out) { return func(in, out, ##__VA_ARGS__); }
+#define SAFE_NUMBER_PARSE(func, ...)                                           \
+  [](char const * in, char ** out) { return func(in, out, ##__VA_ARGS__); }
 
 namespace string_utils::detail {
-template <typename T, typename F>
-bool cast_number(std::string_view str, T & to, F func) noexcept;
-template <typename S> std::vector<S> keyval(S const &input);
-}
-
-namespace string_utils {
-
-inline bool cast(std::string_view str, std::string & to) noexcept {
-  to = std::string(str);
-  return true;
-}
-
-inline bool cast(std::string_view str, long & to) noexcept {
-  return detail::cast_number(str, to, SAFE_NUMBER_PARSE(std::strtol, 10));
-}
-
-inline bool cast(std::string_view str, long long & to) noexcept {
-  return detail::cast_number(str, to, SAFE_NUMBER_PARSE(std::strtoll, 10));
-}
-
-inline bool cast(std::string_view str, float & to) noexcept {
-  return detail::cast_number(str, to, SAFE_NUMBER_PARSE(std::strtof));
+template <typename Actual, typename Out>
+bool ctor_cast(Out & out, std::string_view str) noexcept {
+  auto [rval, found] = cast<Actual>(str);
+  if (found) { out = Out(std::move(rval)); }
+  return found;
 }
 
-inline bool cast(std::string_view str, double & to) noexcept {
-  return detail::cast_number(str, to, SAFE_NUMBER_PARSE(std::strtod));
+template <typename Tuple, size_t... Is>
+bool cast_tuple(std::vector<std::string_view> const & str, Tuple & to,
+                std::index_sequence<Is...>) noexcept {
+  return ((cast(std::get<Is>(to), str[Is])) && ...);
 }
 
-inline bool cast(std::string_view str, long double & to) noexcept {
-  return detail::cast_number(str, to, SAFE_NUMBER_PARSE(std::strtold));
+template <typename T, typename F>
+bool cast_number(std::string_view str, T & to, F func) noexcept {
+  char * counter = nullptr;
+  to = func(str.data(), &counter);
+  return counter == str.end();
 }
 
-inline bool cast(std::string_view str, int & to) noexcept {
-  auto [tmp, success] = cast<long>(str);
-  to = static_cast<int>(tmp);
-  return success && tmp == static_cast<long>(to);
+inline std::vector<std::string_view> keyval(std::string_view input) noexcept {
+  if (size_t const pos = input.find('='); pos < input.size()) {
+    return {input.substr(0, pos), input.substr(pos + 1)};
+  }
+  return {input};
 }
 
-inline bool cast(std::string_view str, bool & to) noexcept {
+inline bool cast_bool(bool & out, std::string_view str) noexcept {
   if (any_of(str, "true", "TRUE", "YES", "1")) {
-    to = true;
+    out = true;
     return true;
   } else if (any_of(str, "false", "FALSE", "NO", "0")) {
-    to = false;
+    out = false;
     return true;
   }
   return false;
 }
-
 }
 
+// This should be placed last in the file
 namespace string_utils {
 
-template <typename V, typename S, typename T> bool maybe_cast(S const & str, T & to) noexcept {
-  auto [rval, found] = cast<V>(str);
-  if (found) { to = std::move(rval); }
-  return found;
-}
-
-template <typename T> struct cast_helper<T, std::enable_if_t<detail::is_container<T>{}>> {
-  template <typename S>
-  bool operator()(std::vector<S> const &strs, T & to) const noexcept {
-    for (S const &elem : strs) {
-      if constexpr (detail::is_associative<T>{}) {
-        auto [tmp, success] = cast<typename T::value_type>(detail::keyval(elem));
-        if (!success) { return false; }
-        to.insert(std::move(tmp));
-      } else {
-        auto [tmp, success] = cast<typename T::value_type>(elem);
-        if (!success) { return false; }
-        to.insert(to.end(), std::move(tmp));
-      }
-    }
+template <typename Out> bool cast(Out & out, std::string_view str) noexcept {
+  if constexpr (std::is_same_v<Out, long>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtol, 10));
+  } else if constexpr (std::is_same_v<Out, unsigned long>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtoul, 10));
+  } else if constexpr (std::is_same_v<Out, long long>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtoll, 10));
+  } else if constexpr (std::is_same_v<Out, unsigned long long>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtoull, 10));
+  } else if constexpr (std::is_same_v<Out, float>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtof));
+  } else if constexpr (std::is_same_v<Out, double>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtod));
+  } else if constexpr (std::is_same_v<Out, long double>) {
+    return detail::cast_number(str, out, SAFE_NUMBER_PARSE(std::strtold));
+  } else if constexpr (std::is_same_v<Out, bool>) {
+    return detail::cast_bool(out, str);
+  } else if constexpr (std::is_same_v<Out, char>) {
+    out = str[0];
+    return str.size() == 1;
+  } else if constexpr (std::is_constructible_v<Out, std::string_view>) {
+    out = Out(str);
     return true;
+  } else if constexpr (std::is_integral_v<Out>) {
+    using V = std::conditional_t<std::is_unsigned_v<Out>, unsigned long, long>;
+    auto [tmp, success] = cast<V>(str);
+    out = static_cast<Out>(tmp);
+    return success && tmp == static_cast<V>(out);
+  } else {
+    static_assert(detail::always_false<Out>{}, "No match for cast(string)");
   }
-};
-
-template <typename T> struct cast_helper<T, std::enable_if_t<detail::is_tuple<T>{}>> {
-  template <typename S, size_t... Is>
-  bool cast_tuple(S const & str, T & to, std::index_sequence<Is...>) const noexcept {
-    return ((cast(str[Is], std::get<Is>(to))) && ...);
-  }
-  
-  template <typename S> bool operator()(std::vector<S> const &strs, T & to) const noexcept {
-    constexpr size_t N = std::tuple_size_v<T>;
-    return strs.size() == N && cast_tuple(strs, to, std::make_index_sequence<N>{});
-  }
-};
-
-template <typename T>
-struct cast_helper<T, std::enable_if_t<std::is_constructible_v<T, std::string_view>>> {
-  bool operator()(std::string_view str, T & to) const noexcept {
-    to = str;
-    return true;
-  }
-};
-
-template <typename... Ts>
-struct cast_helper<std::variant<Ts...>> {
-  bool operator()(std::string_view str, std::variant<Ts...> & to) const noexcept {
-    return (maybe_cast<Ts>(str, to) || ...);
-  }
-};
-
-template <typename T>
-struct cast_helper<std::optional<T>> {
-  bool operator()(std::string_view str, std::optional<T> & to) const noexcept {
-    return maybe_cast<T>(str, to) || true;
-  }
-};
-
 }
 
-namespace string_utils::detail {
-template <typename Tuple, size_t... Is>
-bool cast_tuple(std::vector<std::string> const & str, Tuple & to,
-                std::index_sequence<Is...>) noexcept {
-  return ((cast(str[Is], std::get<Is>(to))) && ...);
+template <typename... Ts>
+bool cast(std::variant<Ts...> & out, std::string_view str) noexcept {
+  return (detail::ctor_cast<Ts>(out, str) || ...);
 }
 
-template <typename T, typename F>
-bool cast_number(std::string_view str, T & to, F func) noexcept {
-  char *counter = nullptr;
-  to = func(str.data(), &counter);
-  return counter == str.end();
+template <typename Out>
+bool cast(std::optional<Out> & out, std::string_view str) noexcept {
+  return detail::ctor_cast<Out>(out, str) || true;
 }
 
-template <typename S> std::vector<S> keyval(S const &input) {
-  size_t const pos = input.find('=');
-  return pos == S::npos ? std::vector{input}
-                        : std::vector{input.substr(0, pos), input.substr(pos + 1)};
-}
+template <typename Out>
+bool cast(Out & out, std::vector<std::string_view> const & strs) noexcept {
+  if constexpr (detail::is_associative_v<Out>) {
+    for (auto elem : strs) {
+      auto [tmp, success] =
+          cast<typename Out::value_type>(detail::keyval(elem));
+      if (!success) { return false; }
+      out.insert(std::move(tmp));
+    }
+    return true;
+  } else if constexpr (detail::is_container_v<Out>) {
+    for (auto elem : strs) {
+      auto [tmp, success] = cast<typename Out::value_type>(elem);
+      if (!success) { return false; }
+      out.insert(out.end(), std::move(tmp));
+    }
+    return true;
+  } else if constexpr (detail::is_tuple_v<Out>) {
+    constexpr size_t N = std::tuple_size_v<Out>;
+    return strs.size() == N &&
+           detail::cast_tuple(strs, out, std::make_index_sequence<N>{});
+  } else {
+    static_assert(detail::always_false<Out>{},
+                  "No match for cast(vector<string>)");
+  }
 }
 
-// This should be placed last in the file
-namespace string_utils {
 template <typename T> std::pair<T, bool> cast(std::string_view str) noexcept {
   std::pair<detail::decay_t<T>, bool> rval;
-  rval.second = cast(str, rval.first);
+  using ::string_utils::cast;
+  rval.second = cast(rval.first, str);
   return rval;
 }
 
 template <typename T, typename S>
 std::pair<T, bool> cast(std::vector<S> const & strs) noexcept {
+  std::vector<std::string_view> tmp{strs.begin(), strs.end()};
   std::pair<detail::decay_t<T>, bool> rval;
-  rval.second = cast(strs, rval.first);
+  using ::string_utils::cast;
+  rval.second = cast(rval.first, tmp);
   return rval;
 }
 }

+ 9 - 6
include/string_utils/forwards.h

@@ -11,14 +11,17 @@
 #include "string_utils/traits.h"
 
 namespace string_utils {
-// A helper object for providing partial specializations for casting
-template <typename, typename = void> struct cast_helper;
+
+class Tokenizer;
+class EscapedTokenizer;
 
 // The main parser
 template <typename T> std::pair<T, bool> cast(std::string_view str) noexcept;
-template <typename T, typename S> std::pair<T, bool> cast(std::vector<S> const &str) noexcept;
+template <typename T, typename S>
+std::pair<T, bool> cast(std::vector<S> const & str) noexcept;
+
+template <typename Out> bool cast(Out & out, std::string_view str) noexcept;
 
-template <typename S, typename T,
-          typename = std::enable_if_t<detail::has_result<cast_helper<T>(S, T&)>{}>>
-bool cast(S const &str, T & to) noexcept { return cast_helper<T>{}(str, to); }
+template <typename Out>
+bool cast(Out & out, std::vector<std::string_view> const & strs) noexcept;
 }

+ 41 - 20
include/string_utils/tokenizer.h

@@ -11,48 +11,69 @@
 #include <string>
 #include <vector>
 
+#include "string_utils/forwards.h"
+
 namespace string_utils {
 
-class tokenizer {
+class Tokenizer {
 public:
-  static constexpr size_t const infinite_outputs{~size_t(0)};
-  struct quote {
+  static constexpr size_t const UNLIMITED = ~0ul;
+
+protected:
+  struct Quote {
     char on;
-    std::string escaped;
+    std::string_view escaped;
   };
 
 private:
-  std::string divider_;
-  quote quote_;
-  size_t max_outputs_{infinite_outputs};
+  std::string_view divider_;
+  Quote quote_{'\0', ""};
+  size_t max_outputs_{UNLIMITED};
   bool truncate_{false};
   bool ignore_empty_tokens_{true};
   bool escapable_{false};
   bool reverse_search_{false};
 
 public:
-  tokenizer(std::string divider, struct quote quote = {'\0', ""});
+  explicit Tokenizer(std::string_view divider);
 
-  tokenizer & max_outputs(size_t new_max_outputs);
-  tokenizer & truncate(bool new_truncate_overage);
-  tokenizer & ignore_empty_tokens(bool new_ignore_empty_tokens);
-  tokenizer & escapable(bool new_escapable);
-  tokenizer & reverse_search(bool new_reverse);
+  Tokenizer & max_outputs(size_t new_max_outputs);
+  Tokenizer & truncate(bool new_truncate_overage);
+  Tokenizer & ignore_empty_tokens(bool new_ignore_empty_tokens);
+  Tokenizer & reverse_search(bool new_reverse);
+  [[nodiscard]] EscapedTokenizer escapable(Quote quote = Quote{'\0', ""}) const;
 
-  std::vector<std::string> operator()(std::string input) const;
+  std::vector<std::string> operator()(std::string && input) const;
+  std::vector<std::string_view> operator()(std::string_view input) const;
 
-private:
-  size_t max_outputs() const;
+protected:
+  Tokenizer & quote(Quote quote);
+  std::string unescape(std::string_view token) const;
+};
+
+class EscapedTokenizer : public Tokenizer {
+public:
+  explicit EscapedTokenizer(std::string_view divider,
+                            Quote quote = Quote{'\0', ""});
+  explicit EscapedTokenizer(Tokenizer impl, Quote quote);
+
+  EscapedTokenizer & max_outputs(size_t new_max_outputs);
+  EscapedTokenizer & truncate(bool new_truncate_overage);
+  EscapedTokenizer & ignore_empty_tokens(bool new_ignore_empty_tokens);
+  EscapedTokenizer & reverse_search(bool new_reverse);
+
+  std::vector<std::string> operator()(std::string && input) const;
+  std::vector<std::string> operator()(std::string_view input) const;
 };
 
 inline auto split(std::string const & data, std::string const & on,
-                  size_t max = tokenizer::infinite_outputs) {
-  return tokenizer{on}.max_outputs(max)(data);
+                  size_t max = Tokenizer::UNLIMITED) {
+  return Tokenizer{on}.max_outputs(max)(data);
 }
 
 inline auto rsplit(std::string const & data, std::string const & on,
-                   size_t max = tokenizer::infinite_outputs) {
-  return tokenizer{on}.reverse_search(true).max_outputs(max)(data);
+                   size_t max = Tokenizer::UNLIMITED) {
+  return Tokenizer{on}.reverse_search(true).max_outputs(max)(data);
 }
 
 }

+ 23 - 7
include/string_utils/traits.h

@@ -12,28 +12,44 @@
 #include <type_traits>
 
 namespace string_utils::detail {
-template <typename> struct always_false : std::false_type {};
 template <typename, typename = void> struct has_result : std::false_type {};
 template <typename F>
 struct has_result<F, std::void_t<std::result_of_t<F>>> : std::true_type {};
 
 template <typename, typename = void> struct is_tuple : std::false_type {};
 template <typename T>
-struct is_tuple<T, std::void_t<typename std::tuple_size<T>::type>> : std::true_type {};
+struct is_tuple<T, std::void_t<typename std::tuple_size<T>::type>>
+    : std::true_type {};
 
 template <typename, typename = void> struct is_associative : std::false_type {};
 template <typename T>
-struct is_associative<T, std::void_t<typename T::mapped_type>> : std::true_type {};
+struct is_associative<T, std::void_t<typename T::mapped_type>>
+    : std::true_type {};
 
 template <typename C>
-using insert_t = decltype(std::declval<C>().insert(std::declval<typename C::iterator>(),
-                                                   std::declval<typename C::value_type>()));
+using insert_t =
+    decltype(std::declval<C>().insert(std::declval<typename C::iterator>(),
+                                      std::declval<typename C::value_type>()));
 template <typename, typename = void> struct is_container : std::false_type {};
 template <typename T>
-struct is_container<T, std::void_t<typename T::value_type, insert_t<T>>> : std::true_type {};
+struct is_container<T, std::void_t<typename T::value_type, insert_t<T>>>
+    : std::true_type {};
 
 template <typename T> struct decay { using type = std::decay_t<T>; };
 template <template <typename...> class C, typename... Ts>
-struct decay<C<Ts...>> { using type = C<std::decay_t<Ts>...>; };
+struct decay<C<Ts...>> {
+  using type = C<std::decay_t<Ts>...>;
+};
+}
+
+namespace string_utils::detail {
+template <typename> struct always_false : std::false_type {};
+
+template <typename T> constexpr bool has_result_v = has_result<T>::value;
+template <typename T> constexpr bool is_tuple_v = is_tuple<T>::value;
+template <typename T>
+constexpr bool is_associative_v = is_associative<T>::value;
+template <typename T> constexpr bool is_container_v = is_container<T>::value;
+
 template <typename T> using decay_t = typename decay<std::decay_t<T>>::type;
 }

+ 136 - 54
src/tokenizer.cxx

@@ -8,103 +8,185 @@
 
 #include "string_utils/tokenizer.h"
 
-namespace string_utils {
+namespace {
+bool is_escaped(std::string_view str, std::size_t p) {
+  if (p == 0 || str[p - 1] != '\\') { return false; }
+  return ((p - str.find_last_not_of('\\', p - 1) - 1) & 1) == 1;
+}
 
-template <typename C> static void reverse(C & str) {
-  std::reverse(str.begin(), str.end());
+auto promote(std::vector<std::string_view> input) {
+  return std::vector<std::string>(input.cbegin(), input.cend());
 }
 
-tokenizer::tokenizer(std::string divider, struct quote quote)
-    : divider_(std::move(divider)), quote_(std::move(quote)) {}
+bool current_token_is(std::string_view token, size_t offset,
+                      std::string_view find) {
+  return find.size() > 0 and token.compare(offset, find.size(), find) == 0;
+};
+}
+
+namespace string_utils {
 
-tokenizer & tokenizer::max_outputs(size_t new_max_outputs) {
+Tokenizer::Tokenizer(std::string_view divider) : divider_(divider) {}
+
+Tokenizer & Tokenizer::max_outputs(size_t new_max_outputs) {
   max_outputs_ = new_max_outputs;
   return *this;
 }
 
-tokenizer & tokenizer::truncate(bool new_truncate) {
+Tokenizer & Tokenizer::truncate(bool new_truncate) {
   truncate_ = new_truncate;
   return *this;
 }
 
-tokenizer & tokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) {
+Tokenizer & Tokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) {
   ignore_empty_tokens_ = new_ignore_empty_tokens;
   return *this;
 }
 
-tokenizer & tokenizer::escapable(bool new_escapable) {
-  escapable_ = new_escapable;
+Tokenizer & Tokenizer::reverse_search(bool new_reverse) {
+  reverse_search_ = new_reverse;
   return *this;
 }
 
-tokenizer & tokenizer::reverse_search(bool new_reverse) {
-  if (reverse_search_ != new_reverse) {
-    reverse(divider_);
-    reverse(quote_.escaped);
-  }
-  reverse_search_ = new_reverse;
+EscapedTokenizer::EscapedTokenizer(std::string_view divider, Quote quote)
+    : EscapedTokenizer(Tokenizer(divider), quote) {}
+
+EscapedTokenizer::EscapedTokenizer(Tokenizer tok, Quote quote)
+    : Tokenizer(tok) {
+  this->quote(quote);
+}
+
+EscapedTokenizer & EscapedTokenizer::max_outputs(size_t new_max_outputs) {
+  Tokenizer::max_outputs(new_max_outputs);
   return *this;
 }
 
-static std::size_t countback(std::string const & str, std::size_t p, char c) {
-  if (p == 0 || str[p - 1] != c) return 0;
-  return p - str.find_last_not_of(c, p - 1) - 1;
+EscapedTokenizer & EscapedTokenizer::truncate(bool new_truncate) {
+  Tokenizer::truncate(new_truncate);
+  return *this;
 }
 
-std::vector<std::string> tokenizer::operator()(std::string input) const {
-  auto equals_from = [&input](std::string const & token, std::size_t from) {
-    return token.size() + from < input.size() &&
-           std::strncmp(input.c_str() + from, token.c_str(), token.size()) == 0;
-  };
+EscapedTokenizer &
+EscapedTokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) {
+  Tokenizer::ignore_empty_tokens(new_ignore_empty_tokens);
+  return *this;
+}
 
-  if (reverse_search_) { reverse(input); }
+EscapedTokenizer & EscapedTokenizer::reverse_search(bool new_reverse) {
+  Tokenizer::reverse_search(new_reverse);
+  return *this;
+}
 
-  std::vector<std::string> rval;
-  std::string buffer;
-  buffer.reserve(input.size());
+Tokenizer & Tokenizer::quote(Quote quote) {
+  quote_ = quote;
+  escapable_ = true;
+  return *this;
+}
+
+EscapedTokenizer Tokenizer::escapable(Quote quote) const {
+  return EscapedTokenizer(*this, quote);
+}
+
+std::vector<std::string_view>
+Tokenizer::operator()(std::string_view input) const {
+  std::vector<std::string_view> rval;
   // If max_outputs_ == UNLIMITED, this will be infinite enough to work
   // since we'll hit overflow on the string itself before this.
   std::size_t const max = max_outputs_ - !truncate_;
   std::size_t const qsz = quote_.escaped.size();
-  std::size_t from = 0;
+  size_t span = 0;
+  auto index = [this, &input, &span]() {
+    return reverse_search_ ? input.size() - span - 1 : span;
+  };
   bool in_quote{false};
-  for (std::size_t pos = 0; pos < input.size() && rval.size() < max; ++pos) {
-    // We check for escaped-quotes before we check for quotes to minimise
-    // complexity. Once in a quote, we simply append everything without checking
-    // for the divider until the end quote is encountered (escaped quotes are
-    // processed normally).
-    if (qsz > 0 && equals_from(quote_.escaped, pos)) {
-      buffer.append(1, quote_.on);
-      pos += qsz - 1;
-    } else if (input[pos] == quote_.on) {
-      in_quote = !in_quote;
-    } else if (in_quote || !equals_from(divider_, pos)) {
-      buffer.append(1, input[pos]);
-    } else if (escapable_ && countback(input, pos, '\\') % 2) {
-      buffer.back() = input[pos];
-    } else if (!in_quote) {
-      if (!ignore_empty_tokens_ || buffer.size()) { rval.emplace_back(buffer); }
-      from = pos + 1;
-      buffer.clear();
+  while (not input.empty() and rval.size() < max and span == 0) {
+    for (span = 0; span < input.size(); ++span) {
+      // We check for escaped-quotes before we check for quotes to minimise
+      // complexity. Once in a quote, we simply append everything without
+      // checking for the divider until the end quote is encountered (escaped
+      // quotes are processed normally).
+      if (not quote_.escaped.empty() and
+          current_token_is(input, index(), quote_.escaped)) {
+        span += qsz - 1;
+      } else if (input[index()] == quote_.on) {
+        in_quote = !in_quote;
+      } else if (in_quote or not current_token_is(input, index(), divider_)) {
+        continue;
+      } else if (escapable_ and is_escaped(input, index())) {
+        continue;
+      } else if (reverse_search_) {
+        if (not ignore_empty_tokens_ or span > 0) {
+          rval.push_back(input.substr(index() + 1, span));
+        }
+        input.remove_suffix(span + divider_.size());
+        span = 0;
+        break;
+      } else {
+        if (not ignore_empty_tokens_ or span > 0) {
+          rval.push_back(input.substr(0, span));
+        }
+        input.remove_prefix(span + divider_.size());
+        span = 0;
+        break;
+      }
     }
   }
   // Due to the special handling rules of the truncate feature, we need
   // to add an additional layer of handling around empty tokens and buffer
-  if (ignore_empty_tokens_ && equals_from(divider_, from)) { ++from; }
+  if (ignore_empty_tokens_ and current_token_is(input, span, divider_)) {
+    input.remove_prefix(divider_.size());
+  }
   // Additionally, we do not want to include the final element if there is
   // actually no data remaining in the buffer/input string, even when we permit
   // empty tokens in our output.
-  if (rval.size() < max_outputs_ && !(buffer.empty() && from == input.size())) {
-    rval.emplace_back(buffer.empty() ? input.substr(from) : buffer);
+  if (rval.size() < max_outputs_ and not input.empty()) {
+    rval.push_back(input);
   }
+  if (reverse_search_) { std::reverse(rval.begin(), rval.end()); }
+  return rval;
+}
 
-  if (reverse_search_) {
-    reverse(rval);
-    for (auto & str : rval) {
-      reverse(str);
+std::string Tokenizer::unescape(std::string_view token) const {
+  std::string rval;
+  rval.reserve(token.size());
+  for (size_t i = 0; i < token.size(); ++i) {
+    /*if (current_token_is(token, i, "\\\\")) {
+      rval.append(1, '\\');
+      ++i;
+    } else */
+    // The order of these tests is important!
+    // First we check if the current token is an escaped-quote - if so,
+    // replace it with the regular quote.
+    if (current_token_is(token, i, quote_.escaped)) {
+      rval.append(1, quote_.on);
+    } else if (token[i] == '\\' and current_token_is(token, i + 1, divider_)) {
+      // Then we check if we're looking at an escaped divider
+      rval.append(divider_);
+      i += divider_.size();
+    } else if (token[i] != quote_.on) {
+      // Lastly - we remove unescaped quotes
+      rval.append(1, token[i]);
     }
   }
   return rval;
 }
 
+std::vector<std::string> Tokenizer::operator()(std::string && input) const {
+  return promote(operator()(std::string_view(input)));
+}
+
+std::vector<std::string>
+EscapedTokenizer::operator()(std::string_view input) const {
+  auto rval = promote(Tokenizer::operator()(std::string_view(input)));
+  for (auto & token : rval) {
+    token = unescape(token);
+  }
+  return rval;
+}
+
+std::vector<std::string>
+EscapedTokenizer::operator()(std::string && input) const {
+  return operator()(std::string_view(input));
+}
+
 }

+ 2 - 0
string-utils.xcodeproj/project.pbxproj

@@ -70,6 +70,7 @@
 		CD266880252FFA7E00B3E667 /* tokenizer_test.cxx */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = tokenizer_test.cxx; sourceTree = "<group>"; };
 		CD266886252FFAAE00B3E667 /* string_utils-test.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "string_utils-test.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
 		CD26688A252FFAAE00B3E667 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		CD87CD7E29BCC63600C5949D /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
 		CDC883E228560A7C0088C91E /* any_of.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = any_of.h; sourceTree = "<group>"; };
 		CDC883E328560A7C0088C91E /* tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = "<group>"; };
 		CDC883E428560A7C0088C91E /* cast.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cast.h; sourceTree = "<group>"; };
@@ -98,6 +99,7 @@
 		CD266859252FF4B600B3E667 = {
 			isa = PBXGroup;
 			children = (
+				CD87CD7E29BCC63600C5949D /* README.md */,
 				CD26686D252FF51F00B3E667 /* GoogleMock.xcodeproj */,
 				CD26686A252FF4E100B3E667 /* string_utils */,
 				CDC883E028560A7C0088C91E /* include */,

+ 72 - 6
test/cast_test.cxx

@@ -10,6 +10,7 @@
 
 #include "xcode_gtest_helper.h"
 
+using testing::ElementsAre;
 using testing::FieldsAre;
 using testing::Pair;
 using namespace string_utils;
@@ -95,33 +96,98 @@ TEST(CastKeyValTest, FailsOnTooFewTokens) {
 }
 
 TEST(CastKeyValTest, ParsesTokens) {
-  auto [value, success] = cast<pair_t>(std::vector{"key","value"});
+  auto [value, success] = cast<pair_t>(std::vector{"key", "value"});
   EXPECT_TRUE(success);
   EXPECT_THAT(value, Pair("key", "value"));
 }
 
 TEST(CastKeyValTest, FailsOnTooManyTokens) {
-  auto [value, success] = cast<pair_t>(std::vector{"key","value","mapping"});
+  auto [value, success] = cast<pair_t>(std::vector{"key", "value", "mapping"});
   EXPECT_FALSE(success);
 }
 
 TEST(CastTupleTest, FailsOnTooFewTokens) {
-  auto [value, success] = cast<tuple_t>(std::vector{"0","A"});
+  auto [value, success] = cast<tuple_t>(std::vector{"0", "A"});
   EXPECT_FALSE(success);
 }
 
 TEST(CastTupleTest, FailsOnTooManyTokens) {
-  auto [value, success] = cast<tuple_t>(std::vector{"0","1","A","B"});
+  auto [value, success] = cast<tuple_t>(std::vector{"0", "1", "A", "B"});
   EXPECT_FALSE(success);
 }
 
 TEST(CastTupleTest, ParsesIfAllGood) {
-  auto [value, success] = cast<tuple_t>(std::vector{"0","1","A"});
+  auto [value, success] = cast<tuple_t>(std::vector{"0", "1", "A"});
   EXPECT_TRUE(success);
   EXPECT_THAT(value, FieldsAre(0, 1, "A"));
 }
 
 TEST(CastTupleTest, FailsOnAnyParseError) {
-  auto [value, success] = cast<tuple_t>(std::vector{"0","Q","A"});
+  auto [value, success] = cast<tuple_t>(std::vector{"0", "Q", "A"});
+  EXPECT_FALSE(success);
+}
+
+struct Bindable {
+  int a;
+  std::string b;
+};
+
+bool cast(Bindable & out, std::vector<std::string_view> const & strs) {
+  return strs.size() == 2 && cast(out.a, strs[0]) && cast(out.b, strs[1]);
+}
+
+struct Positive {
+  int value;
+};
+
+bool cast(Positive & out, std::string_view str) {
+  return cast(out.value, str) && out.value > 0;
+}
+
+TEST(CastTest, UsesADL) {
+  auto [value, success] = cast<Bindable>(std::vector{"0", "Q"});
+  EXPECT_TRUE(success);
+}
+
+TEST(CastTest, CanImposeConstraint) {
+  auto [value, success] = cast<Positive>("0");
+  EXPECT_FALSE(success);
+}
+
+TEST(CastContainerTest, CanCastVector) {
+  auto [value, success] =
+      cast<std::vector<std::string>>(std::vector{"0", "Q", "A"});
+  EXPECT_TRUE(success);
+  EXPECT_THAT(value, ElementsAre("0", "Q", "A"));
+}
+
+TEST(CastContainerTest, FailsIfBadValueCast) {
+  auto [value, success] = cast<std::vector<int>>(std::vector{"0", "Q", "A"});
+  EXPECT_FALSE(success);
+}
+
+TEST(CastContainerTest, CanCastSet) {
+  auto [value, success] =
+      cast<std::set<std::string>>(std::vector{"0", "0", "A"});
+  EXPECT_TRUE(success);
+  EXPECT_THAT(value, ElementsAre("0", "A"));
+}
+
+TEST(CastContainerTest, CanCastMap) {
+  auto [value, success] = cast<std::map<std::string, std::string>>(
+      std::vector{"0=1", "0=2", "A=B"});
+  EXPECT_TRUE(success);
+  EXPECT_THAT(value, ElementsAre(Pair("0", "1"), Pair("A", "B")));
+}
+
+TEST(CastContainerTest, FailsIfBadCastOnKey) {
+  auto [value, success] =
+      cast<std::map<int, std::string>>(std::vector{"0=1", "0=2", "A=B"});
+  EXPECT_FALSE(success);
+}
+
+TEST(CastContainerTest, FailsIfBadCastOnValue) {
+  auto [value, success] =
+      cast<std::map<std::string, int>>(std::vector{"0=1", "0=2", "A=B"});
   EXPECT_FALSE(success);
 }

+ 26 - 18
test/tokenizer_test.cxx

@@ -14,92 +14,100 @@ using namespace string_utils;
 
 TEST(TokenizerTest, SplitsStringOverToken) {
   std::string const input = "A.B.C.D";
-  std::vector<std::string> const expected{"A", "B", "C", "D"};
+  std::vector<std::string_view> const expected{"A", "B", "C", "D"};
   EXPECT_THAT(split(input, "."), expected);
 }
 
 TEST(TokenizerTest, SplitsStringUpToNTimes) {
   std::string const input = "A.B.C.D";
-  std::vector<std::string> const expected{"A", "B", "C.D"};
+  std::vector<std::string_view> const expected{"A", "B", "C.D"};
   EXPECT_THAT(split(input, ".", 3), expected);
 }
 
 TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
   std::string const input = ".A.B.C";
-  std::vector<std::string> const expected{"A", "B", "C"};
+  std::vector<std::string_view> const expected{"A", "B", "C"};
   EXPECT_THAT(split(input, ".", 3), expected);
 }
 
 TEST(TokenizerTest, IgnoresEmptyElements) {
   std::string const input = "A..B.C";
-  std::vector<std::string> const expected{"A", "B", "C"};
+  std::vector<std::string_view> const expected{"A", "B", "C"};
   EXPECT_THAT(split(input, ".", 3), expected);
 }
 
 TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
   std::string const input = "A.B..C";
-  std::vector<std::string> const expected{"A", "B", "C"};
+  std::vector<std::string_view> const expected{"A", "B", "C"};
   EXPECT_THAT(split(input, ".", 3), expected);
 }
 
 TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
   std::string const input = "A.B.C.D";
-  std::vector<std::string> const expected{"A", "B", "C"};
-  EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input), expected);
+  std::vector<std::string_view> const expected{"A", "B", "C"};
+  EXPECT_THAT(Tokenizer(".").max_outputs(3).truncate(true)(input), expected);
+}
+
+TEST(TokenizerTest, RTruncateDiscardsOverageInsteadOfNotParsingPast) {
+  std::string const input = "A.B.C.D";
+  std::vector<std::string_view> const expected{"B", "C", "D"};
+  EXPECT_THAT(
+      Tokenizer(".").max_outputs(3).truncate(true).reverse_search(true)(input),
+      expected);
 }
 
 TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
   std::string const input = "A..B.C";
-  std::vector<std::string> const expected{"A", "", "B.C"};
-  EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
+  std::vector<std::string_view> const expected{"A", "", "B.C"};
+  EXPECT_THAT(Tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
               expected);
 }
 
 TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
   std::string const input = "A.B..C";
-  std::vector<std::string> const expected{"A", "B", ".C"};
-  EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
+  std::vector<std::string_view> const expected{"A", "B", ".C"};
+  EXPECT_THAT(Tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
               expected);
 }
 
 TEST(TokenizerTest, EscapableTokensStickTogether) {
   std::string const input = R"(A B\ C)";
   std::vector<std::string> const expected{"A", "B C"};
-  EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
+  EXPECT_THAT(EscapedTokenizer(" ")(input), expected);
 }
 
 TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
   std::string const input = R"(A B\\ C)";
   std::vector<std::string> const expected{"A", R"(B\\)", "C"};
-  EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
+  EXPECT_THAT(EscapedTokenizer(" ")(input), expected);
 }
 
 TEST(TokenizerTest, QuotesAreDiscarded) {
   std::string const input = R"(A,"B",C)";
   std::vector<std::string> const expected{"A", "B", "C"};
-  EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
+  EXPECT_THAT(EscapedTokenizer(",", {'"'})(input), expected);
 }
 
 TEST(TokenizerTest, QuotedTokensStickTogether) {
   std::string const input = R"(A,"B,C")";
   std::vector<std::string> const expected{"A", "B,C"};
-  EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
+  EXPECT_THAT(EscapedTokenizer(",", {'"'})(input), expected);
 }
 
 TEST(TokenizerTest, QuotedTokensAreEscapable) {
   std::string const input = R"(A,"B\",C")";
   std::vector<std::string> const expected{"A", "B\",C"};
-  EXPECT_THAT(tokenizer(",", {'"', "\\\""})(input), expected);
+  EXPECT_THAT(EscapedTokenizer(",", {'"', "\\\""})(input), expected);
 }
 
 TEST(TokenizerTest, QuoteTokenLiteralIsApplicable) {
   std::string const input = R"(A,"B"",C")";
   std::vector<std::string> const expected{"A", "B\",C"};
-  EXPECT_THAT(tokenizer(",", {'"', "\"\""})(input), expected);
+  EXPECT_THAT(EscapedTokenizer(",", {'"', "\"\""})(input), expected);
 }
 
 TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
   std::string const input = R"(A,B",C")";
   std::vector<std::string> const expected{"A", "B,C"};
-  EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
+  EXPECT_THAT(EscapedTokenizer(",", {'"'})(input), expected);
 }