Sfoglia il codice sorgente

feat: add a string_utils::join and string_utils::rsplit

Sam Jaffe 3 anni fa
parent
commit
3fe60f3c46

+ 29 - 0
include/string_utils/join.h

@@ -0,0 +1,29 @@
+//
+//  join.h
+//  string-utils
+//
+//  Created by Sam Jaffe on 6/18/22.
+//  Copyright © 2022 Sam Jaffe. All rights reserved.
+//
+
+#pragma once
+
 #include <sstream>
 #include <string>
 #include <string_view>
+
+namespace string_utils {
+template <typename C>
+std::string join(C const & container, std::string_view token) {
+  if (container.empty()) { return ""; }
+  std::stringstream ss;
+
+  auto it = container.begin();
+  ss << *it;
+
+  while (++it != container.end()) {
+    ss << token << *it;
+  }
+
+  return ss.str();
+}
+}

+ 8 - 1
include/string_utils/tokenizer.h

@@ -28,6 +28,7 @@ private:
   bool truncate_{false};
   bool ignore_empty_tokens_{true};
   bool escapable_{false};
+  bool reverse_search_{false};
 
 public:
   tokenizer(std::string divider, struct quote quote = {'\0', ""});
@@ -36,8 +37,9 @@ public:
   tokenizer & truncate(bool new_truncate_overage);
   tokenizer & ignore_empty_tokens(bool new_ignore_empty_tokens);
   tokenizer & escapable(bool new_escapable);
+  tokenizer & reverse_search(bool new_reverse);
 
-  std::vector<std::string> operator()(std::string const & input) const;
+  std::vector<std::string> operator()(std::string input) const;
 
 private:
   size_t max_outputs() const;
@@ -48,4 +50,9 @@ inline auto split(std::string const & data, std::string const & on,
   return tokenizer{on}.max_outputs(max)(data);
 }
 
+inline auto rsplit(std::string const & data, std::string const & on,
+                   size_t max = tokenizer::infinite_outputs) {
+  return tokenizer{on}.reverse_search(true).max_outputs(max)(data);
+}
+
 }

+ 24 - 2
src/tokenizer.cxx

@@ -10,6 +10,10 @@
 
 namespace string_utils {
 
+template <typename C> static void reverse(C & str) {
+  std::reverse(str.begin(), str.end());
+}
+
 tokenizer::tokenizer(std::string divider, struct quote quote)
     : divider_(std::move(divider)), quote_(std::move(quote)) {}
 
@@ -33,17 +37,28 @@ tokenizer & tokenizer::escapable(bool new_escapable) {
   return *this;
 }
 
+tokenizer & tokenizer::reverse_search(bool new_reverse) {
+  if (reverse_search_ != new_reverse) {
+    reverse(divider_);
+    reverse(quote_.escaped);
+  }
+  reverse_search_ = new_reverse;
+  return *this;
+}
+
 /**
  * Counts how many consecutive copies of `c` sit directly in front of index
  * `p` in `str`; the character at `p` itself is not examined.
  *
  * @param str Text to inspect.
  * @param p Index whose preceding characters are counted.
  * @param c Character to count.
  * @return Length of the run of `c` ending at index `p - 1`, or 0 when `p` is
  * 0 or the character before `p` is not `c`.
  */
 static std::size_t countback(std::string const & str, std::size_t p, char c) {
   bool const preceded_by_c = (p != 0) && (str[p - 1] == c);
   if (!preceded_by_c) { return 0; }
   // Index of the nearest character at or before `p - 1` that is not `c`.
   std::size_t const run_stop = str.find_last_not_of(c, p - 1);
   // No differing character was found: the run reaches back to index 0.
   if (run_stop == std::string::npos) { return p; }
   return p - run_stop - 1;
 }
 
-std::vector<std::string>
-tokenizer::operator()(std::string const & input) const {
+std::vector<std::string> tokenizer::operator()(std::string input) const {
   auto equals_from = [&input](std::string const & token, std::size_t from) {
     return token.size() + from < input.size() &&
            std::strncmp(input.c_str() + from, token.c_str(), token.size()) == 0;
   };
+
+  if (reverse_search_) { reverse(input); }
+
   std::vector<std::string> rval;
   std::string buffer;
   buffer.reserve(input.size());
@@ -82,6 +97,13 @@ tokenizer::operator()(std::string const & input) const {
   if (rval.size() < max_outputs_ && !(buffer.empty() && from == input.size())) {
     rval.emplace_back(buffer.empty() ? input.substr(from) : buffer);
   }
+
+  if (reverse_search_) {
+    reverse(rval);
+    for (auto & str : rval) {
+      reverse(str);
+    }
+  }
   return rval;
 }
 

+ 2 - 0
string-utils.xcodeproj/project.pbxproj

@@ -60,6 +60,7 @@
 /* Begin PBXFileReference section */
 		CD11D62025D96C620088CB79 /* cast_test.cxx */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = cast_test.cxx; sourceTree = "<group>"; };
 		CD11D62825D96C990088CB79 /* xcode_gtest_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = xcode_gtest_helper.h; sourceTree = "<group>"; };
+		CD21041F285E45F300F15770 /* join.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = join.h; sourceTree = "<group>"; };
 		CD25F29D28568BCA006AE2BB /* traits.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = traits.h; sourceTree = "<group>"; };
 		CD25F2A328568CB6006AE2BB /* forwards.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = forwards.h; sourceTree = "<group>"; };
 		CD266862252FF4B600B3E667 /* libstring-utils.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libstring-utils.a"; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -177,6 +178,7 @@
 				CDC883E328560A7C0088C91E /* tokenizer.h */,
 				CDC883E428560A7C0088C91E /* cast.h */,
 				CD25F2A328568CB6006AE2BB /* forwards.h */,
+				CD21041F285E45F300F15770 /* join.h */,
 			);
 			path = string_utils;
 			sourceTree = "<group>";