Add option to escape dividers.

Sam Jaffe, 4 years ago
commit 9b9852d627
3 changed files with 40 additions and 5 deletions
  1. include/string_utils/tokenizer.h (+5, -2)
  2. src/tokenizer.cxx (+23, -3)
  3. test/tokenizer_test.cxx (+12, -0)

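For orientation, a minimal usage sketch of the option this commit adds: with escapable(true), a backslash placed directly in front of the divider keeps the divider inside the token, and the backslash itself is dropped from the output. The include path and the unqualified tokenizer name below mirror the test file; how the library is actually consumed is an assumption, not something this diff states.

#include <cassert>
#include <string>
#include <vector>

#include "string_utils/tokenizer.h"  // assumed include path, per the repo layout

int main() {
  // The escaped '.' between B and C does not split; the backslash is removed.
  std::vector<std::string> const tokens =
      tokenizer(".").escapable(true)(R"(A.B\.C)");
  assert((tokens == std::vector<std::string>{"A", "B.C"}));
  return 0;
}
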
include/string_utils/tokenizer.h (+5, -2)

@@ -21,6 +21,7 @@ private:
   size_t max_outputs_{infinite_outputs};
   bool truncate_{false};
   bool ignore_empty_tokens_{true};
+  bool escapable_{false};
 
 public:
   tokenizer(std::string const & divider);
@@ -28,10 +29,12 @@ public:
   tokenizer &max_outputs(size_t new_max_outputs);
   tokenizer &truncate(bool new_truncate_overage);
   tokenizer &ignore_empty_tokens(bool new_ignore_empty_tokens);
-  
-  std::vector<std::string> operator()(std::string const &input) const;
+  tokenizer &escapable(bool new_escapable);
+
+  std::vector<std::string> operator()(std::string input) const;
   
 private:
+  std::size_t find(std::string &input, std::size_t from) const;
   size_t max_outputs() const;
 };
 

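Two interface changes are worth calling out: operator() now takes its input by value rather than by const reference, because the escape pass erases backslashes from that copy (see the src/tokenizer.cxx hunk below), and the new escapable setter chains like the existing ones. A small sketch of what that means for callers; the key/value input and the include path are illustrative assumptions.

#include <cassert>
#include <string>
#include <vector>

#include "string_utils/tokenizer.h"  // assumed include path

int main() {
  // operator() works on its own copy, so the caller's string keeps the
  // escaping backslash even though the returned token does not.
  std::string line = R"(key\=value=42)";
  auto const parts = tokenizer("=").escapable(true)(line);
  assert((parts == std::vector<std::string>{"key=value", "42"}));
  assert(line == R"(key\=value=42)");
  return 0;
}
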
src/tokenizer.cxx (+23, -3)

@@ -27,15 +27,35 @@ tokenizer &tokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) {
   return *this;
 }
 
-std::vector<std::string> tokenizer::operator()(std::string const &input) const {
+tokenizer &tokenizer::escapable(bool new_escapable) {
+  escapable_ = new_escapable;
+  return *this;
+}
+
+static std::size_t countback(std::string const &str, std::size_t p, char c) {
+  if (p == 0 || str[p - 1] != c) return 0;
+  return p - str.find_last_not_of(c, p - 1) - 1;
+}
+
+std::size_t tokenizer::find(std::string &input, std::size_t from) const {
+  auto pos = input.find(divider_, from);
+  while (escapable_ && pos != std::string::npos &&
+         countback(input, pos, '\\') % 2) {
+    input.erase(pos - 1, 1);
+    pos = input.find(divider_, pos);
+  }
+  return pos;
+}
+
+std::vector<std::string> tokenizer::operator()(std::string input) const {
   std::vector<std::string> rval;
   // If max_outputs_ == infinite_outputs, this will be infinite enough to work
   // since we'll hit overflow on the string itself before this.
   size_t const max = max_outputs_ - !truncate_;
   size_t i = 0;
-  for (size_t n = input.find(divider_);
+  for (size_t n = find(input, i);
        n != std::string::npos && rval.size() < max;
-       i = n + 1, n = input.find(divider_, i)) {
+       i = n + 1, n = find(input, i)) {
     if (ignore_empty_tokens_ && i == n) {
       continue;
     }

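The core of the change is the escape test. countback returns the length of the run of backslashes immediately before position p: an odd run means the last backslash escapes the divider, so find erases that backslash and keeps scanning from the same offset (the now-unescaped divider has shifted one position to the left and is therefore skipped); an even run only escapes other backslashes and leaves the divider active. A standalone re-sketch of countback with the two cases from the new tests; the function body is copied from the hunk above, the driver around it is illustrative.

#include <cassert>
#include <cstddef>
#include <string>

static std::size_t countback(std::string const &str, std::size_t p, char c) {
  if (p == 0 || str[p - 1] != c) return 0;
  return p - str.find_last_not_of(c, p - 1) - 1;
}

int main() {
  std::string const odd = R"(A.B\.C)";    // one backslash before the second '.'
  std::string const even = R"(A.B\\.C)";  // two backslashes before the second '.'

  assert(countback(odd, 4, '\\') == 1);   // odd run: the '.' at index 4 is escaped
  assert(countback(even, 5, '\\') == 2);  // even run: the '.' at index 5 still splits
  assert(countback(odd, 1, '\\') == 0);   // no backslash before the first '.'
  return 0;
}
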
test/tokenizer_test.cxx (+12, -0)

@@ -70,3 +70,15 @@ TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
   EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
               expected);
 }
+
+TEST(TokenizerTest, EscapableTokensStickTogether) {
+  std::string const input = R"(A.B\.C)";
+  std::vector<std::string> const expected{"A", "B.C"};
+  EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
+}
+
+TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
+  std::string const input = R"(A.B\\.C)";
+  std::vector<std::string> const expected{"A", R"(B\\)", "C"};
+  EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
+}
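
The commit only covers the reading side. A hypothetical writer-side helper (not part of this commit) could insert the escapes before tokens are joined; the sketch below puts a backslash in front of every divider occurrence inside a token and leaves other backslashes alone, so a token that happens to end in a backslash is outside its scope, consistent with the even-run test above where such backslashes survive splitting unchanged.

#include <cstddef>
#include <string>

// Hypothetical helper, not part of this commit: escape every occurrence of
// the divider inside a token so that an escapable(true) tokenizer keeps the
// token in one piece and drops the inserted backslash again.
static std::string escape_divider(std::string token, std::string const &divider) {
  for (std::size_t p = token.find(divider); p != std::string::npos;
       p = token.find(divider, p + divider.size() + 1)) {
    token.insert(p, 1, '\\');
  }
  return token;
}

// escape_divider("B.C", ".") yields R"(B\.C)"; joining "A", that result and
// "C" with "." and splitting with tokenizer(".").escapable(true) gives the
// original three tokens back.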