Kaynağa Gözat

Reimplement reverse

Sam Jaffe 2 yıl önce
ebeveyn
işleme
65f2c5b2c9
2 değiştirilmiş dosya ile 24 ekleme ve 14 silme
  1. 16 14
      src/tokenizer.cxx
  2. 8 0
      test/tokenizer_test.cxx

+ 16 - 14
src/tokenizer.cxx

@@ -89,14 +89,15 @@ EscapedTokenizer Tokenizer::escapable(Quote quote) const {
 
 std::vector<std::string_view>
 Tokenizer::operator()(std::string_view input) const {
-  //  if (reverse_search_) { reverse(input); }
-
   std::vector<std::string_view> rval;
   // If max_outputs_ == infinite_outputs, this will be infinite enough to work
   // since we'll hit overflow on the string itself before this.
   std::size_t const max = max_outputs_ - !truncate_;
   std::size_t const qsz = quote_.escaped.size();
   size_t span = 0;
+  auto index = [this, &input, &span]() {
+    return reverse_search_ ? input.size() - span - 1 : span;
+  };
   bool in_quote{false};
   while (not input.empty() and rval.size() < max and span == 0) {
     for (span = 0; span < input.size(); ++span) {
@@ -105,14 +106,21 @@ Tokenizer::operator()(std::string_view input) const {
       // checking for the divider until the end quote is encountered (escaped
       // quotes are processed normally).
       if (not quote_.escaped.empty() and
-          current_token_is(input, span, quote_.escaped)) {
+          current_token_is(input, index(), quote_.escaped)) {
         span += qsz - 1;
-      } else if (input[span] == quote_.on) {
+      } else if (input[index()] == quote_.on) {
         in_quote = !in_quote;
-      } else if (in_quote or not current_token_is(input, span, divider_)) {
+      } else if (in_quote or not current_token_is(input, index(), divider_)) {
         continue;
-      } else if (escapable_ and is_escaped(input, span)) {
+      } else if (escapable_ and is_escaped(input, index())) {
         continue;
+      } else if (reverse_search_) {
+        if (not ignore_empty_tokens_ or span > 0) {
+          rval.push_back(input.substr(index() + 1, span));
+        }
+        input.remove_suffix(span + divider_.size());
+        span = 0;
+        break;
       } else {
         if (not ignore_empty_tokens_ or span > 0) {
           rval.push_back(input.substr(0, span));
@@ -132,15 +140,9 @@ Tokenizer::operator()(std::string_view input) const {
   // actually no data remaining in the buffer/input string, even when we permit
   // empty tokens in our output.
   if (rval.size() < max_outputs_ and not input.empty()) {
-    rval.emplace_back(input);
+    rval.push_back(input);
   }
-
-  //  if (reverse_search_) {
-  //    reverse(rval);
-  //    for (auto & str : rval) {
-  //      reverse(str);
-  //    }
-  //  }
+  if (reverse_search_) { std::reverse(rval.begin(), rval.end()); }
   return rval;
 }
 

+ 8 - 0
test/tokenizer_test.cxx

@@ -48,6 +48,14 @@ TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
   EXPECT_THAT(Tokenizer(".").max_outputs(3).truncate(true)(input), expected);
 }
 
+TEST(TokenizerTest, RTruncateDiscardsOverageInsteadOfNotParsingPast) {
+  std::string const input = "A.B.C.D";
+  std::vector<std::string_view> const expected{"B", "C", "D"};
+  EXPECT_THAT(
+      Tokenizer(".").max_outputs(3).truncate(true).reverse_search(true)(input),
+      expected);
+}
+
 TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
   std::string const input = "A..B.C";
   std::vector<std::string_view> const expected{"A", "", "B.C"};