5 years ago · ad57d0fb26
--- a/src/tokenizer.cxx
+++ b/src/tokenizer.cxx
@@ -44,11 +44,19 @@ std::vector<std::string> tokenizer::operator()(std::string const &input) const {
 
				   buffer.reserve(input.size());
			
 
				   // If max_outputs_ == infinite_outputs, this will be infinite enough to work
			
 
				   // since we'll hit overflow on the string itself before this.
			
 
				-  size_t const max = max_outputs_ - !truncate_;
			
 
				+  std::size_t const max = max_outputs_ - !truncate_;
			
 
				+  std::size_t const qsz = quote_.escaped.size();
			
 
				   std::size_t from = 0;
			
 
				   bool in_quote{false};
			
 
				   for (std::size_t pos = 0; pos < input.size() && rval.size() < max; ++pos) {
			
 
				-    if (input[pos] == quote_.on) {
			
 
				+    // We check for escaped-quotes before we check for quotes to minimise
			
 
				+    // complexity. Once in a quote, we simply append everything without checking
			
 
				+    // for the divider until the end quote is encountered (escaped quotes are
			
 
				+    // processed normally).
			
 
				+    if (qsz > 0 && input.find(quote_.escaped.c_str(), pos, qsz) == pos) {
			
 
				+      buffer.append(1, quote_.on);
			
 
				+      pos += qsz - 1;
			
 
				+    } else if (input[pos] == quote_.on) {
			
 
				       in_quote = !in_quote;
			
 
				     } else if (input.find(divider_.c_str(), pos, divider_.size()) != pos ||
			
 
				                in_quote) {
			
@@ -63,6 +71,8 @@ std::vector<std::string> tokenizer::operator()(std::string const &input) const {
 
				       buffer.clear();
			
 
				     }
			
 
				   }
			
 
				+  // Due to the special handling rules of the truncate feature, we need
			
 
				+  // to add an additional layer of handling around empty tokens and buffer
			
 
				   if (ignore_empty_tokens_ && input.find(divider_, from) == from) {
			
 
				     ++from;
			
 
				   }
			
--- a/test/tokenizer_test.cxx
+++ b/test/tokenizer_test.cxx
@@ -93,6 +93,12 @@ TEST(TokenizerTest, QuotedTokensAreEscapable) {
 
				   EXPECT_THAT(tokenizer(",", {'"', "\\\""})(input), expected);
			
 
				 }
			
 
				 
			
 
				+TEST(TokenizerTest, QuoteTokenLiteralIsApplicable) {  // A doubled quote inside a quoted token should come out as one literal quote.
			
 
				+  std::string const input = R"(A,"B"",C")";  // Raw text: plain token A, then a quoted token holding a doubled quote and a comma.
			
 
				+  std::vector<std::string> const expected{"A", "B\",C"};  // Doubled quote collapses to a single quote; the comma inside the quotes does not split the token.
			
 
				+  EXPECT_THAT(tokenizer(",", {'"', "\"\""})(input), expected);  // Comma divider; second argument is presumably {quote char, escaped-quote sequence} - confirm against the tokenizer constructor.
			
 
				+}
			
 
				+
			
 
				 TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
			
 
				   std::string const input = R"(A,B",C")";
			
 
				   std::vector<std::string> const expected{"A", "B,C"};