|
|
@@ -44,11 +44,19 @@ std::vector<std::string> tokenizer::operator()(std::string const &input) const {
|
|
|
buffer.reserve(input.size());
|
|
|
// If max_outputs_ == infinite_outputs, max is still effectively unlimited,
|
|
|
// since we'll hit overflow on the string itself before this.
|
|
|
- size_t const max = max_outputs_ - !truncate_;
|
|
|
+ std::size_t const max = max_outputs_ - !truncate_;
|
|
|
+ std::size_t const qsz = quote_.escaped.size();
|
|
|
std::size_t from = 0;
|
|
|
bool in_quote{false};
|
|
|
for (std::size_t pos = 0; pos < input.size() && rval.size() < max; ++pos) {
|
|
|
- if (input[pos] == quote_.on) {
|
|
|
+ // We check for escaped-quotes before we check for quotes to minimise
|
|
|
+ // complexity. Once in a quote, we simply append everything without checking
|
|
|
+ // for the divider until the end quote is encountered (escaped quotes are
|
|
|
+ // processed normally).
|
|
|
+ if (qsz > 0 && input.find(quote_.escaped.c_str(), pos, qsz) == pos) {
|
|
|
+ buffer.append(1, quote_.on);
|
|
|
+ pos += qsz - 1;
|
|
|
+ } else if (input[pos] == quote_.on) {
|
|
|
in_quote = !in_quote;
|
|
|
} else if (input.find(divider_.c_str(), pos, divider_.size()) != pos ||
|
|
|
in_quote) {
|
|
|
@@ -63,6 +71,8 @@ std::vector<std::string> tokenizer::operator()(std::string const &input) const {
|
|
|
buffer.clear();
|
|
|
}
|
|
|
}
|
|
|
+ // Due to the special handling rules of the truncate feature, we need
|
|
|
+ // to add an additional layer of handling around empty tokens and the buffer.
|
|
|
if (ignore_empty_tokens_ && input.find(divider_, from) == from) {
|
|
|
++from;
|
|
|
}
|