|
@@ -39,6 +39,10 @@ static std::size_t countback(std::string const &str, std::size_t p, char c) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> tokenizer::operator()(std::string const &input) const {
|
|
std::vector<std::string> tokenizer::operator()(std::string const &input) const {
|
|
|
|
|
+ auto equals_from = [&input](std::string const &token, std::size_t from) {
|
|
|
|
|
+ return token.size() + from < input.size() &&
|
|
|
|
|
+ std::strncmp(input.c_str() + from, token.c_str(), token.size()) == 0;
|
|
|
|
|
+ };
|
|
|
std::vector<std::string> rval;
|
|
std::vector<std::string> rval;
|
|
|
std::string buffer;
|
|
std::string buffer;
|
|
|
buffer.reserve(input.size());
|
|
buffer.reserve(input.size());
|
|
@@ -53,13 +57,12 @@ std::vector<std::string> tokenizer::operator()(std::string const &input) const {
|
|
|
// complexity. Once in a quote, we simply append everything without checking
|
|
// complexity. Once in a quote, we simply append everything without checking
|
|
|
// for the divider until the end quote is encountered (escaped quotes are
|
|
// for the divider until the end quote is encountered (escaped quotes are
|
|
|
// processed normally).
|
|
// processed normally).
|
|
|
- if (qsz > 0 && input.find(quote_.escaped.c_str(), pos, qsz) == pos) {
|
|
|
|
|
|
|
+ if (qsz > 0 && equals_from(quote_.escaped, pos)) {
|
|
|
buffer.append(1, quote_.on);
|
|
buffer.append(1, quote_.on);
|
|
|
pos += qsz - 1;
|
|
pos += qsz - 1;
|
|
|
} else if (input[pos] == quote_.on) {
|
|
} else if (input[pos] == quote_.on) {
|
|
|
in_quote = !in_quote;
|
|
in_quote = !in_quote;
|
|
|
- } else if (input.find(divider_.c_str(), pos, divider_.size()) != pos ||
|
|
|
|
|
- in_quote) {
|
|
|
|
|
|
|
+ } else if (in_quote || !equals_from(divider_, pos)) {
|
|
|
buffer.append(1, input[pos]);
|
|
buffer.append(1, input[pos]);
|
|
|
} else if (escapable_ && countback(input, pos, '\\') % 2) {
|
|
} else if (escapable_ && countback(input, pos, '\\') % 2) {
|
|
|
buffer.back() = input[pos];
|
|
buffer.back() = input[pos];
|
|
@@ -73,7 +76,7 @@ std::vector<std::string> tokenizer::operator()(std::string const &input) const {
|
|
|
}
|
|
}
|
|
|
// Due to the special handling rules of the truncate feature, we need
|
|
// Due to the special handling rules of the truncate feature, we need
|
|
|
// to add an additional layer of handling around empty tokens and buffer
|
|
// to add an additional layer of handling around empty tokens and buffer
|
|
|
- if (ignore_empty_tokens_ && input.find(divider_, from) == from) {
|
|
|
|
|
|
|
+ if (ignore_empty_tokens_ && equals_from(divider_, from)) {
|
|
|
++from;
|
|
++from;
|
|
|
}
|
|
}
|
|
|
if (rval.size() < max_outputs_) {
|
|
if (rval.size() < max_outputs_) {
|