// // tokenizer.cxx // string-utils // // Created by Sam Jaffe on 10/8/20. // Copyright © 2020 Sam Jaffe. All rights reserved. // #include "string_utils/tokenizer.h" namespace string_utils { tokenizer::tokenizer(std::string divider, struct quote quote) : divider_(std::move(divider)), quote_(std::move(quote)) {} tokenizer &tokenizer::max_outputs(size_t new_max_outputs) { max_outputs_ = new_max_outputs; return *this; } tokenizer &tokenizer::truncate(bool new_truncate) { truncate_ = new_truncate; return *this; } tokenizer &tokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) { ignore_empty_tokens_ = new_ignore_empty_tokens; return *this; } tokenizer &tokenizer::escapable(bool new_escapable) { escapable_ = new_escapable; return *this; } static std::size_t countback(std::string const &str, std::size_t p, char c) { if (p == 0 || str[p - 1] != c) return 0; return p - str.find_last_not_of(c, p - 1) - 1; } std::size_t tokenizer::find(std::string &input, std::size_t from) const { auto pos = input.find(divider_, from); while (escapable_ && pos != std::string::npos && countback(input, pos, '\\') % 2) { input.erase(pos - 1, 1); pos = input.find(divider_, pos); } return pos; } std::vector tokenizer::operator()(std::string input) const { std::vector rval; // If max_outputs_ == infinite_outputs, this will be infinite enough to work // since we'll hit overflow on the string itself before this. size_t const max = max_outputs_ - !truncate_; size_t i = 0; for (size_t n = find(input, i); n != std::string::npos && rval.size() < max; i = n + 1, n = find(input, i)) { if (ignore_empty_tokens_ && i == n) { continue; } rval.emplace_back(input.substr(i, n - i)); } // Special Handling for the final token if (ignore_empty_tokens_ && input.find(divider_, i) == i) { ++i; } if (rval.size() < max_outputs_) { rval.emplace_back(input.substr(i)); } return rval; } }