| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- //
- // tokenizer.cxx
- // string-utils
- //
- // Created by Sam Jaffe on 10/8/20.
- // Copyright © 2020 Sam Jaffe. All rights reserved.
- //
- #include "string_utils/tokenizer.h"
- namespace string_utils {
- tokenizer::tokenizer(std::string divider, struct quote quote)
- : divider_(std::move(divider)), quote_(std::move(quote)) {}
- tokenizer &tokenizer::max_outputs(size_t new_max_outputs) {
- max_outputs_ = new_max_outputs;
- return *this;
- }
- tokenizer &tokenizer::truncate(bool new_truncate) {
- truncate_ = new_truncate;
- return *this;
- }
- tokenizer &tokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) {
- ignore_empty_tokens_ = new_ignore_empty_tokens;
- return *this;
- }
- tokenizer &tokenizer::escapable(bool new_escapable) {
- escapable_ = new_escapable;
- return *this;
- }
/// Counts the run of consecutive `c` characters that ends immediately before
/// index `p` in `str`.
///
/// @param str The string to inspect.
/// @param p Index whose preceding characters are examined.
/// @param c The character to count.
/// @return Length of the run of `c` directly preceding `p` (0 if `p` is 0 or
///         the character before `p` is not `c`).
static std::size_t countback(std::string const &str, std::size_t p, char c) {
  std::size_t run = 0;
  // Walk backwards from p - 1 until the start of the string or a mismatch.
  while (run < p && str[p - 1 - run] == c) {
    ++run;
  }
  return run;
}
- std::size_t tokenizer::find(std::string &input, std::size_t from) const {
- auto pos = input.find(divider_, from);
- while (escapable_ && pos != std::string::npos &&
- countback(input, pos, '\\') % 2) {
- input.erase(pos - 1, 1);
- pos = input.find(divider_, pos);
- }
- return pos;
- }
- std::vector<std::string> tokenizer::operator()(std::string input) const {
- std::vector<std::string> rval;
- // If max_outputs_ == infinite_outputs, this will be infinite enough to work
- // since we'll hit overflow on the string itself before this.
- size_t const max = max_outputs_ - !truncate_;
- size_t i = 0;
- for (size_t n = find(input, i);
- n != std::string::npos && rval.size() < max;
- i = n + 1, n = find(input, i)) {
- if (ignore_empty_tokens_ && i == n) {
- continue;
- }
- rval.emplace_back(input.substr(i, n - i));
- }
- // Special Handling for the final token
- if (ignore_empty_tokens_ && input.find(divider_, i) == i) {
- ++i;
- }
- if (rval.size() < max_outputs_) {
- rval.emplace_back(input.substr(i));
- }
- return rval;
- }
- }
|