// // tokenizer.hpp // string-utils // // Created by Sam Jaffe on 10/8/20. // Copyright © 2020 Sam Jaffe. All rights reserved. // #pragma once #include #include #include "string_utils/forwards.h" namespace string_utils { class Tokenizer { public: static constexpr size_t const UNLIMITED = ~0ul; protected: struct Quote { char on; std::string_view escaped; }; private: std::string_view divider_; Quote quote_{'\0', ""}; size_t max_outputs_{UNLIMITED}; bool truncate_{false}; bool ignore_empty_tokens_{true}; bool escapable_{false}; bool reverse_search_{false}; public: explicit Tokenizer(std::string_view divider); Tokenizer & max_outputs(size_t new_max_outputs); Tokenizer & truncate(bool new_truncate_overage); Tokenizer & ignore_empty_tokens(bool new_ignore_empty_tokens); Tokenizer & reverse_search(bool new_reverse); [[nodiscard]] EscapedTokenizer escapable(Quote quote = Quote{'\0', ""}) const; std::vector operator()(std::string && input) const; std::vector operator()(std::string_view input) const; protected: Tokenizer & quote(Quote quote); std::string unescape(std::string_view token) const; }; class EscapedTokenizer : public Tokenizer { public: explicit EscapedTokenizer(std::string_view divider, Quote quote = Quote{'\0', ""}); explicit EscapedTokenizer(Tokenizer impl, Quote quote); EscapedTokenizer & max_outputs(size_t new_max_outputs); EscapedTokenizer & truncate(bool new_truncate_overage); EscapedTokenizer & ignore_empty_tokens(bool new_ignore_empty_tokens); EscapedTokenizer & reverse_search(bool new_reverse); std::vector operator()(std::string && input) const; std::vector operator()(std::string_view input) const; }; inline auto split(std::string const & data, std::string const & on, size_t max = Tokenizer::UNLIMITED) { return Tokenizer{on}.max_outputs(max)(data); } inline auto rsplit(std::string const & data, std::string const & on, size_t max = Tokenizer::UNLIMITED) { return Tokenizer{on}.reverse_search(true).max_outputs(max)(data); } }