//
// tokenizer.hpp
// string-utils
//
// Created by Sam Jaffe on 10/8/20.
// Copyright © 2020 Sam Jaffe. All rights reserved.
//
#pragma once

#include <cstddef>
#include <string>
#include <vector>
  11. namespace string_utils {
  12. class tokenizer {
  13. public:
  14. static constexpr size_t const infinite_outputs{~size_t(0)};
  15. private:
  16. std::string divider_;
  17. size_t max_outputs_{infinite_outputs};
  18. bool truncate_{false};
  19. bool ignore_empty_tokens_{true};
  20. bool escapable_{false};
  21. public:
  22. tokenizer(std::string const & divider);
  23. tokenizer &max_outputs(size_t new_max_outputs);
  24. tokenizer &truncate(bool new_truncate_overage);
  25. tokenizer &ignore_empty_tokens(bool new_ignore_empty_tokens);
  26. tokenizer &escapable(bool new_escapable);
  27. std::vector<std::string> operator()(std::string input) const;
  28. private:
  29. std::size_t find(std::string &input, std::size_t from) const;
  30. size_t max_outputs() const;
  31. };
  32. inline auto split(std::string const & data, std::string const & on,
  33. size_t max = tokenizer::infinite_outputs) {
  34. return tokenizer{on}.max_outputs(max)(data);
  35. }
  36. }