tokenizer.h 956 B

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. //
  2. // tokenizer.h
  3. // string-utils
  4. //
  5. // Created by Sam Jaffe on 10/8/20.
  6. // Copyright © 2020 Sam Jaffe. All rights reserved.
  7. //
  8. #pragma once
  9. #include <string>
  10. #include <vector>
  11. namespace string_utils {
  12. class tokenizer {
  13. public:
  14. static constexpr size_t const infinite_outputs{~size_t(0)};
  15. private:
  16. std::string divider_;
  17. size_t max_outputs_{infinite_outputs};
  18. bool truncate_{true};
  19. bool ignore_empty_tokens_{true};
  20. public:
  21. tokenizer(std::string const & divider);
  22. tokenizer &max_outputs(size_t new_max_outputs);
  23. tokenizer &truncate(bool new_truncate_overage);
  24. tokenizer &ignore_empty_tokens(bool new_ignore_empty_tokens);
  25. std::vector<std::string> operator()(std::string const &input) const;
  26. private:
  27. size_t max_outputs() const;
  28. };
  29. inline auto split(std::string const & data, std::string const & on,
  30. size_t max = tokenizer::infinite_outputs) {
  31. return tokenizer{on}.max_outputs(max)(data);
  32. }
  33. }