tokenizer.h 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
//
// tokenizer.hpp
// string-utils
//
// Created by Sam Jaffe on 10/8/20.
// Copyright © 2020 Sam Jaffe. All rights reserved.
//
  8. #pragma once
  9. #include <string>
  10. #include <vector>
  11. namespace string_utils {
  /**
   * @brief Configurable string tokenizer (declaration only).
   *
   * The member functions are defined outside this header, so the notes below
   * describe the visible interface; anything about runtime behaviour is marked
   * as an assumption to confirm against the implementation file.
   *
   * Call pattern demonstrated by split() in this header:
   * `tokenizer{on}.max_outputs(max)(data)`.
   */
  class tokenizer {
  public:
    /// Largest value representable by std::size_t (all bits set). Used as the
    /// default for max_outputs_, i.e. "no limit configured".
    static constexpr std::size_t infinite_outputs{~std::size_t(0)};

  private:
    // NOTE(review): presumably filled from the constructor arguments of the
    // same names - confirm in the implementation file.
    std::string divider_;
    std::string quotes_;
    /// Output limit; defaults to infinite_outputs.
    std::size_t max_outputs_{infinite_outputs};
    /// Defaults to false. NOTE(review): presumably controls whether input past
    /// the output limit is dropped - confirm in the implementation file.
    bool truncate_{false};
    /// Defaults to true.
    bool ignore_empty_tokens_{true};
    /// Defaults to false. NOTE(review): presumably enables escape handling -
    /// confirm in the implementation file.
    bool escapable_{false};

  public:
    /**
     * @param divider  taken by value.
     * @param quotes   taken by value; defaults to the empty string.
     *
     * NOTE(review): not marked `explicit`, so a std::string converts
     * implicitly to a tokenizer. Left as-is to avoid breaking callers.
     */
    tokenizer(std::string divider, std::string quotes = "");

    // Fluent-style setters: each returns a tokenizer reference so calls can be
    // chained (see split()). Presumably each returns *this - confirm.
    tokenizer &max_outputs(std::size_t new_max_outputs);
    tokenizer &truncate(bool new_truncate_overage);
    tokenizer &ignore_empty_tokens(bool new_ignore_empty_tokens);
    tokenizer &escapable(bool new_escapable);

    /**
     * @brief Tokenizes @p input according to the current configuration.
     * @param input  taken by value, so the caller's string is not modified.
     * @return the resulting tokens as a vector of strings.
     */
    std::vector<std::string> operator()(std::string input) const;

  private:
    // Internal helpers; semantics live in the implementation file.
    // NOTE(review): find() takes `input` by non-const reference, so it may
    // modify the working copy - confirm.
    std::size_t find(std::string &input, std::size_t from) const;
    std::size_t max_outputs() const;
  };
  33. inline auto split(std::string const & data, std::string const & on,
  34. size_t max = tokenizer::infinite_outputs) {
  35. return tokenizer{on}.max_outputs(max)(data);
  36. }
  37. }