tokenizer.h 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. //
  2. // tokenizer.hpp
  3. // string-utils
  4. //
  5. // Created by Sam Jaffe on 10/8/20.
  6. // Copyright © 2020 Sam Jaffe. All rights reserved.
  7. //
  8. #pragma once
#include <cstddef>
#include <string>
#include <vector>
  11. namespace string_utils {
  12. class tokenizer {
  13. public:
  14. static constexpr size_t const infinite_outputs{~size_t(0)};
  15. struct quote {
  16. char on;
  17. std::string escaped;
  18. };
  19. private:
  20. std::string divider_;
  21. quote quote_;
  22. size_t max_outputs_{infinite_outputs};
  23. bool truncate_{false};
  24. bool ignore_empty_tokens_{true};
  25. bool escapable_{false};
  26. public:
  27. tokenizer(std::string divider, struct quote quote = {'\0', ""});
  28. tokenizer &max_outputs(size_t new_max_outputs);
  29. tokenizer &truncate(bool new_truncate_overage);
  30. tokenizer &ignore_empty_tokens(bool new_ignore_empty_tokens);
  31. tokenizer &escapable(bool new_escapable);
  32. std::vector<std::string> operator()(std::string input) const;
  33. private:
  34. std::size_t find(std::string &input, std::size_t from) const;
  35. size_t max_outputs() const;
  36. };
  37. inline auto split(std::string const & data, std::string const & on,
  38. size_t max = tokenizer::infinite_outputs) {
  39. return tokenizer{on}.max_outputs(max)(data);
  40. }
  41. }