tokenizer.h 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. //
  2. // tokenizer.hpp
  3. // string-utils
  4. //
  5. // Created by Sam Jaffe on 10/8/20.
  6. // Copyright © 2020 Sam Jaffe. All rights reserved.
  7. //
  8. #pragma once
  9. #include <string>
  10. #include <vector>
  11. namespace string_utils {
  // Configurable, callable string splitter.
  //
  // Only declarations live in this header; every member function is defined
  // out of line. The comments below therefore describe the interface and the
  // defaults that are visible here, not the splitting algorithm itself.
  class tokenizer {
  public:
    // All-bits-set size_t (the largest representable value). It is the initial
    // value of the output limit; presumably it is treated as "no limit" --
    // confirm against the implementation.
    static constexpr size_t infinite_outputs = ~size_t(0);

    // Quoting configuration accepted by the constructor.
    struct quote {
      // Quote character. The constructor's default argument sets it to '\0'.
      char on;
      // NOTE(review): looks like the escaped spelling of `on` inside quoted
      // text -- verify in the implementation. Defaults to "" in the
      // constructor's default argument.
      std::string escaped;
    };

    // Creates a tokenizer from a divider string and an optional quote
    // configuration. When `quote` is omitted, {'\0', ""} is used.
    tokenizer(std::string divider, struct quote quote = {'\0', ""});

    // Configuration setters. Each returns a tokenizer reference, so calls can
    // be chained and the result invoked directly. What each option does to the
    // produced tokens is decided by the out-of-line definitions.
    tokenizer& max_outputs(size_t new_max_outputs);
    tokenizer& truncate(bool new_truncate_overage);
    tokenizer& ignore_empty_tokens(bool new_ignore_empty_tokens);
    tokenizer& escapable(bool new_escapable);

    // Tokenizes `input` and returns the resulting strings. Declared const, so
    // it can be invoked on a const tokenizer.
    std::vector<std::string> operator()(const std::string& input) const;

  private:
    // Internal accessor for the output limit (overloads the public setter).
    size_t max_outputs() const;

    // State, in initialization order. Defaults: unlimited outputs, no
    // truncation, empty tokens ignored, escaping disabled.
    std::string divider_;
    quote quote_;
    size_t max_outputs_ = infinite_outputs;
    bool truncate_ = false;
    bool ignore_empty_tokens_ = true;
    bool escapable_ = false;
  };
  36. inline auto split(std::string const & data, std::string const & on,
  37. size_t max = tokenizer::infinite_outputs) {
  38. return tokenizer{on}.max_outputs(max)(data);
  39. }
  40. }