tokenizer_test.cxx 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  //
  // tokenizer_test.cxx
  // string-utils
  //
  // Created by Sam Jaffe on 10/8/20.
  // Copyright © 2020 Sam Jaffe. All rights reserved.
  //
  8. #include "string_utils/tokenizer.h"
  9. #include "xcode_gtest_helper.h"
  10. using namespace string_utils;
  11. TEST(TokenizerTest, SplitsStringOverToken) {
  12. std::string const input = "A.B.C.D";
  13. std::vector<std::string> const expected{"A", "B", "C", "D"};
  14. EXPECT_THAT(split(input, "."), expected);
  15. }
  16. TEST(TokenizerTest, SplitsStringUpToNTimes) {
  17. std::string const input = "A.B.C.D";
  18. std::vector<std::string> const expected{"A", "B", "C.D"};
  19. EXPECT_THAT(split(input, ".", 3), expected);
  20. }
  21. TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
  22. std::string const input = ".A.B.C";
  23. std::vector<std::string> const expected{"A", "B", "C"};
  24. EXPECT_THAT(split(input, ".", 3), expected);
  25. }
  26. TEST(TokenizerTest, IgnoresEmptyElements) {
  27. std::string const input = "A..B.C";
  28. std::vector<std::string> const expected{"A", "B", "C"};
  29. EXPECT_THAT(split(input, ".", 3), expected);
  30. }
  31. TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
  32. std::string const input = "A.B..C";
  33. std::vector<std::string> const expected{"A", "B", "C"};
  34. EXPECT_THAT(split(input, ".", 3), expected);
  35. }
  36. TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
  37. std::string const input = "A.B.C.D";
  38. std::vector<std::string> const expected{"A", "B", "C"};
  39. EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input),
  40. expected);
  41. }
  42. TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
  43. std::string const input = "A..B.C";
  44. std::vector<std::string> const expected{"A", "", "B.C"};
  45. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  46. expected);
  47. }
  48. TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
  49. std::string const input = "A.B..C";
  50. std::vector<std::string> const expected{"A", "B", ".C"};
  51. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  52. expected);
  53. }
  54. TEST(TokenizerTest, EscapableTokensStickTogether) {
  55. std::string const input = R"(A.B\.C)";
  56. std::vector<std::string> const expected{"A", "B.C"};
  57. EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
  58. }
  59. TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
  60. std::string const input = R"(A.B\\.C)";
  61. std::vector<std::string> const expected{"A", R"(B\\)", "C"};
  62. EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
  63. }