tokenizer_test.cxx 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. //
  2. // tokenizer_test.cxx
  3. // string-utils
  4. //
  5. // Created by Sam Jaffe on 10/8/20.
  6. // Copyright © 2020 Sam Jaffe. All rights reserved.
  7. //
  8. #include "string_utils/tokenizer.h"
  9. #include "xcode_gtest_helper.h"
  10. using namespace string_utils;
  11. TEST(TokenizerTest, SplitsStringOverToken) {
  12. std::string const input = "A.B.C.D";
  13. std::vector<std::string> const expected{"A", "B", "C", "D"};
  14. EXPECT_THAT(split(input, "."), expected);
  15. }
  16. TEST(TokenizerTest, SplitsStringUpToNTimes) {
  17. std::string const input = "A.B.C.D";
  18. std::vector<std::string> const expected{"A", "B", "C.D"};
  19. EXPECT_THAT(split(input, ".", 3), expected);
  20. }
  21. TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
  22. std::string const input = ".A.B.C";
  23. std::vector<std::string> const expected{"A", "B", "C"};
  24. EXPECT_THAT(split(input, ".", 3), expected);
  25. }
  26. TEST(TokenizerTest, IgnoresEmptyElements) {
  27. std::string const input = "A..B.C";
  28. std::vector<std::string> const expected{"A", "B", "C"};
  29. EXPECT_THAT(split(input, ".", 3), expected);
  30. }
  31. TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
  32. std::string const input = "A.B..C";
  33. std::vector<std::string> const expected{"A", "B", "C"};
  34. EXPECT_THAT(split(input, ".", 3), expected);
  35. }
  36. TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
  37. std::string const input = "A.B.C.D";
  38. std::vector<std::string> const expected{"A", "B", "C"};
  39. EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input),
  40. expected);
  41. }
  42. TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
  43. std::string const input = "A..B.C";
  44. std::vector<std::string> const expected{"A", "", "B.C"};
  45. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  46. expected);
  47. }
  48. TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
  49. std::string const input = "A.B..C";
  50. std::vector<std::string> const expected{"A", "B", ".C"};
  51. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  52. expected);
  53. }
  54. TEST(TokenizerTest, EscapableTokensStickTogether) {
  55. std::string const input = R"(A B\ C)";
  56. std::vector<std::string> const expected{"A", "B C"};
  57. EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
  58. }
  59. TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
  60. std::string const input = R"(A B\\ C)";
  61. std::vector<std::string> const expected{"A", R"(B\\)", "C"};
  62. EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
  63. }
  64. TEST(TokenizerTest, QuotesAreDiscarded) {
  65. std::string const input = R"(A,"B",C)";
  66. std::vector<std::string> const expected{"A", "B", "C"};
  67. EXPECT_THAT(tokenizer(",", "\"")(input), expected);
  68. }
  69. TEST(TokenizerTest, QuotedTokensStickTogether) {
  70. std::string const input = R"(A,"B,C")";
  71. std::vector<std::string> const expected{"A", "B,C"};
  72. EXPECT_THAT(tokenizer(",", "\"")(input), expected);
  73. }
  74. TEST(TokenizerTest, QuotedTokensAreAlwaysEscapable) {
  75. std::string const input = R"(A,"B\",C")";
  76. std::vector<std::string> const expected{"A", "B\",C"};
  77. EXPECT_THAT(tokenizer(",", "\"")(input), expected);
  78. }
  79. TEST(TokenizerTest, QuotedTokensDontApplyOutOfFirstChar) {
  80. std::string const input = R"(A,B",C")";
  81. std::vector<std::string> const expected{"A", "B\"", "C\""};
  82. EXPECT_THAT(tokenizer(",", "\"")(input), expected);
  83. }