tokenizer_test.cxx 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. //
  2. // tokenizer_test.cxx
  3. // string-utils
  4. //
  5. // Created by Sam Jaffe on 10/8/20.
  6. // Copyright © 2020 Sam Jaffe. All rights reserved.
  7. //
  8. #include "string_utils/tokenizer.h"
  9. #include "xcode_gtest_helper.h"
  10. using namespace string_utils;
  11. TEST(TokenizerTest, SplitsStringOverToken) {
  12. std::string const input = "A.B.C.D";
  13. std::vector<std::string> const expected{"A", "B", "C", "D"};
  14. EXPECT_THAT(split(input, "."), expected);
  15. }
  16. TEST(TokenizerTest, SplitsStringUpToNTimes) {
  17. std::string const input = "A.B.C.D";
  18. std::vector<std::string> const expected{"A", "B", "C.D"};
  19. EXPECT_THAT(split(input, ".", 3), expected);
  20. }
  21. TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
  22. std::string const input = ".A.B.C";
  23. std::vector<std::string> const expected{"A", "B", "C"};
  24. EXPECT_THAT(split(input, ".", 3), expected);
  25. }
  26. TEST(TokenizerTest, IgnoresEmptyElements) {
  27. std::string const input = "A..B.C";
  28. std::vector<std::string> const expected{"A", "B", "C"};
  29. EXPECT_THAT(split(input, ".", 3), expected);
  30. }
  31. TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
  32. std::string const input = "A.B..C";
  33. std::vector<std::string> const expected{"A", "B", "C"};
  34. EXPECT_THAT(split(input, ".", 3), expected);
  35. }
  36. TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
  37. std::string const input = "A.B.C.D";
  38. std::vector<std::string> const expected{"A", "B", "C"};
  39. EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input), expected);
  40. }
  41. TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
  42. std::string const input = "A..B.C";
  43. std::vector<std::string> const expected{"A", "", "B.C"};
  44. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  45. expected);
  46. }
  47. TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
  48. std::string const input = "A.B..C";
  49. std::vector<std::string> const expected{"A", "B", ".C"};
  50. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  51. expected);
  52. }
  53. TEST(TokenizerTest, EscapableTokensStickTogether) {
  54. std::string const input = R"(A B\ C)";
  55. std::vector<std::string> const expected{"A", "B C"};
  56. EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
  57. }
  58. TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
  59. std::string const input = R"(A B\\ C)";
  60. std::vector<std::string> const expected{"A", R"(B\\)", "C"};
  61. EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
  62. }
  63. TEST(TokenizerTest, QuotesAreDiscarded) {
  64. std::string const input = R"(A,"B",C)";
  65. std::vector<std::string> const expected{"A", "B", "C"};
  66. EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
  67. }
  68. TEST(TokenizerTest, QuotedTokensStickTogether) {
  69. std::string const input = R"(A,"B,C")";
  70. std::vector<std::string> const expected{"A", "B,C"};
  71. EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
  72. }
  73. TEST(TokenizerTest, QuotedTokensAreEscapable) {
  74. std::string const input = R"(A,"B\",C")";
  75. std::vector<std::string> const expected{"A", "B\",C"};
  76. EXPECT_THAT(tokenizer(",", {'"', "\\\""})(input), expected);
  77. }
  78. TEST(TokenizerTest, QuoteTokenLiteralIsApplicable) {
  79. std::string const input = R"(A,"B"",C")";
  80. std::vector<std::string> const expected{"A", "B\",C"};
  81. EXPECT_THAT(tokenizer(",", {'"', "\"\""})(input), expected);
  82. }
  83. TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
  84. std::string const input = R"(A,B",C")";
  85. std::vector<std::string> const expected{"A", "B,C"};
  86. EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
  87. }