tokenizer_test.cxx 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. //
  2. // tokenizer_test.cxx
  3. // string-utils
  4. //
  5. // Created by Sam Jaffe on 10/8/20.
  6. // Copyright © 2020 Sam Jaffe. All rights reserved.
  7. //
  8. #include "string_utils/tokenizer.h"
  9. #include <gmock/gmock.h>
  10. #include <gtest/gtest.h>
  11. using namespace string_utils;
  // Xcode workaround: when XCODE_UNIT_TEST is set to a non-zero value, remap
  // EXPECT_THAT onto ASSERT_THAT so that Xcode properly displays failures when
  // running unit tests. The defined() guard keeps this check warning-free
  // under -Wundef in builds that never define the macro.
  #if defined(XCODE_UNIT_TEST) && XCODE_UNIT_TEST
  #undef EXPECT_THAT
  #define EXPECT_THAT ASSERT_THAT
  #endif
  18. TEST(TokenizerTest, SplitsStringOverToken) {
  19. std::string const input = "A.B.C.D";
  20. std::vector<std::string> const expected{"A", "B", "C", "D"};
  21. EXPECT_THAT(split(input, "."), expected);
  22. }
  23. TEST(TokenizerTest, SplitsStringUpToNTimes) {
  24. std::string const input = "A.B.C.D";
  25. std::vector<std::string> const expected{"A", "B", "C.D"};
  26. EXPECT_THAT(split(input, ".", 3), expected);
  27. }
  28. TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
  29. std::string const input = ".A.B.C";
  30. std::vector<std::string> const expected{"A", "B", "C"};
  31. EXPECT_THAT(split(input, ".", 3), expected);
  32. }
  33. TEST(TokenizerTest, IgnoresEmptyElements) {
  34. std::string const input = "A..B.C";
  35. std::vector<std::string> const expected{"A", "B", "C"};
  36. EXPECT_THAT(split(input, ".", 3), expected);
  37. }
  38. TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
  39. std::string const input = "A.B..C";
  40. std::vector<std::string> const expected{"A", "B", "C"};
  41. EXPECT_THAT(split(input, ".", 3), expected);
  42. }
  43. TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
  44. std::string const input = "A.B.C.D";
  45. std::vector<std::string> const expected{"A", "B", "C"};
  46. EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input),
  47. expected);
  48. }
  49. TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
  50. std::string const input = "A..B.C";
  51. std::vector<std::string> const expected{"A", "", "B.C"};
  52. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  53. expected);
  54. }
  55. TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
  56. std::string const input = "A.B..C";
  57. std::vector<std::string> const expected{"A", "B", ".C"};
  58. EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
  59. expected);
  60. }
  61. TEST(TokenizerTest, EscapableTokensStickTogether) {
  62. std::string const input = R"(A.B\.C)";
  63. std::vector<std::string> const expected{"A", "B.C"};
  64. EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
  65. }
  66. TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
  67. std::string const input = R"(A.B\\.C)";
  68. std::vector<std::string> const expected{"A", R"(B\\)", "C"};
  69. EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
  70. }