// // tokenizer_test.cxx // string-utils // // Created by Sam Jaffe on 10/8/20. // Copyright © 2020 Sam Jaffe. All rights reserved. // #include "string_utils/tokenizer.h" #include "xcode_gtest_helper.h" using namespace string_utils; TEST(TokenizerTest, SplitsStringOverToken) { std::string const input = "A.B.C.D"; std::vector const expected{"A", "B", "C", "D"}; EXPECT_THAT(split(input, "."), expected); } TEST(TokenizerTest, SplitsStringUpToNTimes) { std::string const input = "A.B.C.D"; std::vector const expected{"A", "B", "C.D"}; EXPECT_THAT(split(input, ".", 3), expected); } TEST(TokenizerTest, IgnoresEmptyElementsAtStart) { std::string const input = ".A.B.C"; std::vector const expected{"A", "B", "C"}; EXPECT_THAT(split(input, ".", 3), expected); } TEST(TokenizerTest, IgnoresEmptyElements) { std::string const input = "A..B.C"; std::vector const expected{"A", "B", "C"}; EXPECT_THAT(split(input, ".", 3), expected); } TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) { std::string const input = "A.B..C"; std::vector const expected{"A", "B", "C"}; EXPECT_THAT(split(input, ".", 3), expected); } TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) { std::string const input = "A.B.C.D"; std::vector const expected{"A", "B", "C"}; EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input), expected); } TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) { std::string const input = "A..B.C"; std::vector const expected{"A", "", "B.C"}; EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input), expected); } TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) { std::string const input = "A.B..C"; std::vector const expected{"A", "B", ".C"}; EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input), expected); } TEST(TokenizerTest, EscapableTokensStickTogether) { std::string const input = R"(A B\ C)"; std::vector const expected{"A", "B C"}; EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected); } 
// NOTE: every `expected` below spells out std::vector<std::string>. Leaving
// the element type to class template argument deduction would turn a list of
// string literals into std::vector<char const *>, which cannot be compared
// element-wise against string tokens.
// NOTE(review): assumes the tokenizer produces std::string tokens - confirm
// against string_utils/tokenizer.h.

// Two backslashes before a divider do not escape it: the input still splits
// at that divider, and both backslashes appear in the expected token.
TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
  std::string const input = R"(A B\\ C)";
  std::vector<std::string> const expected{"A", R"(B\\)", "C"};
  EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
}

// When '"' is configured as the quote character, the quotes surrounding a
// token are not part of the expected output.
TEST(TokenizerTest, QuotesAreDiscarded) {
  std::string const input = R"(A,"B",C)";
  std::vector<std::string> const expected{"A", "B", "C"};
  EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
}

// A divider inside a quoted region does not split the token.
TEST(TokenizerTest, QuotedTokensStickTogether) {
  std::string const input = R"(A,"B,C")";
  std::vector<std::string> const expected{"A", "B,C"};
  EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
}

// With the sequence \" supplied alongside the quote character, a \" inside a
// quoted region yields a literal '"' in the token instead of closing the
// quote.
TEST(TokenizerTest, QuotedTokensAreEscapable) {
  std::string const input = R"(A,"B\",C")";
  std::vector<std::string> const expected{"A", "B\",C"};
  EXPECT_THAT(tokenizer(",", {'"', "\\\""})(input), expected);
}

// A quoted region may begin in the middle of a token; the divider inside it
// is kept and the quote characters themselves are dropped.
TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
  std::string const input = R"(A,B",C")";
  std::vector<std::string> const expected{"A", "B,C"};
  EXPECT_THAT(tokenizer(",", {'"', "\\\""})(input), expected);
}