| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- //
- // tokenizer_test.cxx
- // string-utils
- //
- // Created by Sam Jaffe on 10/8/20.
- // Copyright © 2020 Sam Jaffe. All rights reserved.
- //
- #include "string_utils/tokenizer.h"
- #include "xcode_gtest_helper.h"
- using namespace string_utils;
- // Splitting "A.B.C.D" on "." with no output limit is expected to produce one
- // entry per dot-separated piece.
- TEST(TokenizerTest, SplitsStringOverToken) {
-   using Tokens = std::vector<std::string>;
-   const Tokens pieces = {"A", "B", "C", "D"};
-   const std::string text{"A.B.C.D"};
-   EXPECT_THAT(split(text, "."), pieces);
- }
- // With a third argument of 3 the expected output has three entries, and the
- // last entry keeps the unsplit remainder "C.D".
- TEST(TokenizerTest, SplitsStringUpToNTimes) {
-   const std::vector<std::string> want = {"A", "B", "C.D"};
-   const std::string source{"A.B.C.D"};
-   EXPECT_THAT(split(source, ".", 3), want);
- }
- // The input begins with the delimiter; the expected output contains no empty
- // leading entry.
- TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
-   using Tokens = std::vector<std::string>;
-   const std::string text{".A.B.C"};
-   const Tokens want = {"A", "B", "C"};
-   EXPECT_THAT(split(text, ".", 3), want);
- }
- // Two adjacent delimiters appear between "A" and "B"; the expected output
- // contains no empty entry for the gap.
- TEST(TokenizerTest, IgnoresEmptyElements) {
-   const std::vector<std::string> pieces = {"A", "B", "C"};
-   const std::string source{"A..B.C"};
-   EXPECT_THAT(split(source, ".", 3), pieces);
- }
- // The doubled delimiter sits directly before the last piece; the expected
- // final entry is plain "C", with neither an empty entry nor a leading ".".
- TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
-   using Tokens = std::vector<std::string>;
-   const Tokens want = {"A", "B", "C"};
-   const std::string text{"A.B..C"};
-   EXPECT_THAT(split(text, ".", 3), want);
- }
- // With max_outputs(3) and truncate(true), the fourth piece "D" is expected to
- // be dropped: the last expected entry is "C", not "C.D".
- TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
-   const std::string text{"A.B.C.D"};
-   const std::vector<std::string> kept = {"A", "B", "C"};
-   EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(text), kept);
- }
- // With ignore_empty_tokens(false) the gap between the two adjacent delimiters
- // is expected to show up as an empty second entry; it counts toward the limit
- // of three, so the last expected entry is the remainder "B.C".
- TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
-   using Tokens = std::vector<std::string>;
-   const Tokens want = {"A", "", "B.C"};
-   const std::string source{"A..B.C"};
-   EXPECT_THAT(
-       tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(source), want);
- }
- // With ignore_empty_tokens(false) and a limit of three outputs, the expected
- // third entry is the remainder ".C", which starts with the delimiter itself.
- TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
-   const std::string text{"A.B..C"};
-   const std::vector<std::string> want = {"A", "B", ".C"};
-   EXPECT_THAT(
-       tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(text), want);
- }
- // With escapable(true), the space preceded by a single backslash is expected
- // to stay inside its token, and the backslash is absent from the output.
- TEST(TokenizerTest, EscapableTokensStickTogether) {
-   using Tokens = std::vector<std::string>;
-   const Tokens want = {"A", "B C"};
-   const std::string text{R"(A B\ C)"};
-   EXPECT_THAT(tokenizer(" ").escapable(true)(text), want);
- }
- // Two backslashes precede the second space; the expected output still splits
- // there and keeps both backslashes at the end of the middle entry.
- TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
-   const std::string source{R"(A B\\ C)"};
-   const std::vector<std::string> pieces = {"A", R"(B\\)", "C"};
-   EXPECT_THAT(tokenizer(" ").escapable(true)(source), pieces);
- }
- // The middle field is wrapped in double quotes in the input; the expected
- // entry is the bare "B" without the quote characters.
- TEST(TokenizerTest, QuotesAreDiscarded) {
-   using Tokens = std::vector<std::string>;
-   const Tokens want = {"A", "B", "C"};
-   const std::string text{R"(A,"B",C)"};
-   EXPECT_THAT(tokenizer(",", {'"'})(text), want);
- }
- // A comma inside the double-quoted section is expected not to split: the
- // second entry is "B,C", with the quotes themselves removed.
- TEST(TokenizerTest, QuotedTokensStickTogether) {
-   const std::string source{R"(A,"B,C")"};
-   const std::vector<std::string> pieces = {"A", "B,C"};
-   EXPECT_THAT(tokenizer(",", {'"'})(source), pieces);
- }
- // The quote configuration also receives the two-character sequence
- // backslash + double quote. The input contains that sequence inside the
- // quoted section, and the expected entry holds a plain double quote there.
- TEST(TokenizerTest, QuotedTokensAreEscapable) {
-   using Tokens = std::vector<std::string>;
-   const Tokens want = {"A", "B\",C"};
-   const std::string text{R"(A,"B\",C")"};
-   EXPECT_THAT(tokenizer(",", {'"', "\\\""})(text), want);
- }
- // The opening quote appears after "B", in the middle of the field; the
- // expected entry still joins both sides into "B,C" with the quotes removed.
- TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
-   const std::string source{R"(A,B",C")"};
-   const std::vector<std::string> pieces = {"A", "B,C"};
-   EXPECT_THAT(tokenizer(",", {'"'})(source), pieces);
- }
|