|
@@ -14,92 +14,92 @@ using namespace string_utils;
|
|
|
|
|
|
|
|
TEST(TokenizerTest, SplitsStringOverToken) {
|
|
TEST(TokenizerTest, SplitsStringOverToken) {
|
|
|
std::string const input = "A.B.C.D";
|
|
std::string const input = "A.B.C.D";
|
|
|
- std::vector<std::string> const expected{"A", "B", "C", "D"};
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", "C", "D"};
|
|
|
EXPECT_THAT(split(input, "."), expected);
|
|
EXPECT_THAT(split(input, "."), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, SplitsStringUpToNTimes) {
|
|
TEST(TokenizerTest, SplitsStringUpToNTimes) {
|
|
|
std::string const input = "A.B.C.D";
|
|
std::string const input = "A.B.C.D";
|
|
|
- std::vector<std::string> const expected{"A", "B", "C.D"};
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", "C.D"};
|
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
|
|
TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
|
|
|
std::string const input = ".A.B.C";
|
|
std::string const input = ".A.B.C";
|
|
|
- std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", "C"};
|
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, IgnoresEmptyElements) {
|
|
TEST(TokenizerTest, IgnoresEmptyElements) {
|
|
|
std::string const input = "A..B.C";
|
|
std::string const input = "A..B.C";
|
|
|
- std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", "C"};
|
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
|
|
TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
|
|
|
std::string const input = "A.B..C";
|
|
std::string const input = "A.B..C";
|
|
|
- std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", "C"};
|
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
|
|
TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
|
|
|
std::string const input = "A.B.C.D";
|
|
std::string const input = "A.B.C.D";
|
|
|
- std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
|
|
- EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input), expected);
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", "C"};
|
|
|
|
|
+ EXPECT_THAT(Tokenizer(".").max_outputs(3).truncate(true)(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
|
|
TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
|
|
|
std::string const input = "A..B.C";
|
|
std::string const input = "A..B.C";
|
|
|
- std::vector<std::string> const expected{"A", "", "B.C"};
|
|
|
|
|
- EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "", "B.C"};
|
|
|
|
|
+ EXPECT_THAT(Tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
|
|
|
expected);
|
|
expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
|
|
TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
|
|
|
std::string const input = "A.B..C";
|
|
std::string const input = "A.B..C";
|
|
|
- std::vector<std::string> const expected{"A", "B", ".C"};
|
|
|
|
|
- EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
|
|
|
|
|
|
|
+ std::vector<std::string_view> const expected{"A", "B", ".C"};
|
|
|
|
|
+ EXPECT_THAT(Tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
|
|
|
expected);
|
|
expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, EscapableTokensStickTogether) {
|
|
TEST(TokenizerTest, EscapableTokensStickTogether) {
|
|
|
std::string const input = R"(A B\ C)";
|
|
std::string const input = R"(A B\ C)";
|
|
|
std::vector<std::string> const expected{"A", "B C"};
|
|
std::vector<std::string> const expected{"A", "B C"};
|
|
|
- EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(" ")(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
|
|
TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
|
|
|
std::string const input = R"(A B\\ C)";
|
|
std::string const input = R"(A B\\ C)";
|
|
|
std::vector<std::string> const expected{"A", R"(B\\)", "C"};
|
|
std::vector<std::string> const expected{"A", R"(B\\)", "C"};
|
|
|
- EXPECT_THAT(tokenizer(" ").escapable(true)(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(" ")(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, QuotesAreDiscarded) {
|
|
TEST(TokenizerTest, QuotesAreDiscarded) {
|
|
|
std::string const input = R"(A,"B",C)";
|
|
std::string const input = R"(A,"B",C)";
|
|
|
std::vector<std::string> const expected{"A", "B", "C"};
|
|
std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
- EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(",", {'"'})(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, QuotedTokensStickTogether) {
|
|
TEST(TokenizerTest, QuotedTokensStickTogether) {
|
|
|
std::string const input = R"(A,"B,C")";
|
|
std::string const input = R"(A,"B,C")";
|
|
|
std::vector<std::string> const expected{"A", "B,C"};
|
|
std::vector<std::string> const expected{"A", "B,C"};
|
|
|
- EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(",", {'"'})(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, QuotedTokensAreEscapable) {
|
|
TEST(TokenizerTest, QuotedTokensAreEscapable) {
|
|
|
std::string const input = R"(A,"B\",C")";
|
|
std::string const input = R"(A,"B\",C")";
|
|
|
std::vector<std::string> const expected{"A", "B\",C"};
|
|
std::vector<std::string> const expected{"A", "B\",C"};
|
|
|
- EXPECT_THAT(tokenizer(",", {'"', "\\\""})(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(",", {'"', "\\\""})(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, QuoteTokenLiteralIsApplicable) {
|
|
TEST(TokenizerTest, QuoteTokenLiteralIsApplicable) {
|
|
|
std::string const input = R"(A,"B"",C")";
|
|
std::string const input = R"(A,"B"",C")";
|
|
|
std::vector<std::string> const expected{"A", "B\",C"};
|
|
std::vector<std::string> const expected{"A", "B\",C"};
|
|
|
- EXPECT_THAT(tokenizer(",", {'"', "\"\""})(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(",", {'"', "\"\""})(input), expected);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
|
|
TEST(TokenizerTest, QuotesDontNeedToBeAtStartAndEnd) {
|
|
|
std::string const input = R"(A,B",C")";
|
|
std::string const input = R"(A,B",C")";
|
|
|
std::vector<std::string> const expected{"A", "B,C"};
|
|
std::vector<std::string> const expected{"A", "B,C"};
|
|
|
- EXPECT_THAT(tokenizer(",", {'"'})(input), expected);
|
|
|
|
|
|
|
+ EXPECT_THAT(EscapedTokenizer(",", {'"'})(input), expected);
|
|
|
}
|
|
}
|