|
|
@@ -25,3 +25,48 @@ TEST(TokenizerTest, SplitsStringOverToken) {
|
|
|
std::vector<std::string> const expected{"A", "B", "C", "D"};
|
|
|
EXPECT_THAT(split(input, "."), expected);
|
|
|
}
|
|
|
+
|
|
|
+TEST(TokenizerTest, SplitsStringUpToNTimes) {
|
|
|
+ std::string const input = "A.B.C.D";
|
|
|
+ std::vector<std::string> const expected{"A", "B", "C.D"};
|
|
|
+ EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
+}
|
|
|
+
|
|
|
+TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
|
|
|
+ std::string const input = ".A.B.C";
|
|
|
+ std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
+ EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
+}
|
|
|
+
|
|
|
+TEST(TokenizerTest, IgnoresEmptyElements) {
|
|
|
+ std::string const input = "A..B.C";
|
|
|
+ std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
+ EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
+}
|
|
|
+
|
|
|
+TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
|
|
|
+ std::string const input = "A.B..C";
|
|
|
+ std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
+ EXPECT_THAT(split(input, ".", 3), expected);
|
|
|
+}
|
|
|
+
|
|
|
+TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
|
|
|
+ std::string const input = "A.B.C.D";
|
|
|
+ std::vector<std::string> const expected{"A", "B", "C"};
|
|
|
+ EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input),
|
|
|
+ expected);
|
|
|
+}
|
|
|
+
|
|
|
+TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
|
|
|
+ std::string const input = "A..B.C";
|
|
|
+ std::vector<std::string> const expected{"A", "", "B.C"};
|
|
|
+ EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
|
|
|
+ expected);
|
|
|
+}
|
|
|
+
|
|
|
+TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
|
|
|
+ std::string const input = "A.B..C";
|
|
|
+ std::vector<std::string> const expected{"A", "B", ".C"};
|
|
|
+ EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
|
|
|
+ expected);
|
|
|
+}
|