| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- //
- // tokenizer_test.cxx
- // string-utils
- //
- // Created by Sam Jaffe on 10/8/20.
- // Copyright © 2020 Sam Jaffe. All rights reserved.
- //
- #include "string_utils/tokenizer.h"
- #include "xcode_gtest_helper.h"
- using namespace string_utils;
- TEST(TokenizerTest, SplitsStringOverToken) {
- std::string const input = "A.B.C.D";
- std::vector<std::string> const expected{"A", "B", "C", "D"};
- EXPECT_THAT(split(input, "."), expected);
- }
- TEST(TokenizerTest, SplitsStringUpToNTimes) {
- std::string const input = "A.B.C.D";
- std::vector<std::string> const expected{"A", "B", "C.D"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
- std::string const input = ".A.B.C";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, IgnoresEmptyElements) {
- std::string const input = "A..B.C";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
- std::string const input = "A.B..C";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
- std::string const input = "A.B.C.D";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input),
- expected);
- }
- TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
- std::string const input = "A..B.C";
- std::vector<std::string> const expected{"A", "", "B.C"};
- EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
- expected);
- }
- TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
- std::string const input = "A.B..C";
- std::vector<std::string> const expected{"A", "B", ".C"};
- EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
- expected);
- }
- TEST(TokenizerTest, EscapableTokensStickTogether) {
- std::string const input = R"(A.B\.C)";
- std::vector<std::string> const expected{"A", "B.C"};
- EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
- }
- TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
- std::string const input = R"(A.B\\.C)";
- std::vector<std::string> const expected{"A", R"(B\\)", "C"};
- EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
- }
|