| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- //
- // tokenizer_test.cxx
- // string-utils
- //
- // Created by Sam Jaffe on 10/8/20.
- // Copyright © 2020 Sam Jaffe. All rights reserved.
- //
- #include "string_utils/tokenizer.h"
- #include <gmock/gmock.h>
- #include <gtest/gtest.h>
- using namespace string_utils;
#if XCODE_UNIT_TEST
// Hack for Xcode builds: route EXPECT_THAT through ASSERT_THAT so that Xcode
// displays unit-test failures properly.
#  undef EXPECT_THAT
#  define EXPECT_THAT ASSERT_THAT
#endif
- TEST(TokenizerTest, SplitsStringOverToken) {
- std::string const input = "A.B.C.D";
- std::vector<std::string> const expected{"A", "B", "C", "D"};
- EXPECT_THAT(split(input, "."), expected);
- }
- TEST(TokenizerTest, SplitsStringUpToNTimes) {
- std::string const input = "A.B.C.D";
- std::vector<std::string> const expected{"A", "B", "C.D"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, IgnoresEmptyElementsAtStart) {
- std::string const input = ".A.B.C";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, IgnoresEmptyElements) {
- std::string const input = "A..B.C";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, IgnoresEmptyElementsOnEnd) {
- std::string const input = "A.B..C";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(split(input, ".", 3), expected);
- }
- TEST(TokenizerTest, TruncateDiscardsOverageInsteadOfNotParsingPast) {
- std::string const input = "A.B.C.D";
- std::vector<std::string> const expected{"A", "B", "C"};
- EXPECT_THAT(tokenizer(".").max_outputs(3).truncate(true)(input),
- expected);
- }
- TEST(TokenizerTest, EmptyIsPlacedCorrectlyWhenEnabled) {
- std::string const input = "A..B.C";
- std::vector<std::string> const expected{"A", "", "B.C"};
- EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
- expected);
- }
- TEST(TokenizerTest, MaxSizeWithEmptyCanResultInTokenWithDividerPrefix) {
- std::string const input = "A.B..C";
- std::vector<std::string> const expected{"A", "B", ".C"};
- EXPECT_THAT(tokenizer(".").max_outputs(3).ignore_empty_tokens(false)(input),
- expected);
- }
- TEST(TokenizerTest, EscapableTokensStickTogether) {
- std::string const input = R"(A.B\.C)";
- std::vector<std::string> const expected{"A", "B.C"};
- EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
- }
- TEST(TokenizerTest, CorrectlySplitsWhenEvenEscapes) {
- std::string const input = R"(A.B\\.C)";
- std::vector<std::string> const expected{"A", R"(B\\)", "C"};
- EXPECT_THAT(tokenizer(".").escapable(true)(input), expected);
- }
|