ソースを参照

Create the tokenize function in its entirety.
Write first test case on it.

Sam Jaffe 5 年 前
コミット
cb0a1b37ac

+ 43 - 0
include/string_utils/tokenizer.h

@@ -0,0 +1,43 @@
+//
+//  tokenizer.h
+//  string-utils
+//
+//  Created by Sam Jaffe on 10/8/20.
+//  Copyright © 2020 Sam Jaffe. All rights reserved.
+//
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace string_utils {
+
+// Configurable string splitter. Construct one around a divider string, adjust
+// it through the chainable setters, then invoke it on an input string to get
+// the resulting tokens in order.
+class tokenizer {
+public:
+  // Value of the output limit that means "no limit".
+  static constexpr size_t infinite_outputs{0};
+
+private:
+  // Text that separates tokens in the input.
+  std::string divider_;
+  // Upper bound on the number of tokens returned; infinite_outputs disables it.
+  size_t max_outputs_{infinite_outputs};
+  // Consulted by operator() only when a limit is set: when true, the final
+  // element is the unsplit remainder of the input; when false, whatever lies
+  // past the limit is dropped.
+  // NOTE(review): that reads as inverted relative to the name - confirm intent.
+  bool truncate_{true};
+  // When set, operator() skips zero-length tokens that precede a divider.
+  bool ignore_empty_tokens_{true};
+
+public:
+  // Creates a tokenizer that splits on `divider`.
+  // NOTE(review): not `explicit`, so a std::string converts to a tokenizer
+  // implicitly - confirm that is wanted before tightening it.
+  tokenizer(std::string const & divider);
+
+  // Each setter stores its argument and returns *this so calls can be chained.
+  tokenizer &max_outputs(size_t new_max_outputs);
+  tokenizer &truncate(bool new_truncate);
+  tokenizer &ignore_empty_tokens(bool new_ignore_empty_tokens);
+
+  // Splits `input` on the divider and returns the tokens in order.
+  std::vector<std::string> operator()(std::string const &input) const;
+
+private:
+  // NOTE(review): declared here, but src/tokenizer.cxx provides no definition;
+  // any call would fail to link - confirm whether it is still needed.
+  size_t max_outputs() const;
+};
+
+// Convenience wrapper: splits `data` on the divider `on` using a tokenizer
+// with its default settings, apart from the output limit, which is set to
+// `max` (tokenizer::infinite_outputs means no limit).
+inline auto split(std::string const & data, std::string const & on,
+                  size_t max = tokenizer::infinite_outputs) {
+  tokenizer splitter{on};
+  splitter.max_outputs(max);
+  return splitter(data);
+}
+
+}

+ 48 - 0
src/tokenizer.cxx

@@ -0,0 +1,48 @@
+//
+//  tokenizer.cxx
+//  string-utils
+//
+//  Created by Sam Jaffe on 10/8/20.
+//  Copyright © 2020 Sam Jaffe. All rights reserved.
+//
+
+#include "string_utils/tokenizer.h"
+
+namespace string_utils {
+
+// Builds a tokenizer that splits on a copy of `divider`; every other setting
+// keeps its in-class default.
+tokenizer::tokenizer(std::string const & divider)
+    : divider_{divider} {}
+
+// Sets the cap on how many tokens operator() returns; a value of 0
+// (infinite_outputs) removes the cap. Returns *this for chaining.
+tokenizer &tokenizer::max_outputs(size_t new_max_outputs) {
+  tokenizer &self = *this;
+  self.max_outputs_ = new_max_outputs;
+  return self;
+}
+
+// Stores the flag operator() consults when an output cap is set: when true,
+// splitting stops one token early and the unsplit remainder of the input
+// becomes the final element; when false, splitting runs up to the cap and the
+// remainder is dropped. Returns *this for chaining.
+// NOTE(review): that effect looks inverted relative to the name - confirm.
+tokenizer &tokenizer::truncate(bool new_truncate) {
+  tokenizer &self = *this;
+  self.truncate_ = new_truncate;
+  return self;
+}
+
+// Chooses whether operator() skips zero-length tokens that precede a divider
+// (true) or keeps them in the output (false). Returns *this for chaining.
+tokenizer &tokenizer::ignore_empty_tokens(bool new_ignore_empty_tokens) {
+  tokenizer &self = *this;
+  self.ignore_empty_tokens_ = new_ignore_empty_tokens;
+  return self;
+}
+
+// Splits `input` on every occurrence of the divider and returns the pieces in
+// order.
+//
+// - An empty divider has no split points, so the whole input is one token.
+// - When ignore_empty_tokens_ is set, zero-length tokens (leading, between
+//   consecutive dividers, or trailing) are left out of the result.
+// - When max_outputs_ is non-zero the result never holds more than that many
+//   elements. If truncate_ is true the last element is the unsplit remainder
+//   of the input; if it is false the remainder past the cap is dropped.
+//   NOTE(review): that reads as inverted relative to the flag's name; the
+//   existing behaviour is kept here - confirm the intent.
+std::vector<std::string> tokenizer::operator()(std::string const &input) const {
+  std::vector<std::string> rval;
+
+  // find("") matches at every offset, so without this guard the scan below
+  // would never advance past the first match.
+  if (divider_.empty()) {
+    if (!(ignore_empty_tokens_ && input.empty())) {
+      rval.emplace_back(input);
+    }
+    return rval;
+  }
+
+  // Number of tokens the scan may produce before stopping. With no cap the
+  // vector's own max_size() is a bound that can never be reached first.
+  size_t const max =
+      max_outputs_ ? max_outputs_ - (truncate_ ? 1 : 0) : rval.max_size();
+
+  size_t i = 0; // start of the token currently being scanned
+  for (size_t n = input.find(divider_);
+       n != std::string::npos && rval.size() < max;
+       // Step over the whole divider, not just its first character.
+       i = n + divider_.size(), n = input.find(divider_, i)) {
+    if (ignore_empty_tokens_ && i == n) {
+      continue;
+    }
+    rval.emplace_back(input.substr(i, n - i));
+  }
+
+  // Whatever follows the last consumed divider is the final element, as long
+  // as the cap leaves room for it and it is not an ignored empty token.
+  bool const has_room = !max_outputs_ || rval.size() < max_outputs_;
+  bool const skip_empty_tail = ignore_empty_tokens_ && i == input.size();
+  if (has_room && !skip_empty_tail) {
+    rval.emplace_back(input.substr(i));
+  }
+  return rval;
+}
+
+}

+ 136 - 0
string-utils.xcodeproj/project.pbxproj

@@ -8,6 +8,10 @@
 
 /* Begin PBXBuildFile section */
 		CD26686B252FF4E800B3E667 /* string_utils in Headers */ = {isa = PBXBuildFile; fileRef = CD26686A252FF4E100B3E667 /* string_utils */; settings = {ATTRIBUTES = (Public, ); }; };
+		CD26687E252FF62F00B3E667 /* tokenizer.cxx in Sources */ = {isa = PBXBuildFile; fileRef = CD26687C252FF62F00B3E667 /* tokenizer.cxx */; };
+		CD26688B252FFAAE00B3E667 /* libstring-utils.a in Frameworks */ = {isa = PBXBuildFile; fileRef = CD266862252FF4B600B3E667 /* libstring-utils.a */; };
+		CD2668B4252FFACB00B3E667 /* GoogleMock.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = CD266875252FF51F00B3E667 /* GoogleMock.framework */; };
+		CD2668B5252FFAD200B3E667 /* tokenizer_test.cxx in Sources */ = {isa = PBXBuildFile; fileRef = CD266880252FFA7E00B3E667 /* tokenizer_test.cxx */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXContainerItemProxy section */
@@ -39,12 +43,23 @@
 			remoteGlobalIDString = 05818F901A685AEA0072A469;
 			remoteInfo = GoogleMockTests;
 		};
+		CD26688C252FFAAE00B3E667 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = CD26685A252FF4B600B3E667 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = CD266861252FF4B600B3E667;
+			remoteInfo = "string-utils";
+		};
 /* End PBXContainerItemProxy section */
 
 /* Begin PBXFileReference section */
 		CD266862252FF4B600B3E667 /* libstring-utils.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libstring-utils.a"; sourceTree = BUILT_PRODUCTS_DIR; };
 		CD26686A252FF4E100B3E667 /* string_utils */ = {isa = PBXFileReference; lastKnownFileType = folder; name = string_utils; path = include/string_utils; sourceTree = "<group>"; };
 		CD26686D252FF51F00B3E667 /* GoogleMock.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = GoogleMock.xcodeproj; path = "../../../gmock-xcode-master/GoogleMock.xcodeproj"; sourceTree = "<group>"; };
+		CD26687C252FF62F00B3E667 /* tokenizer.cxx */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = tokenizer.cxx; sourceTree = "<group>"; };
+		CD266880252FFA7E00B3E667 /* tokenizer_test.cxx */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = tokenizer_test.cxx; sourceTree = "<group>"; };
+		CD266886252FFAAE00B3E667 /* string_utils-test.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "string_utils-test.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
+		CD26688A252FFAAE00B3E667 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -55,6 +70,15 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		CD266883252FFAAE00B3E667 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				CD2668B4252FFACB00B3E667 /* GoogleMock.framework in Frameworks */,
+				CD26688B252FFAAE00B3E667 /* libstring-utils.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXFrameworksBuildPhase section */
 
 /* Begin PBXGroup section */
@@ -65,7 +89,9 @@
 				CD26686A252FF4E100B3E667 /* string_utils */,
 				CD26686C252FF4F300B3E667 /* src */,
 				CD266869252FF4C100B3E667 /* test */,
+				CD266887252FFAAE00B3E667 /* string_utils-test */,
 				CD266863252FF4B600B3E667 /* Products */,
+				CD2668AF252FFACB00B3E667 /* Frameworks */,
 			);
 			sourceTree = "<group>";
 		};
@@ -73,6 +99,7 @@
 			isa = PBXGroup;
 			children = (
 				CD266862252FF4B600B3E667 /* libstring-utils.a */,
+				CD266886252FFAAE00B3E667 /* string_utils-test.xctest */,
 			);
 			name = Products;
 			sourceTree = "<group>";
@@ -80,6 +107,7 @@
 		CD266869252FF4C100B3E667 /* test */ = {
 			isa = PBXGroup;
 			children = (
+				CD266880252FFA7E00B3E667 /* tokenizer_test.cxx */,
 			);
 			path = test;
 			sourceTree = "<group>";
@@ -87,6 +115,7 @@
 		CD26686C252FF4F300B3E667 /* src */ = {
 			isa = PBXGroup;
 			children = (
+				CD26687C252FF62F00B3E667 /* tokenizer.cxx */,
 			);
 			path = src;
 			sourceTree = "<group>";
@@ -102,6 +131,21 @@
 			name = Products;
 			sourceTree = "<group>";
 		};
+		CD266887252FFAAE00B3E667 /* string_utils-test */ = {
+			isa = PBXGroup;
+			children = (
+				CD26688A252FFAAE00B3E667 /* Info.plist */,
+			);
+			path = "string_utils-test";
+			sourceTree = "<group>";
+		};
+		CD2668AF252FFACB00B3E667 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
 /* End PBXGroup section */
 
 /* Begin PBXHeadersBuildPhase section */
@@ -133,6 +177,24 @@
 			productReference = CD266862252FF4B600B3E667 /* libstring-utils.a */;
 			productType = "com.apple.product-type.library.static";
 		};
+		CD266885252FFAAE00B3E667 /* string_utils-test */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = CD26688E252FFAAE00B3E667 /* Build configuration list for PBXNativeTarget "string_utils-test" */;
+			buildPhases = (
+				CD266882252FFAAE00B3E667 /* Sources */,
+				CD266883252FFAAE00B3E667 /* Frameworks */,
+				CD266884252FFAAE00B3E667 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				CD26688D252FFAAE00B3E667 /* PBXTargetDependency */,
+			);
+			name = "string_utils-test";
+			productName = "string_utils-test";
+			productReference = CD266886252FFAAE00B3E667 /* string_utils-test.xctest */;
+			productType = "com.apple.product-type.bundle.unit-test";
+		};
 /* End PBXNativeTarget section */
 
 /* Begin PBXProject section */
@@ -145,6 +207,9 @@
 					CD266861252FF4B600B3E667 = {
 						CreatedOnToolsVersion = 11.3.1;
 					};
+					CD266885252FFAAE00B3E667 = {
+						CreatedOnToolsVersion = 11.3.1;
+					};
 				};
 			};
 			buildConfigurationList = CD26685D252FF4B600B3E667 /* Build configuration list for PBXProject "string-utils" */;
@@ -167,6 +232,7 @@
 			projectRoot = "";
 			targets = (
 				CD266861252FF4B600B3E667 /* string-utils */,
+				CD266885252FFAAE00B3E667 /* string_utils-test */,
 			);
 		};
 /* End PBXProject section */
@@ -202,16 +268,43 @@
 		};
 /* End PBXReferenceProxy section */
 
+/* Begin PBXResourcesBuildPhase section */
+		CD266884252FFAAE00B3E667 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
 /* Begin PBXSourcesBuildPhase section */
 		CD26685F252FF4B600B3E667 /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				CD26687E252FF62F00B3E667 /* tokenizer.cxx in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		CD266882252FFAAE00B3E667 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				CD2668B5252FFAD200B3E667 /* tokenizer_test.cxx in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
 /* End PBXSourcesBuildPhase section */
 
+/* Begin PBXTargetDependency section */
+		CD26688D252FFAAE00B3E667 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = CD266861252FF4B600B3E667 /* string-utils */;
+			targetProxy = CD26688C252FFAAE00B3E667 /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
 /* Begin XCBuildConfiguration section */
 		CD266864252FF4B600B3E667 /* Debug */ = {
 			isa = XCBuildConfiguration;
@@ -268,6 +361,7 @@
 				MTL_FAST_MATH = YES;
 				ONLY_ACTIVE_ARCH = YES;
 				SDKROOT = macosx;
+				USER_HEADER_SEARCH_PATHS = include/;
 			};
 			name = Debug;
 		};
@@ -319,6 +413,7 @@
 				MTL_ENABLE_DEBUG_INFO = NO;
 				MTL_FAST_MATH = YES;
 				SDKROOT = macosx;
+				USER_HEADER_SEARCH_PATHS = include/;
 			};
 			name = Release;
 		};
@@ -342,6 +437,38 @@
 			};
 			name = Release;
 		};
+		CD26688F252FFAAE00B3E667 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CODE_SIGN_STYLE = Automatic;
+				COMBINE_HIDPI_IMAGES = YES;
+				INFOPLIST_FILE = "string_utils-test/Info.plist";
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/../Frameworks",
+					"@loader_path/../Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = "leumasjaffe.string-utils-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Debug;
+		};
+		CD266890252FFAAE00B3E667 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				CODE_SIGN_STYLE = Automatic;
+				COMBINE_HIDPI_IMAGES = YES;
+				INFOPLIST_FILE = "string_utils-test/Info.plist";
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/../Frameworks",
+					"@loader_path/../Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = "leumasjaffe.string-utils-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+			};
+			name = Release;
+		};
 /* End XCBuildConfiguration section */
 
 /* Begin XCConfigurationList section */
@@ -363,6 +490,15 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		CD26688E252FFAAE00B3E667 /* Build configuration list for PBXNativeTarget "string_utils-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				CD26688F252FFAAE00B3E667 /* Debug */,
+				CD266890252FFAAE00B3E667 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 /* End XCConfigurationList section */
 	};
 	rootObject = CD26685A252FF4B600B3E667 /* Project object */;

+ 22 - 0
string_utils-test/Info.plist

@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>$(PRODUCT_BUNDLE_PACKAGE_TYPE)</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+</dict>
+</plist>

+ 20 - 0
test/tokenizer_test.cxx

@@ -0,0 +1,20 @@
+//
+//  tokenizer_test.cxx
+//  string-utils
+//
+//  Created by Sam Jaffe on 10/8/20.
+//  Copyright © 2020 Sam Jaffe. All rights reserved.
+//
+
+#include "string_utils/tokenizer.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+using namespace string_utils;
+
+// Splitting "A.B.C.D" on "." with the default settings must yield the four
+// single-letter pieces, in order.
+TEST(TokenizerTest, SplitsStringOverToken) {
+  std::vector<std::string> const pieces{"A", "B", "C", "D"};
+  std::string const text = "A.B.C.D";
+  ASSERT_THAT(split(text, "."), pieces);
+}