Sfoglia il codice sorgente

Merge branch 'master' into feat/format-matcher

# Conflicts:
#	Makefile
#	include/jvalidate/_config.h
#	include/jvalidate/detail/string.h
#	include/jvalidate/forward.h
Sam Jaffe 2 settimane fa
parent
commit
4db4a02747

+ 39 - 4
include/jvalidate/detail/string.h

@@ -7,9 +7,13 @@
 
 #include <cstring>
 #include <memory>
-#include <string>
 #include <string_view>
 
+#if JVALIDATE_HAS_ICU
+#include <unicode/brkiter.h>
+#include <unicode/unistr.h>
+#endif
+
 #if JVALIDATE_HAS_IDNA
 #include <ada/idna/unicode_transcoding.h>
 #endif
@@ -24,10 +28,10 @@ inline std::string_view to_u8(std::string_view arg) { return arg; }
 inline std::u32string_view to_u32(std::u32string_view arg) { return arg; }
 }
 
-#if JVALIDATE_HAS_IDNA
 namespace jvalidate::detail {
+#if JVALIDATE_HAS_IDNA
 /**
- * @brief Calclates the string-length of the argument, treating multi-byte
+ * @brief Calculates the string-length of the argument, treating multi-byte
  * characters as their individual bytes (as if the string was a std::string).
  *
  * @param arg A string encoded in UTF32
@@ -40,7 +44,7 @@ inline size_t length_u8(std::u32string_view arg) {
 }
 
 /**
- * @brief Calclates the string-length of the argument, treating multi-byte
+ * @brief Calculates the string-length of the argument, treating multi-byte
  * characters and unicode graphemes as single characters (which std::string
  * cannot do).
  *
@@ -64,5 +68,36 @@ inline std::u32string to_u32(std::string_view str) {
   size_t bytes = ada::idna::utf8_to_utf32(str.data(), str.length(), data.get());
   return std::u32string(data.get(), data.get() + bytes);
 }
+#elif JVALIDATE_HAS_ICU
+inline size_t length_u32(std::string_view arg) { // ICU-backed variant (the #elif JVALIDATE_HAS_ICU branch)
+  icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
+  return ucs.countChar32(); // count of Unicode code points, not bytes
 }
 #endif
+
+#if JVALIDATE_HAS_IDNA || JVALIDATE_HAS_ICU
+/**
+ * @brief A proxy for jvalidate::detail::length_u32, selected when IDNA or ICU
+ * is available. Callers use length() for maxLength and minLength constraints
+ * so the same call still builds when neither library is in the toolchain.
+ *
+ * @param arg Any UTF8 compatible string (including a standard ASCII string)
+ *
+ * @returns length_u32(arg): a number no greater than arg.length(), depending
+ * on the number of graphemes/codepoints in the string.
+ */
+inline size_t length(std::string_view arg) { return length_u32(arg); }
+#else
+/**
+ * @brief Calculates the string-length of the argument, without attempting to
+ * parse out graphemes or codepoints, so a multi-byte UTF8 character counts once
+ * per byte. This fallback keeps maxLength and minLength constraints usable
+ * even when building without IDNA or ICU in the toolchain.
+ *
+ * @param arg Any UTF8 compatible string (including a standard ASCII string)
+ *
+ * @returns arg.length()
+ */
+inline size_t length(std::string_view arg) { return arg.length(); }
+#endif
+}

+ 2 - 2
include/jvalidate/validation_visitor.h

@@ -270,7 +270,7 @@ public:
   Status visit(constraint::MaxLengthConstraint const & cons, Adapter auto const & document) const {
     NOOP_UNLESS_TYPE(String);
     std::string const str = document.as_string();
-    if (int64_t len = detail::length_u32(str); len > cons.value) {
+    if (int64_t len = detail::length(str); len > cons.value) {
       return result(Status::Reject, "string of length ", len, " is >", cons.value);
     } else {
       return result(Status::Accept, "string of length ", len, " is <=", cons.value);
@@ -280,7 +280,7 @@ public:
   Status visit(constraint::MinLengthConstraint const & cons, Adapter auto const & document) const {
     NOOP_UNLESS_TYPE(String);
     std::string const str = document.as_string();
-    if (int64_t len = detail::length_u32(str); len < cons.value) {
+    if (int64_t len = detail::length(str); len < cons.value) {
       return result(Status::Reject, "string of length ", len, " is <", cons.value);
     } else {
       return result(Status::Accept, "string of length ", len, " is >=", cons.value);