Pārlūkot izejas kodu

feat: add ICU Regex Engine option

Sam Jaffe 7 mēneši atpakaļ
vecāks
revīzija
be8d8dcf1d
3 mainītis faili ar 92 papildinājumiem un 36 dzēšanām
  1. 1 1
      Makefile
  2. 89 0
      include/jvalidate/regex.h
  3. 2 35
      include/jvalidate/validator.h

+ 1 - 1
Makefile

@@ -13,7 +13,7 @@ CXX_FLAGS := -Wall -Wextra -Werror -std=c++20 \
 	     -isystem include/ -I/opt/homebrew/opt/icu4c/include \
 	     -DJVALIDATE_USE_EXCEPTIONS -DJVALIDATE_LOAD_FAILURE_AS_FALSE_SCHEMA
 
-LD_FLAGS := -L/opt/homebrew/lib -L/opt/homebrew/opt/icu4c/lib -licuuc -lada-idna
+LD_FLAGS := -L/opt/homebrew/lib -L/opt/homebrew/opt/icu4c/lib -licuuc -licui18n -lada-idna
 
 TEST_DIR := tests/
 INCLUDE_DIR := include/

+ 89 - 0
include/jvalidate/regex.h

@@ -0,0 +1,89 @@
+#pragma once
+
+#include <regex>
+#include <unordered_map>
+
+#include <jvalidate/_macro.h>
+
+#if JVALIDATE_HAS_ICU
+#include <unicode/regex.h>
+#include <unicode/ustring.h>
+#include <unicode/utypes.h>
+#endif
+
+namespace jvalidate {
+/**
+ * @brief An implementation of a regular expression "engine", for use with
+ * constraints like "pattern" and "patternProperties".
+ * Uses std::regex as its underlying implementation.
+ *
+ * While being std::regex means that it is the most sensible choice for a
+ * default RegexEngine, the performance of std::regex is generally the worst
+ * among C++ regex utilities, and it struggles to compile several patterns.
+ * See https://stackoverflow.com/questions/70583395/ for an explanation.
+ *
+ * If you need to use complicated patterns in your json schema, provide a
+ * RegexEngine compatible wrapper for a different library, such as re2.
+ *
+ * is_regex(regex) reports whether a std::regex can be constructed from the
+ * given string; any std::exception thrown during construction yields false.
+ *
+ * search(regex, text) compiles the pattern on first use, keeps the compiled
+ * std::regex in a per-instance cache keyed by the pattern string, and returns
+ * the result of std::regex_search on the text. Exceptions thrown while
+ * constructing the std::regex are not caught by search().
+ */
+class StdRegexEngine {
+private:
+  std::unordered_map<std::string, std::regex> cache_; // compiled patterns, keyed by pattern source
+
+public:
+  static bool is_regex(std::string_view regex) {
+    try {
+      [[maybe_unused]] std::regex _{std::string(regex)}; // constructed only to see whether it throws
+      return true;
+    } catch (std::exception const &) { return false; }
+  }
+
+  bool search(std::string const & regex, std::string const & text) {
+    auto const & re = cache_.try_emplace(regex, regex).first->second; // reuse cached entry, else compile and insert
+    return std::regex_search(text, re);
+  }
+};
+}
+
+#if JVALIDATE_HAS_ICU
+namespace jvalidate {
+/**
+ * @brief A RegexEngine implemented on top of ICU's icu::RegexPattern and
+ * icu::RegexMatcher.
+ *
+ * Patterns and subject text are passed in as UTF-8 and converted to
+ * icu::UnicodeString before being handed to ICU. Compiled patterns are cached
+ * per engine instance, keyed by the pattern string. A pattern that ICU fails
+ * to compile is cached as a null entry, so it is not recompiled on later
+ * calls and never matches.
+ */
+class ICURegexEngine {
+private:
+  // Compiled patterns keyed by pattern source. A nullptr value marks a
+  // pattern whose compilation failed.
+  std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;
+
+public:
+  /**
+   * @brief Test whether a string compiles as an ICU regular expression.
+   *
+   * @param regex The candidate pattern, UTF-8 encoded
+   *
+   * @returns true if icu::RegexPattern::compile reports no failure
+   */
+  static bool is_regex(std::string_view regex) {
+    icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
+
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError pe;
+    // Owned by a unique_ptr only so that the compiled pattern is released.
+    std::unique_ptr<icu::RegexPattern> tmp(icu::RegexPattern::compile(ucs, pe, status));
+
+    return not U_FAILURE(status);
+  }
+
+  /**
+   * @brief Search for a match of a pattern anywhere in a piece of text.
+   *
+   * @param regex The pattern, UTF-8 encoded. Compiled on first use and cached.
+   * @param text The subject text, UTF-8 encoded
+   *
+   * @returns true if the pattern compiled and a match was found; false if
+   * there is no match, if the pattern does not compile, or if ICU reports an
+   * error while creating or running the matcher.
+   */
+  bool search(std::string const & regex, std::string const & text) {
+    auto [it, created] = cache_.try_emplace(regex, nullptr);
+    if (created) {
+      icu::UnicodeString const pattern = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
+
+      UErrorCode status = U_ZERO_ERROR;
+      UParseError pe;
+      it->second.reset(icu::RegexPattern::compile(pattern, pe, status));
+
+      if (U_FAILURE(status)) {
+        // Make sure a failed compile is recorded as a null entry.
+        it->second.reset();
+      }
+    }
+
+    // A null entry means compilation failed, either just now or on an earlier
+    // call with the same pattern. Checking here (rather than only when the
+    // entry was created) prevents dereferencing the cached null pointer on
+    // repeated searches with an invalid pattern.
+    if (not it->second) {
+      // TODO: Provide info?
+      return false;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(text));
+    std::unique_ptr<icu::RegexMatcher> matcher(it->second->matcher(ucs, status));
+    if (U_FAILURE(status) or not matcher) {
+      return false;
+    }
+
+    bool const found = matcher->find(status);
+    return found and not U_FAILURE(status);
+  }
+};
+}
+#endif

+ 2 - 35
include/jvalidate/validator.h

@@ -1,45 +1,12 @@
 #pragma once
 
-#include <regex>
-#include <unordered_map>
-
 #include <jvalidate/forward.h>
+#include <jvalidate/regex.h>
 #include <jvalidate/status.h>
 #include <jvalidate/validation_config.h>
 #include <jvalidate/validation_visitor.h>
 
 namespace jvalidate::detail {
-/**
- * @brief An implementation of a regular expression "engine", for use with
- * constraints like "pattern" and "patternProperties".
- * Uses std::regex as its underlying implementation.
- *
- * While being std::regex means that it is the most sensible choice for a
- * default RegexEngine, the performance of std::regex is generally the worst
- * among C++ regex utilities, and it struggles to compile several patterns.
- * See https://stackoverflow.com/questions/70583395/ for an explaination.
- *
- * If you need to use complicated patterns in your json schema, provide a
- * RegexEngine compatible wrapper for a different library, such as re2.
- */
-class StdRegexEngine {
-private:
-  std::unordered_map<std::string, std::regex> cache_;
-
-public:
-  static bool is_regex(std::string_view regex) {
-    try {
-      [[maybe_unused]] std::regex _{std::string(regex)};
-      return true;
-    } catch (std::exception const &) { return false; }
-  }
-
-  bool search(std::string const & regex, std::string const & text) {
-    auto const & re = cache_.try_emplace(regex, regex).first->second;
-    return std::regex_search(text, re);
-  }
-};
-
 /**
  * @brief An implementation of an "Extension Constraint Visitor" plugin that
  * does nothing.
@@ -54,7 +21,7 @@ namespace jvalidate {
  *
  * @tparam RE A type that can be used to solve regular expressions
  */
-template <RegexEngine RE = detail::StdRegexEngine,
+template <RegexEngine RE = JVALIDATE_IIF(JVALIDATE_HAS_ICU, ICURegexEngine, StdRegexEngine),
           typename ExtensionVisitor = detail::StubExtensionVisitor>
 class Validator {
 private: