|
|
@@ -0,0 +1,89 @@
|
|
|
+#pragma once
|
|
|
+
|
|
|
+#include <regex>
|
|
|
+#include <unordered_map>
|
|
|
+
|
|
|
+#include <jvalidate/_macro.h>
|
|
|
+
|
|
|
+#if JVALIDATE_HAS_ICU
|
|
|
+#include <unicode/regex.h>
|
|
|
+#include <unicode/ustring.h>
|
|
|
+#include <unicode/utypes.h>
|
|
|
+#endif
|
|
|
+
|
|
|
+namespace jvalidate {
|
|
|
/**
 * @brief The default regular expression "engine" used to evaluate constraints
 * such as "pattern" and "patternProperties".
 * It is a thin, caching wrapper around std::regex.
 *
 * std::regex is the most sensible default because it is part of the standard
 * library. However, its performance is generally the worst among C++ regex
 * utilities, and it struggles to compile several patterns.
 * See https://stackoverflow.com/questions/70583395/ for an explanation.
 *
 * If your json schema needs complicated patterns, provide a RegexEngine
 * compatible wrapper for a different library, such as re2.
 */
class StdRegexEngine {
public:
  /**
   * @brief Check whether a string can be compiled as a std::regex.
   *
   * @param regex The candidate pattern.
   *
   * @return true if a std::regex could be constructed from the pattern, false
   * if anything derived from std::exception was thrown while trying.
   */
  static bool is_regex(std::string_view regex) {
    try {
      // The compiled object is discarded; only success/failure matters here.
      std::string const pattern(regex);
      std::regex const compiled(pattern);
      static_cast<void>(compiled);
    } catch (std::exception const &) {
      return false;
    }
    return true;
  }

  /**
   * @brief Test whether a pattern matches anywhere within a piece of text.
   *
   * The compiled pattern is stored in a cache keyed by the pattern string, so
   * each distinct pattern is only compiled the first time it is seen.
   *
   * @param regex The pattern to search with. Nothing here catches the
   * exception std::regex throws when it cannot compile the pattern.
   * @param text The text to search in.
   *
   * @return The result of std::regex_search for the text and the pattern.
   */
  bool search(std::string const & regex, std::string const & text) {
    auto entry = cache_.find(regex);
    if (entry == cache_.end()) {
      // First use of this pattern: compile it in place inside the cache.
      entry = cache_.try_emplace(regex, regex).first;
    }
    return std::regex_search(text, entry->second);
  }

private:
  /// Compiled patterns, keyed by the pattern's source string.
  std::unordered_map<std::string, std::regex> cache_;
};
|
|
|
+}
|
|
|
+
|
|
|
+#if JVALIDATE_HAS_ICU
|
|
|
+namespace jvalidate {
|
|
|
+class ICURegexEngine {
|
|
|
+private:
|
|
|
+ std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;
|
|
|
+
|
|
|
+public:
|
|
|
+ static bool is_regex(std::string_view regex) {
|
|
|
+ icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
|
|
|
+
|
|
|
+ UErrorCode status = U_ZERO_ERROR;
|
|
|
+ UParseError pe;
|
|
|
+ std::unique_ptr<icu::RegexPattern> tmp(icu::RegexPattern::compile(ucs, pe, status));
|
|
|
+
|
|
|
+ return not U_FAILURE(status);
|
|
|
+ }
|
|
|
+
|
|
|
+ bool search(std::string const & regex, std::string const & text) {
|
|
|
+ auto [it, created] = cache_.try_emplace(regex, nullptr);
|
|
|
+ if (created) {
|
|
|
+ icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
|
|
|
+
|
|
|
+ UErrorCode status = U_ZERO_ERROR;
|
|
|
+ UParseError pe;
|
|
|
+ it->second.reset(icu::RegexPattern::compile(ucs, pe, status));
|
|
|
+
|
|
|
+ if (U_FAILURE(status)) {
|
|
|
+ // TODO: Provide info?
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ UErrorCode status = U_ZERO_ERROR;
|
|
|
+ icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(text));
|
|
|
+ std::unique_ptr<icu::RegexMatcher> matcher(it->second->matcher(ucs, status));
|
|
|
+ if (U_FAILURE(status)) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ return matcher->find(status);
|
|
|
+ }
|
|
|
+};
|
|
|
+}
|
|
|
+#endif
|