| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- #pragma once
- #include <regex>
- #include <unordered_map>
- #include <jvalidate/_macro.h>
- #if JVALIDATE_HAS_ICU
- #include <unicode/regex.h>
- #include <unicode/ustring.h>
- #include <unicode/utypes.h>
- #endif
- namespace jvalidate {
/**
 * @brief Default regular expression "engine" used by constraints such as
 * "pattern" and "patternProperties", implemented on top of std::regex.
 *
 * std::regex is the natural default because it ships with the standard
 * library, but it is generally the slowest of the C++ regex options and it
 * fails to compile a number of patterns.
 * See https://stackoverflow.com/questions/70583395/ for an explanation.
 *
 * Schemas that rely on complicated patterns should supply a RegexEngine
 * compatible wrapper around another library (re2, for example) instead.
 */
class StdRegexEngine {
private:
  // Compiled expressions, keyed by the pattern text they were built from.
  std::unordered_map<std::string, std::regex> cache_;

public:
  /// Human readable identifier for this engine.
  static std::string_view engine_name() { return "std::regex[ECMAScript]"; }

  /**
   * @brief Check whether a pattern can be compiled by std::regex.
   *
   * @param regex The candidate pattern text.
   *
   * @returns true if constructing a std::regex from the pattern succeeds,
   * false if construction throws anything derived from std::exception.
   */
  static bool is_regex(std::string_view regex) {
    try {
      std::string const pattern(regex);
      [[maybe_unused]] std::regex const compiled(pattern);
    } catch (std::exception const &) {
      return false;
    }
    return true;
  }

  /**
   * @brief Test whether a pattern matches anywhere inside a piece of text.
   *
   * The compiled expression is stored in the cache the first time a pattern
   * is seen and reused on later calls. Nothing is caught here, so an
   * exception thrown while constructing the std::regex reaches the caller.
   *
   * @param regex The pattern text; also the cache key.
   * @param text The text to search.
   *
   * @returns The result of std::regex_search(text, compiled pattern).
   */
  bool search(std::string const & regex, std::string const & text) {
    // try_emplace only constructs the std::regex when the key is missing.
    auto const entry = cache_.try_emplace(regex, regex).first;
    return std::regex_search(text, entry->second);
  }
};
- }
- #if JVALIDATE_HAS_ICU
- namespace jvalidate {
- class ICURegexEngine {
- private:
- std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;
- public:
- static std::string_view engine_name() { return "icu::RegexPattern"; }
- static bool is_regex(std::string_view regex) {
- icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
- UErrorCode status = U_ZERO_ERROR;
- UParseError pe;
- std::unique_ptr<icu::RegexPattern> tmp(icu::RegexPattern::compile(ucs, pe, status));
- return not U_FAILURE(status);
- }
- bool search(std::string const & regex, std::string const & text) {
- auto [it, created] = cache_.try_emplace(regex, nullptr);
- if (created) {
- icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
- UErrorCode status = U_ZERO_ERROR;
- UParseError pe;
- it->second.reset(icu::RegexPattern::compile(ucs, pe, status));
- if (U_FAILURE(status)) {
- // TODO: Provide info?
- return false;
- }
- }
- UErrorCode status = U_ZERO_ERROR;
- icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(text));
- std::unique_ptr<icu::RegexMatcher> matcher(it->second->matcher(ucs, status));
- if (U_FAILURE(status)) {
- return false;
- }
- return matcher->find(status);
- }
- };
- }
- #endif
|