regex.h 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #pragma once
  2. #include <regex>
  3. #include <unordered_map>
  4. #include <jvalidate/_macro.h>
  5. #if JVALIDATE_HAS_ICU
  6. #include <unicode/regex.h>
  7. #include <unicode/ustring.h>
  8. #include <unicode/utypes.h>
  9. #endif
  10. namespace jvalidate {
  /**
   * @brief Default regular expression "engine" used to evaluate schema
   * constraints such as "pattern" and "patternProperties".
   *
   * The engine is a thin wrapper over std::regex (default ECMAScript grammar).
   * Being part of the standard library makes std::regex the most sensible
   * default RegexEngine, but its performance is generally the worst among C++
   * regex utilities, and it struggles to compile several patterns.
   * See https://stackoverflow.com/questions/70583395/ for an explanation.
   *
   * If your json schema relies on complicated patterns, supply a
   * RegexEngine-compatible wrapper around a different library, such as re2.
   */
  class StdRegexEngine {
  public:
    /**
     * @brief Human-readable identifier of this engine.
     */
    static std::string_view engine_name() { return "std::regex[ECMAScript]"; }

    /**
     * @brief Test whether a string can be compiled as a regular expression.
     *
     * @param regex The candidate pattern text.
     *
     * @returns true if std::regex accepts the pattern, false if constructing
     * it raised any std::exception.
     */
    static bool is_regex(std::string_view regex) {
      bool compiles = true;
      try {
        std::string const source(regex);
        [[maybe_unused]] std::regex const probe(source);
      } catch (std::exception const &) {
        compiles = false;
      }
      return compiles;
    }

    /**
     * @brief Check whether a pattern matches anywhere inside a piece of text.
     *
     * The compiled pattern is stored in an internal cache keyed by its source
     * text, so each distinct pattern is compiled at most once per engine.
     *
     * @param regex The pattern text.
     * @param text The text to search.
     *
     * @returns The result of std::regex_search(text, compiled pattern).
     *
     * @throws std::regex_error if the pattern is not yet cached and fails to
     * compile; nothing is added to the cache in that case.
     */
    bool search(std::string const & regex, std::string const & text) {
      // try_emplace only constructs the std::regex when the key is absent.
      auto const entry = cache_.try_emplace(regex, regex).first;
      std::regex const & compiled = entry->second;
      return std::regex_search(text, compiled);
    }

  private:
    // Compiled patterns, keyed by the pattern's source text.
    std::unordered_map<std::string, std::regex> cache_;
  };
  40. }
  41. #if JVALIDATE_HAS_ICU
  42. namespace jvalidate {
  43. class ICURegexEngine {
  44. private:
  45. std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;
  46. public:
  47. static std::string_view engine_name() { return "icu::RegexPattern"; }
  48. static bool is_regex(std::string_view regex) {
  49. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
  50. UErrorCode status = U_ZERO_ERROR;
  51. UParseError pe;
  52. std::unique_ptr<icu::RegexPattern> tmp(icu::RegexPattern::compile(ucs, pe, status));
  53. return not U_FAILURE(status);
  54. }
  55. bool search(std::string const & regex, std::string const & text) {
  56. auto [it, created] = cache_.try_emplace(regex, nullptr);
  57. if (created) {
  58. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
  59. UErrorCode status = U_ZERO_ERROR;
  60. UParseError pe;
  61. it->second.reset(icu::RegexPattern::compile(ucs, pe, status));
  62. if (U_FAILURE(status)) {
  63. // TODO: Provide info?
  64. return false;
  65. }
  66. }
  67. UErrorCode status = U_ZERO_ERROR;
  68. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(text));
  69. std::unique_ptr<icu::RegexMatcher> matcher(it->second->matcher(ucs, status));
  70. if (U_FAILURE(status)) {
  71. return false;
  72. }
  73. return matcher->find(status);
  74. }
  75. };
  76. }
  77. #endif