regex.h 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. #pragma once
  2. #include <regex>
  3. #include <unordered_map>
  4. #include <jvalidate/_macro.h>
  5. #if JVALIDATE_HAS_ICU
  6. #include <unicode/regex.h>
  7. #include <unicode/ustring.h>
  8. #include <unicode/utypes.h>
  9. #endif
  10. namespace jvalidate {
  11. /**
  12. * @brief An implementation of a regular expression "engine", for use with
  13. * constraints like "pattern" and "patternProperties".
  14. * Uses std::regex as its underlying implementation.
  15. *
  16. * While being std::regex means that it is the most sensible choice for a
  17. * default RegexEngine, the performance of std::regex is generally the worst
  18. * among C++ regex utilities, and it struggles to compile several patterns.
  19. * See https://stackoverflow.com/questions/70583395/ for an explanation.
  20. *
  21. * If you need to use complicated patterns in your json schema, provide a
  22. * RegexEngine compatible wrapper for a different library, such as re2.
  23. */
  class StdRegexEngine {
  public:
    /**
     * @brief Tests whether the given text can be compiled by std::regex.
     *
     * The pattern is compiled with std::regex's default flags and the result
     * is discarded; nothing is added to any cache.
     *
     * @param regex The candidate pattern.
     * @return true if construction succeeded, false if constructing the
     * pattern threw anything derived from std::exception.
     */
    static bool is_regex(std::string_view regex) {
      try {
        // Both the string copy and the compilation stay inside the try block
        // so that any std::exception raised by either is reported as "false".
        std::string const pattern(regex);
        std::regex const compiled(pattern);
        static_cast<void>(compiled);
      } catch (std::exception const &) {
        return false;
      }
      return true;
    }

    /**
     * @brief Searches text for any match of the given pattern.
     *
     * The compiled pattern is stored in this engine's cache, keyed by its
     * source text, so a repeated pattern is compiled only on first use.
     * No error handling is performed here: an exception thrown while
     * compiling the pattern propagates to the caller.
     *
     * @param regex The pattern to search with.
     * @param text The subject string.
     * @return The result of std::regex_search(text, <compiled regex>).
     */
    bool search(std::string const & regex, std::string const & text) {
      // try_emplace only constructs the std::regex when the key is absent.
      auto const slot = cache_.try_emplace(regex, regex).first;
      std::regex const & compiled = slot->second;
      return std::regex_search(text, compiled);
    }

  private:
    // Compiled patterns, keyed by the pattern's source text.
    std::unordered_map<std::string, std::regex> cache_;
  };
  39. }
  40. #if JVALIDATE_HAS_ICU
  41. namespace jvalidate {
  42. class ICURegexEngine {
  43. private:
  44. std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;
  45. public:
  46. static bool is_regex(std::string_view regex) {
  47. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
  48. UErrorCode status = U_ZERO_ERROR;
  49. UParseError pe;
  50. std::unique_ptr<icu::RegexPattern> tmp(icu::RegexPattern::compile(ucs, pe, status));
  51. return not U_FAILURE(status);
  52. }
  53. bool search(std::string const & regex, std::string const & text) {
  54. auto [it, created] = cache_.try_emplace(regex, nullptr);
  55. if (created) {
  56. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));
  57. UErrorCode status = U_ZERO_ERROR;
  58. UParseError pe;
  59. it->second.reset(icu::RegexPattern::compile(ucs, pe, status));
  60. if (U_FAILURE(status)) {
  61. // TODO: Provide info?
  62. return false;
  63. }
  64. }
  65. UErrorCode status = U_ZERO_ERROR;
  66. icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(text));
  67. std::unique_ptr<icu::RegexMatcher> matcher(it->second->matcher(ucs, status));
  68. if (U_FAILURE(status)) {
  69. return false;
  70. }
  71. return matcher->find(status);
  72. }
  73. };
  74. }
  75. #endif