#pragma once

#include <exception>
#include <memory>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <unordered_map>

#include <jvalidate/_macro.h>

#if JVALIDATE_HAS_ICU
#include <unicode/regex.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#endif

namespace jvalidate {
/**
 * @brief An implementation of a regular expression "engine", for use with
 * constraints like "pattern" and "patternProperties".
 * Uses std::regex as its underlying implementation.
 *
 * While being std::regex means that it is the most sensible choice for a
 * default RegexEngine, the performance of std::regex is generally the worst
 * among C++ regex utilities, and it struggles to compile several patterns.
 * See https://stackoverflow.com/questions/70583395/ for an explanation.
 *
 * If you need to use complicated patterns in your json schema, provide a
 * RegexEngine compatible wrapper for a different library, such as re2.
 * std::regex does not support graphemes, meaning that multi-byte characters
 * will need to be wrapped in groups if you want to repeat them.
 *
 * Regular expressions are compiled using the default ECMAScript flags, which
 * is almost, but not quite, compliant with the ECMA-262 standard.
 */
class StdRegexEngine {
private:
  // Compiled patterns keyed by their source text. An empty optional records a
  // pattern that std::regex rejected, so that later lookups can fail fast
  // instead of recompiling (and rethrowing) on every call.
  std::unordered_map<std::string, std::optional<std::regex>> cache_;

public:
  /// @brief A human-readable identifier for this engine.
  static std::string_view engine_name() { return "std::regex[ECMAScript]"; }

  /**
   * @brief Checks whether a string compiles as a regular expression.
   * @param regex The candidate pattern
   * @return true if std::regex accepts the pattern, false if constructing it
   * throws.
   */
  static bool is_regex(std::string_view regex) try {
    // Compilation is the only validity check; the compiled object is discarded.
    static_cast<void>(std::regex(std::string(regex)));
    return true;
  } catch (std::exception const &) { return false; }

  /**
   * @brief Tests whether a pattern matches anywhere within a piece of text.
   * The compiled pattern (or the fact that it failed to compile) is cached,
   * so each distinct pattern is compiled at most once per engine.
   * @param regex The pattern to search with
   * @param text The text to search in
   * @return true if some part of text matches; false if nothing matches, the
   * pattern is invalid, or a std::exception is raised while compiling or
   * matching.
   */
  bool search(std::string const & regex, std::string const & text) try {
    auto [it, created] = cache_.try_emplace(regex, std::nullopt);
    if (created) {
      try {
        it->second.emplace(regex);
      } catch (std::regex_error const &) {
        // Invalid pattern: keep the empty entry so the failure is remembered.
      } catch (...) {
        // Not the pattern's fault (e.g. allocation failure): forget the
        // placeholder so that a later call can retry compilation.
        cache_.erase(it);
        throw;
      }
    }

    if (!it->second.has_value()) {
      return false; // Regex was invalid - and we cached that
    }
    return std::regex_search(text, *it->second);
  } catch (std::exception const &) { return false; }
};
}

#if JVALIDATE_HAS_ICU
namespace jvalidate {
/**
 * @brief An implementation of a regular expression "engine", for use with
 * constraints like "pattern" and "patternProperties".
 * Uses the "International Components for Unicode" (icu4c) library for its
 * underlying implementation.
 *
 * These regular expressions operate on the level of graphemes, rather than
 * characters. This means that multi-byte characters like emojis will be
 * treated as singular characters for the purpose of "character sets" and
 * repetition operators.
 *
 * This regex engine is not ECMA-262 compliant, which means that certain cases
 * will not be recognized. This is a notice rather than a true issue, since
 * many other languages' regex libraries (e.g. Python) are also not ECMA-262
 * compliant.
 *
 * This means that we pass test cases that ECMAScript rejects, such as:
 * - i18n digit characters are captured by \\d
 * - i18n characters can be matched by \\w (if they are i18nword chars)
 */
class ICURegexEngine {
private:
  std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;

public:
  static std::string_view engine_name() { return "icu::RegexPattern"; }

  static bool is_regex(std::string_view regex) {
    icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));

    UErrorCode status = U_ZERO_ERROR;
    UParseError pe;
    std::unique_ptr<icu::RegexPattern> tmp(icu::RegexPattern::compile(ucs, pe, status));

    return not U_FAILURE(status);
  }

  bool search(std::string const & regex, std::string const & text) {
    auto [it, created] = cache_.try_emplace(regex, nullptr);
    if (created) {
      icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(regex));

      UErrorCode status = U_ZERO_ERROR;
      UParseError pe;
      it->second.reset(icu::RegexPattern::compile(ucs, pe, status));

      if (U_FAILURE(status)) {
        // TODO: Provide info?
        return false;
      }
    }

    if (it->second == nullptr) {
      return false; // Regex was invalid - and we cached that
    }

    UErrorCode status = U_ZERO_ERROR;
    icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(text));
    std::unique_ptr<icu::RegexMatcher> matcher(it->second->matcher(ucs, status));

    JVALIDATE_RETURN_IF(U_FAILURE(status), false); // Doesn't appear possilbe
    return matcher->find(status);
  }
};
}
#endif