|
|
@@ -24,6 +24,11 @@ namespace jvalidate {
|
|
|
*
|
|
|
* If you need to use complicated patterns in your json schema, provide a
|
|
|
* RegexEngine compatible wrapper for a different library, such as re2.
|
|
|
+ * std::regex does not support graphemes, meaning that multi-byte characters
|
|
|
+ * will need to be wrapped in groups if you want to repeat them.
|
|
|
+ *
|
|
|
+ * Regular expressions are compiled using the default ECMAScript grammar, which
|
|
|
+ * is almost, but not quite, compliant with the ECMA-262 standard.
|
|
|
*/
|
|
|
class StdRegexEngine {
|
|
|
private:
|
|
|
@@ -48,6 +53,26 @@ public:
|
|
|
|
|
|
#if JVALIDATE_HAS_ICU
|
|
|
namespace jvalidate {
|
|
|
+/**
|
|
|
+ * @brief An implementation of a regular expression "engine", for use with
|
|
|
+ * constraints like "pattern" and "patternProperties".
|
|
|
+ * Uses the "International Components for Unicode" (icu4c) library for its
|
|
|
+ * underlying implementation.
|
|
|
+ *
|
|
|
+ * These regular expressions operate on the level of graphemes, rather than
|
|
|
+ * characters. This means that multi-byte characters like emojis will be
|
|
|
+ * treated as single characters for the purpose of "character sets" and
|
|
|
+ * repetition operators.
|
|
|
+ *
|
|
|
+ * This regex engine is not ECMA-262 compliant, which means that certain cases
|
|
|
+ * will not be recognized. This is a notice rather than a true issue, since
|
|
|
+ * many other languages' regex libraries (e.g. Python) are also not ECMA-262
|
|
|
+ * compliant.
|
|
|
+ *
|
|
|
+ * This means that we pass test cases that ECMAScript rejects, such as:
|
|
|
+ * - i18n digit characters are captured by \\d
|
|
|
+ * - i18n characters can be matched by \\w (if they are i18n word chars)
|
|
|
+ */
|
|
|
class ICURegexEngine {
|
|
|
private:
|
|
|
std::unordered_map<std::string, std::unique_ptr<icu::RegexPattern>> cache_;
|