7 mēneši atpakaļ · 5bea563a43
--- a/include/jvalidate/detail/string.h
+++ b/include/jvalidate/detail/string.h
@@ -32,64 +32,6 @@ inline size_t length(std::string_view arg) {
 
															   return ucs.countChar32();
														
 
															 }
														
 
															-/**
														
 
															- * @brief Ensures that any codepoints/graphemes in the given regular expression
														
 
															- * are wrapped in parenthesis in order to ensure that e.g. <PIRATE-EMOJI>*
														
 
															- * properly matches the entire emoji multiple times, instead of just the last
														
 
															- * byte of the string.
														
 
															- *
														
 
															- * Because we are only performing a regex search, and not matching/capturing
														
 
															- * groups - we don't care that all of these extra parenthesis cause us to
														
 
															- * generate new capture-groups or push some of the groups to a later point.
														
 
															- *
														
 
															- * @param arg A regular expression string, to be sanitized for UTF8 pattern-
														
 
															- * matching.
														
 
															- *
														
 
															- * @returns The regular expression, with some more parenthesis added.
														
 
															- */
														
 
															-inline std::string regex_escape(std::string_view arg) {
														
 
															-  icu::UnicodeString const ucs = icu::UnicodeString::fromUTF8(icu::StringPiece(arg));
														
 
															-  // Short-circuit if there are no multi-byte codepoints or graphemes, since
														
 
															-  // C++ regexes don't have any problems with those.
														
 
															-  if (ucs.countChar32() == arg.size()) {
														
 
															-    return std::string(arg);
														
 
															-  }
														
 
															-
														
 
															-  UErrorCode status = U_ZERO_ERROR;
														
 
															-  // createCharacterInstance directly uses new - without any special allocation
														
 
															-  // rules or cleanup, since the first argument is NULL.
														
 
															-  std::unique_ptr<icu::BreakIterator> iter(
														
 
															-      icu::BreakIterator::createCharacterInstance(NULL, status));
														
 
															-
														
 
															-  // This should never occur - unless there's like an alloc error
														
 
															-  if (U_FAILURE(status)) {
														
 
															-    return std::string(arg);
														
 
															-  }
														
 
															-
														
 
															-  icu::UnicodeString rval;
														
 
															-  iter->setText(ucs);
														
 
															-  int32_t start = iter->first();
														
 
															-  int32_t end = iter->next();
														
 
															-  while (end != icu::BreakIterator::DONE) {
														
 
															-    // 0-or-1, 1-or-more, 0-or-more markings
														
 
															-    // This could be optimized to only operate when on a multibyte character
														
 
															-    if (std::strchr("?*+", ucs.charAt(end))) {
														
 
															-      rval.append('(');
														
 
															-      rval.append(ucs, start, end - start);
														
 
															-      rval.append(')');
														
 
															-      rval.append(ucs.char32At(end));
														
 
															-      end = iter->next();
														
 
															-    } else {
														
 
															-      rval.append(ucs, start, end - start);
														
 
															-    }
														
 
															-    start = end;
														
 
															-    end = iter->next();
														
 
															-  }
														
 
															-
														
 
															-  std::string out;
														
 
															-  return rval.toUTF8String(out);
														
 
															-}
														
 
															-
														
 
															 inline std::string_view to_u8(std::string_view arg) { return arg; }
														
 
															 inline std::string to_u8(std::u32string_view arg) {
														
--- a/include/jvalidate/validator.h
+++ b/include/jvalidate/validator.h
@@ -35,7 +35,6 @@ public:
 
															   }
														
 
															   bool search(std::string const & regex, std::string const & text) {
														
 
															-    // TODO: detail::regex_escape
														
 
															     auto const & re = cache_.try_emplace(regex, regex).first->second;
														
 
															     return std::regex_search(text, re);
														
 
															   }