|
@@ -0,0 +1,137 @@
|
|
|
|
|
#include <cassert>
#include <cctype>
#include <charconv>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>
#include <system_error>
#include <variant>

#include <abnf/forward.h>
#include <abnf/grammar.h>
|
|
|
|
|
+
|
|
|
|
|
+namespace abnf {
|
|
|
|
|
+grammar parse(std::istream && in) { return parse(in); }
|
|
|
|
|
+
|
|
|
|
|
+static void append(rule & rule, rule_part const & part, bool is_one_of) {
|
|
|
|
|
+ if (rule.rules.empty()) {
|
|
|
|
|
+ rule.rules.push_back(is_one_of ? one_of(part) : part);
|
|
|
|
|
+ } else if (not is_one_of) {
|
|
|
|
|
+ rule.rules.push_back(part);
|
|
|
|
|
+ } else if (auto * of = std::get_if<one_of>(&rule.rules.back())) {
|
|
|
|
|
+ of->rules.push_back(part);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ rule.rules.back() = one_of{rule.rules.back(), part};
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static repeated parse_repeated(std::string_view token) {
|
|
|
|
|
+ if (token[0] == '[') { return {.min = 0, .max = 1}; }
|
|
|
|
|
+ if (token[0] == '(') {
|
|
|
|
|
+ // TODO: Can I just inline this when is_one_of is false?
|
|
|
|
|
+ return {.min = 1, .max = 1};
|
|
|
|
|
+ }
|
|
|
|
|
+ repeated rval;
|
|
|
|
|
+ size_t idx = 0;
|
|
|
|
|
+ if (not token.starts_with('*')) {
|
|
|
|
|
+ rval.min = std::stoull(std::string(token), &idx);
|
|
|
|
|
+ token.remove_prefix(idx);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (not token.starts_with('*')) {
|
|
|
|
|
+ rval.max = rval.min;
|
|
|
|
|
+ return rval;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ token.remove_prefix(1);
|
|
|
|
|
+ if (not token.empty() && std::strchr("123456789", token[0])) {
|
|
|
|
|
+ rval.max = std::stoull(std::string(token), &idx);
|
|
|
|
|
+ token.remove_prefix(idx);
|
|
|
|
|
+ }
|
|
|
|
|
+ return rval;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+char_range parse_char_range(std::string_view token) {
|
|
|
|
|
+ char_range rval;
|
|
|
|
|
+ token.remove_prefix(2);
|
|
|
|
|
+ char const * const last = token.end();
|
|
|
|
|
+ auto [end, ec] = std::from_chars(token.data(), last, rval.first, 16);
|
|
|
|
|
+ if (*end == '-') {
|
|
|
|
|
+ ec = std::from_chars(end + 1, last, rval.last, 16).ec;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ rval.last = rval.first;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return rval;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+static std::string parse_rule(std::istream & in, std::string const & name,
|
|
|
|
|
+ rule & rule, bool is_one_of = false) {
|
|
|
|
|
+ bool expecting_return = false;
|
|
|
|
|
+ std::string token;
|
|
|
|
|
+
|
|
|
|
|
+ while (in >> token) {
|
|
|
|
|
+ if (std::strchr("])", token[0])) {
|
|
|
|
|
+ return token; // End Sub-Expression
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ static constexpr char const s_repeated_chars[] = "0123456789[(*";
|
|
|
|
|
+ if (std::strchr(s_repeated_chars, token[0])) {
|
|
|
|
|
+ repeated tmp = parse_repeated(token);
|
|
|
|
|
+ if (auto pos = token.find_first_not_of(s_repeated_chars);
|
|
|
|
|
+ pos != std::string::npos) {
|
|
|
|
|
+ std::stringstream ss(token.substr(pos));
|
|
|
|
|
+ parse_rule(ss, name, tmp.rule);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ parse_rule(in, name, tmp.rule);
|
|
|
|
|
+ }
|
|
|
|
|
+ append(rule, tmp, is_one_of);
|
|
|
|
|
+ } else if (token.starts_with("%x")) {
|
|
|
|
|
+ append(rule, parse_char_range(token), is_one_of);
|
|
|
|
|
+ } else if (token.starts_with('"')) {
|
|
|
|
|
+ append(rule, literal{token}, is_one_of);
|
|
|
|
|
+ } else if (token == "/") {
|
|
|
|
|
+ // See Below
|
|
|
|
|
+ } else if (token == ";") {
|
|
|
|
|
+ std::getline(in, token); // Discard the comment
|
|
|
|
|
+ } else if (token.starts_with('<') || std::isalpha(token[0])) {
|
|
|
|
|
+ if (expecting_return) { return token; }
|
|
|
|
|
+ append(rule, reference{token}, is_one_of);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ expecting_return = false;
|
|
|
|
|
+ is_one_of = (token == "/");
|
|
|
|
|
+ if (std::strchr("\r\n", in.peek())) { expecting_return = true; }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return "";
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+grammar parse(std::istream & in) {
|
|
|
|
|
+ grammar rval;
|
|
|
|
|
+ std::string name;
|
|
|
|
|
+ rule rule;
|
|
|
|
|
+
|
|
|
|
|
+ auto push_rule = [&rval, &name, &rule]() {
|
|
|
|
|
+ if (name.empty() || not std::isalpha(name[0])) {
|
|
|
|
|
+ // PASS
|
|
|
|
|
+ } else if (rval.name_.empty()) {
|
|
|
|
|
+ rval.name_ = std::move(name);
|
|
|
|
|
+ rval.base_rule_ = std::move(rule);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ rval.rules_.insert_or_assign(std::move(name), std::move(rule));
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ bool one_of = false;
|
|
|
|
|
+ std::string token;
|
|
|
|
|
+ in >> name;
|
|
|
|
|
+ in >> token; // =
|
|
|
|
|
+ while (not(token = parse_rule(in, name, rule, one_of)).empty()) {
|
|
|
|
|
+ if (token != name) { push_rule(); }
|
|
|
|
|
+ name = token;
|
|
|
|
|
+ in >> token; // = OR /=
|
|
|
|
|
+ if ((one_of = (token == "/="))) { rule = rval.rules_[name]; }
|
|
|
|
|
+ }
|
|
|
|
|
+ push_rule();
|
|
|
|
|
+ return rval;
|
|
|
|
|
+}
|
|
|
|
|
+}
|