#include #include #include #include #include #include #include #include #include #include namespace abnf { grammar parse(std::istream && in) { return parse(in); } static void append(rule & rule, rule_part const & part, bool is_one_of) { if (rule.rules.empty()) { rule.rules.push_back(is_one_of ? one_of(part) : part); } else if (not is_one_of) { rule.rules.push_back(part); } else if (auto * of = std::get_if(&rule.rules.back())) { of->rules.push_back(part); } else { rule.rules.back() = one_of{rule.rules.back(), part}; } } static repeated parse_repeated(std::string_view token) { if (token[0] == '[') { return {.min = 0, .max = 1}; } if (token[0] == '(') { // TODO: Can I just inline this when is_one_of is false? return {.min = 1, .max = 1}; } repeated rval; size_t idx = 0; if (not token.starts_with('*')) { rval.min = std::stoull(std::string(token), &idx); token.remove_prefix(idx); } if (not token.starts_with('*')) { rval.max = rval.min; return rval; } token.remove_prefix(1); if (not token.empty() && std::strchr("123456789", token[0])) { rval.max = std::stoull(std::string(token), &idx); token.remove_prefix(idx); } return rval; } char_range parse_char_range(std::string_view token) { char_range rval; token.remove_prefix(2); char const * const last = token.end(); auto [end, ec] = std::from_chars(token.data(), last, rval.first.value_, 16); if (*end == '-') { ec = std::from_chars(end + 1, last, rval.last.value_, 16).ec; } else { rval.last = rval.first; } return rval; } static std::string parse_rule(std::istream & in, std::string const & name, rule & rule, bool is_one_of = false) { bool expecting_return = false; std::string token; while (in >> token) { if (std::strchr("])", token[0])) { return token; // End Sub-Expression } static constexpr char const s_repeated_chars[] = "0123456789[(*"; if (std::strchr(s_repeated_chars, token[0])) { repeated tmp = parse_repeated(token); if (auto pos = token.find_first_not_of(s_repeated_chars); pos != std::string::npos) { std::stringstream ss(token.substr(pos)); parse_rule(ss, name, tmp.rule); } else { parse_rule(in, name, tmp.rule); } append(rule, tmp, is_one_of); } else if (token.starts_with("%x")) { append(rule, parse_char_range(token), is_one_of); } else if (token.starts_with('"')) { append(rule, literal{token}, is_one_of); } else if (token == "/") { // See Below } else if (token == ";") { std::getline(in, token); // Discard the comment } else if (token.starts_with('<') || std::isalpha(token[0])) { if (expecting_return) { return token; } append(rule, reference{token}, is_one_of); } expecting_return = false; is_one_of = (token == "/"); if (std::strchr("\r\n", in.peek())) { expecting_return = true; } } return ""; } static auto parse_impl(std::istream & in) { std::string first_rule; grammar_base::rule_store store; std::string name; rule rule; auto push_rule = [&]() { if (name.empty() || not std::isalpha(name[0])) { return; } if (first_rule.empty()) { first_rule = name; } store.insert_or_assign(std::move(name), std::move(rule)); }; bool is_one_of = false; std::string token; in >> name; in >> token; // = while (not(token = parse_rule(in, name, rule, is_one_of)).empty()) { if (token != name) { push_rule(); } name = token; in >> token; // = OR /= if ((is_one_of = (token == "/="))) { rule = store[name]; } } push_rule(); return std::make_pair(first_rule, std::move(store)); } grammar parse(std::istream & in) { auto [name, rules] = parse_impl(in); return grammar(name, rules.extract(name).mapped(), rules); } }