| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
#include <cassert>
#include <cctype>
#include <charconv>
#include <cstring>
#include <ios>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <variant>

#include <abnf/forward.h>
#include <abnf/grammar.h>
- namespace abnf {
- grammar parse(std::istream && in) { return parse(in); }
- static void append(rule & rule, rule_part const & part, bool is_one_of) {
- if (rule.rules.empty()) {
- rule.rules.push_back(is_one_of ? one_of(part) : part);
- } else if (not is_one_of) {
- rule.rules.push_back(part);
- } else if (auto * of = std::get_if<one_of>(&rule.rules.back())) {
- of->rules.push_back(part);
- } else {
- rule.rules.back() = one_of{rule.rules.back(), part};
- }
- }
- static repeated parse_repeated(std::string_view token) {
- if (token[0] == '[') { return {.min = 0, .max = 1}; }
- if (token[0] == '(') {
- // TODO: Can I just inline this when is_one_of is false?
- return {.min = 1, .max = 1};
- }
- repeated rval;
- size_t idx = 0;
- if (not token.starts_with('*')) {
- rval.min = std::stoull(std::string(token), &idx);
- token.remove_prefix(idx);
- }
- if (not token.starts_with('*')) {
- rval.max = rval.min;
- return rval;
- }
- token.remove_prefix(1);
- if (not token.empty() && std::strchr("123456789", token[0])) {
- rval.max = std::stoull(std::string(token), &idx);
- token.remove_prefix(idx);
- }
- return rval;
- }
- char_range parse_char_range(std::string_view token) {
- char_range rval;
- token.remove_prefix(2);
- char const * const last = token.end();
- auto [end, ec] = std::from_chars(token.data(), last, rval.first.value_, 16);
- if (*end == '-') {
- ec = std::from_chars(end + 1, last, rval.last.value_, 16).ec;
- } else {
- rval.last = rval.first;
- }
- return rval;
- }
- static std::string parse_rule(std::istream & in, std::string const & name,
- rule & rule, bool is_one_of = false) {
- bool expecting_return = false;
- std::string token;
- while (in >> token) {
- if (std::strchr("])", token[0])) {
- return token; // End Sub-Expression
- }
- static constexpr char const s_repeated_chars[] = "0123456789[(*";
- if (std::strchr(s_repeated_chars, token[0])) {
- repeated tmp = parse_repeated(token);
- if (auto pos = token.find_first_not_of(s_repeated_chars);
- pos != std::string::npos) {
- std::stringstream ss(token.substr(pos));
- parse_rule(ss, name, tmp.rule);
- } else {
- parse_rule(in, name, tmp.rule);
- }
- append(rule, tmp, is_one_of);
- } else if (token.starts_with("%x")) {
- append(rule, parse_char_range(token), is_one_of);
- } else if (token.starts_with('"')) {
- append(rule, literal{token}, is_one_of);
- } else if (token == "/") {
- // See Below
- } else if (token == ";") {
- std::getline(in, token); // Discard the comment
- } else if (token.starts_with('<') || std::isalpha(token[0])) {
- if (expecting_return) { return token; }
- append(rule, reference{token}, is_one_of);
- }
- expecting_return = false;
- is_one_of = (token == "/");
- if (std::strchr("\r\n", in.peek())) { expecting_return = true; }
- }
- return "";
- }
- static auto parse_impl(std::istream & in) {
- std::string first_rule;
- grammar_base::rule_store store;
- std::string name;
- rule rule;
- auto push_rule = [&]() {
- if (name.empty() || not std::isalpha(name[0])) { return; }
- if (first_rule.empty()) { first_rule = name; }
- store.insert_or_assign(std::move(name), std::move(rule));
- };
- bool is_one_of = false;
- std::string token;
- in >> name;
- in >> token; // =
- while (not(token = parse_rule(in, name, rule, is_one_of)).empty()) {
- if (token != name) { push_rule(); }
- name = token;
- in >> token; // = OR /=
- if ((is_one_of = (token == "/="))) { rule = store[name]; }
- }
- push_rule();
- return std::make_pair(first_rule, std::move(store));
- }
- grammar parse(std::istream & in) {
- auto [name, rules] = parse_impl(in);
- return grammar(name, rules.extract(name).mapped(), rules);
- }
- }
|