parser.cxx 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
#include <cassert>
#include <cctype>
#include <charconv>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <variant>
#include <abnf/forward.h>
#include <abnf/grammar.h>
  11. namespace abnf {
  12. grammar parse(std::istream && in) { return parse(in); }
  13. grammar_group parse_group(std::istream && in) { return parse_group(in); }
  14. static void append(rule & rule, rule_part const & part, bool is_one_of) {
  15. if (rule.rules.empty()) {
  16. rule.rules.push_back(is_one_of ? one_of(part) : part);
  17. } else if (not is_one_of) {
  18. rule.rules.push_back(part);
  19. } else if (auto * of = std::get_if<one_of>(&rule.rules.back())) {
  20. of->rules.push_back(part);
  21. } else {
  22. rule.rules.back() = one_of{rule.rules.back(), part};
  23. }
  24. }
  25. static repeated parse_repeated(std::string & token) {
  26. if (token[0] == '[') {
  27. token.erase(0, 1);
  28. return {.min = 0, .max = 1};
  29. }
  30. if (token[0] == '(') {
  31. // TODO: Can I just inline this when is_one_of is false?
  32. token.erase(0, 1);
  33. return {.min = 1, .max = 1};
  34. }
  35. repeated rval;
  36. size_t idx = 0;
  37. if (not token.starts_with('*')) {
  38. rval.min = std::stoull(token, &idx);
  39. token.erase(0, idx);
  40. }
  41. if (not token.starts_with('*')) {
  42. token.erase(0, token[0] == '(' ? 1 : 0);
  43. rval.max = rval.min;
  44. return rval;
  45. }
  46. token.erase(0, 1);
  47. if (not token.empty() && std::strchr("123456789", token[0])) {
  48. rval.max = std::stoull(token, &idx);
  49. token.erase(0, idx);
  50. }
  51. token.erase(0, token[0] == '(' ? 1 : 0);
  52. return rval;
  53. }
  // Maps the base indicator of an ABNF numeric value (the character following
  // '%') to the numeric radix used to parse its digits.
  //
  //   'b' / 'B' -> 2
  //   'd' / 'D' -> 10
  //   'x' / 'X' -> 16
  //
  // Both letter cases are accepted because the indicators are case-insensitive
  // literals in the ABNF definition of ABNF (RFC 5234).
  //
  // Throws std::invalid_argument for any other character.
  static int base(char c) {
    switch (c) {
    case 'b':
    case 'B':
      return 2;
    case 'd':
    case 'D':
      return 10;
    case 'x':
    case 'X':
      return 16;
    default:
      throw std::invalid_argument(
          std::string("Unknown Base for Character Range: '") + c + "'");
    }
  }
  64. char_range parse_char_range(std::string_view token) {
  65. char_range rval;
  66. int const base = abnf::base(token[1]);
  67. token.remove_prefix(2);
  68. char const * const last = token.end();
  69. auto [end, ec] = std::from_chars(token.data(), last, rval.first.value_, base);
  70. if (*end == '-') {
  71. ec = std::from_chars(end + 1, last, rval.last.value_, base).ec;
  72. } else {
  73. rval.last = rval.first;
  74. }
  75. return rval;
  76. }
  77. static std::string parse_rule(std::istream & in, std::string const & name,
  78. rule & rule, bool is_one_of = false) {
  79. bool expecting_return = false;
  80. std::string token;
  81. while (in >> token) {
  82. if (std::strchr("])", token[0])) {
  83. return token; // End Sub-Expression
  84. }
  85. static constexpr char const s_repeated_chars[] = "0123456789[(*";
  86. if (std::strchr(s_repeated_chars, token[0])) {
  87. repeated tmp = parse_repeated(token);
  88. if (not token.empty()) {
  89. std::stringstream ss(token);
  90. parse_rule(ss, name, tmp.rule);
  91. } else {
  92. parse_rule(in, name, tmp.rule);
  93. }
  94. append(rule, tmp, is_one_of);
  95. } else if (token.starts_with("%")) {
  96. append(rule, parse_char_range(token), is_one_of);
  97. } else if (token.starts_with('"')) {
  98. append(rule, literal{token}, is_one_of);
  99. } else if (token == "/") {
  100. // See Below
  101. } else if (token == ";") {
  102. std::getline(in, token); // Discard the comment
  103. } else if (token.starts_with('<') || std::isalpha(token[0])) {
  104. if (expecting_return) { return token; }
  105. append(rule, reference{token}, is_one_of);
  106. }
  107. expecting_return = false;
  108. is_one_of = (token == "/");
  109. if (std::strchr("\r\n", in.peek())) { expecting_return = true; }
  110. }
  111. return "";
  112. }
  113. static auto parse_impl(std::istream & in) {
  114. std::string first_rule;
  115. grammar_base::rule_store store;
  116. std::string name;
  117. rule rule;
  118. auto push_rule = [&]() {
  119. if (name.empty() || not std::isalpha(name[0])) { return; }
  120. if (first_rule.empty()) { first_rule = name; }
  121. store.insert_or_assign(std::move(name), std::move(rule));
  122. };
  123. bool is_one_of = false;
  124. std::string token;
  125. in >> name;
  126. in >> token; // =
  127. while (not(token = parse_rule(in, name, rule, is_one_of)).empty()) {
  128. if (token != name) { push_rule(); }
  129. name = token;
  130. in >> token; // = OR /=
  131. if ((is_one_of = (token == "/=" || token == "=/"))) { rule = store[name]; }
  132. }
  133. push_rule();
  134. return std::make_pair(first_rule, std::move(store));
  135. }
  136. grammar parse(std::istream & in) {
  137. auto [name, rules] = parse_impl(in);
  138. return grammar(name, rules.extract(name).mapped(), rules);
  139. }
  140. grammar_group parse_group(std::istream & in) {
  141. return grammar_group(parse_impl(in).second);
  142. }
  143. }