parser.cxx 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
#include <cassert>
#include <cctype>
#include <charconv>
#include <cstring>
#include <ios>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <variant>

#include <abnf/forward.h>
#include <abnf/grammar.h>
  11. namespace abnf {
  12. grammar parse(std::istream && in) { return parse(in); }
  13. static void append(rule & rule, rule_part const & part, bool is_one_of) {
  14. if (rule.rules.empty()) {
  15. rule.rules.push_back(is_one_of ? one_of(part) : part);
  16. } else if (not is_one_of) {
  17. rule.rules.push_back(part);
  18. } else if (auto * of = std::get_if<one_of>(&rule.rules.back())) {
  19. of->rules.push_back(part);
  20. } else {
  21. rule.rules.back() = one_of{rule.rules.back(), part};
  22. }
  23. }
  24. static repeated parse_repeated(std::string_view token) {
  25. if (token[0] == '[') { return {.min = 0, .max = 1}; }
  26. if (token[0] == '(') {
  27. // TODO: Can I just inline this when is_one_of is false?
  28. return {.min = 1, .max = 1};
  29. }
  30. repeated rval;
  31. size_t idx = 0;
  32. if (not token.starts_with('*')) {
  33. rval.min = std::stoull(std::string(token), &idx);
  34. token.remove_prefix(idx);
  35. }
  36. if (not token.starts_with('*')) {
  37. rval.max = rval.min;
  38. return rval;
  39. }
  40. token.remove_prefix(1);
  41. if (not token.empty() && std::strchr("123456789", token[0])) {
  42. rval.max = std::stoull(std::string(token), &idx);
  43. token.remove_prefix(idx);
  44. }
  45. return rval;
  46. }
  47. char_range parse_char_range(std::string_view token) {
  48. char_range rval;
  49. token.remove_prefix(2);
  50. char const * const last = token.end();
  51. auto [end, ec] = std::from_chars(token.data(), last, rval.first.value_, 16);
  52. if (*end == '-') {
  53. ec = std::from_chars(end + 1, last, rval.last.value_, 16).ec;
  54. } else {
  55. rval.last = rval.first;
  56. }
  57. return rval;
  58. }
  59. static std::string parse_rule(std::istream & in, std::string const & name,
  60. rule & rule, bool is_one_of = false) {
  61. bool expecting_return = false;
  62. std::string token;
  63. while (in >> token) {
  64. if (std::strchr("])", token[0])) {
  65. return token; // End Sub-Expression
  66. }
  67. static constexpr char const s_repeated_chars[] = "0123456789[(*";
  68. if (std::strchr(s_repeated_chars, token[0])) {
  69. repeated tmp = parse_repeated(token);
  70. if (auto pos = token.find_first_not_of(s_repeated_chars);
  71. pos != std::string::npos) {
  72. std::stringstream ss(token.substr(pos));
  73. parse_rule(ss, name, tmp.rule);
  74. } else {
  75. parse_rule(in, name, tmp.rule);
  76. }
  77. append(rule, tmp, is_one_of);
  78. } else if (token.starts_with("%x")) {
  79. append(rule, parse_char_range(token), is_one_of);
  80. } else if (token.starts_with('"')) {
  81. append(rule, literal{token}, is_one_of);
  82. } else if (token == "/") {
  83. // See Below
  84. } else if (token == ";") {
  85. std::getline(in, token); // Discard the comment
  86. } else if (token.starts_with('<') || std::isalpha(token[0])) {
  87. if (expecting_return) { return token; }
  88. append(rule, reference{token}, is_one_of);
  89. }
  90. expecting_return = false;
  91. is_one_of = (token == "/");
  92. if (std::strchr("\r\n", in.peek())) { expecting_return = true; }
  93. }
  94. return "";
  95. }
  96. static auto parse_impl(std::istream & in) {
  97. std::string first_rule;
  98. grammar_base::rule_store store;
  99. std::string name;
  100. rule rule;
  101. auto push_rule = [&]() {
  102. if (name.empty() || not std::isalpha(name[0])) { return; }
  103. if (first_rule.empty()) { first_rule = name; }
  104. store.insert_or_assign(std::move(name), std::move(rule));
  105. };
  106. bool is_one_of = false;
  107. std::string token;
  108. in >> name;
  109. in >> token; // =
  110. while (not(token = parse_rule(in, name, rule, is_one_of)).empty()) {
  111. if (token != name) { push_rule(); }
  112. name = token;
  113. in >> token; // = OR /=
  114. if ((is_one_of = (token == "/="))) { rule = store[name]; }
  115. }
  116. push_rule();
  117. return std::make_pair(first_rule, std::move(store));
  118. }
  119. grammar parse(std::istream & in) {
  120. auto [name, rules] = parse_impl(in);
  121. return grammar(name, rules.extract(name).mapped(), rules);
  122. }
  123. }