parser.cxx 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #include <cassert>
  2. #include <cctype>
  3. #include <charconv>
  4. #include <iostream>
  5. #include <sstream>
  6. #include <string>
  7. #include <variant>
  8. #include <abnf/forward.h>
  9. #include <abnf/grammar.h>
  10. namespace abnf {
  11. grammar parse(std::istream && in) { return parse(in); }
  12. static void append(rule & rule, rule_part const & part, bool is_one_of) {
  13. if (rule.rules.empty()) {
  14. rule.rules.push_back(is_one_of ? one_of(part) : part);
  15. } else if (not is_one_of) {
  16. rule.rules.push_back(part);
  17. } else if (auto * of = std::get_if<one_of>(&rule.rules.back())) {
  18. of->rules.push_back(part);
  19. } else {
  20. rule.rules.back() = one_of{rule.rules.back(), part};
  21. }
  22. }
  23. static repeated parse_repeated(std::string_view token) {
  24. if (token[0] == '[') { return {.min = 0, .max = 1}; }
  25. if (token[0] == '(') {
  26. // TODO: Can I just inline this when is_one_of is false?
  27. return {.min = 1, .max = 1};
  28. }
  29. repeated rval;
  30. size_t idx = 0;
  31. if (not token.starts_with('*')) {
  32. rval.min = std::stoull(std::string(token), &idx);
  33. token.remove_prefix(idx);
  34. }
  35. if (not token.starts_with('*')) {
  36. rval.max = rval.min;
  37. return rval;
  38. }
  39. token.remove_prefix(1);
  40. if (not token.empty() && std::strchr("123456789", token[0])) {
  41. rval.max = std::stoull(std::string(token), &idx);
  42. token.remove_prefix(idx);
  43. }
  44. return rval;
  45. }
  46. char_range parse_char_range(std::string_view token) {
  47. char_range rval;
  48. token.remove_prefix(2);
  49. char const * const last = token.end();
  50. auto [end, ec] = std::from_chars(token.data(), last, rval.first, 16);
  51. if (*end == '-') {
  52. ec = std::from_chars(end + 1, last, rval.last, 16).ec;
  53. } else {
  54. rval.last = rval.first;
  55. }
  56. return rval;
  57. }
  58. static std::string parse_rule(std::istream & in, std::string const & name,
  59. rule & rule, bool is_one_of = false) {
  60. bool expecting_return = false;
  61. std::string token;
  62. while (in >> token) {
  63. if (std::strchr("])", token[0])) {
  64. return token; // End Sub-Expression
  65. }
  66. static constexpr char const s_repeated_chars[] = "0123456789[(*";
  67. if (std::strchr(s_repeated_chars, token[0])) {
  68. repeated tmp = parse_repeated(token);
  69. if (auto pos = token.find_first_not_of(s_repeated_chars);
  70. pos != std::string::npos) {
  71. std::stringstream ss(token.substr(pos));
  72. parse_rule(ss, name, tmp.rule);
  73. } else {
  74. parse_rule(in, name, tmp.rule);
  75. }
  76. append(rule, tmp, is_one_of);
  77. } else if (token.starts_with("%x")) {
  78. append(rule, parse_char_range(token), is_one_of);
  79. } else if (token.starts_with('"')) {
  80. append(rule, literal{token}, is_one_of);
  81. } else if (token == "/") {
  82. // See Below
  83. } else if (token == ";") {
  84. std::getline(in, token); // Discard the comment
  85. } else if (token.starts_with('<') || std::isalpha(token[0])) {
  86. if (expecting_return) { return token; }
  87. append(rule, reference{token}, is_one_of);
  88. }
  89. expecting_return = false;
  90. is_one_of = (token == "/");
  91. if (std::strchr("\r\n", in.peek())) { expecting_return = true; }
  92. }
  93. return "";
  94. }
  95. grammar parse(std::istream & in) {
  96. grammar rval;
  97. std::string name;
  98. rule rule;
  99. auto push_rule = [&rval, &name, &rule]() {
  100. if (name.empty() || not std::isalpha(name[0])) {
  101. // PASS
  102. } else if (rval.name_.empty()) {
  103. rval.name_ = std::move(name);
  104. rval.base_rule_ = std::move(rule);
  105. } else {
  106. rval.rules_.insert_or_assign(std::move(name), std::move(rule));
  107. }
  108. };
  109. bool one_of = false;
  110. std::string token;
  111. in >> name;
  112. in >> token; // =
  113. while (not(token = parse_rule(in, name, rule, one_of)).empty()) {
  114. if (token != name) { push_rule(); }
  115. name = token;
  116. in >> token; // = OR /=
  117. if ((one_of = (token == "/="))) { rule = rval.rules_[name]; }
  118. }
  119. push_rule();
  120. return rval;
  121. }
  122. }