2

I have the following three rules:

unary_expression =
    ( '(' > expression > ')' )
    | int_;

operator_expression =
    unary_expression >> *(operators > expression);

expression =
    ( '(' > expression > ')' )
    | operator_expression;

Obviously this is recursive, so I use boost::recursive_wrapper and created the following AST:

struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;

struct unary_expression
{
  unary_expression_node m_unary_expression;
};

enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
  unary_expression first;
  using second_type = std::vector<std::pair<operators, expression>>;
  second_type second;
};

using expression_node = 
boost::variant<boost::recursive_wrapper<expression>, operator_expression>;

struct expression
{
  expression_node m_expression;
};

This compiles (see full example below), but when the code attempts to construct an expression object the constructor gets into an infinite loop of calling these three constructors:

#11 0x0000000000466066 in ast::expression::expression ...
#12 0x00000000004682e0 in boost::recursive_wrapper<ast::expression>::recursive_wrapper ...
#13 0x000000000046718d in boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression>::variant
...

Thus, Creating an expression creates a boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression> (aka, an expression_node) which creates a boost::recursive_wrapper<ast::expression> which creates an expression which creates... and so on.

How can I solve this?

Here is a full example that compiles, but segfaults when the stack runs full:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace ast {

struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;

struct unary_expression
{
  unary_expression_node m_unary_expression;
};

enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
  unary_expression first;
  using second_type = std::vector<std::pair<operators, expression>>;
  second_type second;
};

using expression_node = boost::variant<boost::recursive_wrapper<expression>, operator_expression>;

struct expression
{
  expression_node m_expression;
};

std::ostream& operator<<(std::ostream& os, expression const& expression)
{
  return os << expression.m_expression;
}

std::ostream& operator<<(std::ostream& os, unary_expression const& unary_expression)
{
  return os << unary_expression.m_unary_expression;
}

std::ostream& operator<<(std::ostream& os, operator_expression const& operator_expression)
{
  os << operator_expression.first;
  for (auto& l : operator_expression.second)
  {
    os << ' ' << l.first << ' ' << l.second;
  }
  return os;
}

} // namespace ast

BOOST_FUSION_ADAPT_STRUCT(
  ast::expression,
  (ast::expression_node, m_expression)
)

BOOST_FUSION_ADAPT_STRUCT(
  ast::unary_expression,
  (ast::unary_expression_node, m_unary_expression)
)

BOOST_FUSION_ADAPT_STRUCT(
  ast::operator_expression,
  (ast::unary_expression, first),
  (ast::operator_expression::second_type, second)
)

namespace client
{

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression(), qi::space_type>
{
 private:
  qi::symbols<char, ast::operators> operators;
  qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
  qi::rule<Iterator, ast::operator_expression(), qi::space_type> operator_expression;
  qi::rule<Iterator, ast::expression(), qi::space_type> expression;

 public:
  expression_grammar() : expression_grammar::base_type(expression, "expression_grammar")
  {
    using qi::double_;
    using qi::char_;
    using qi::int_;

    operators.add
        ("==", ast::op_eq)
        ("!=", ast::op_ne)
    ;

    unary_expression =
        ( '(' > expression > ')' )
        | int_;

    operator_expression =
        unary_expression >> *(operators > expression);

    expression =
        ( '(' > expression > ')' )
        | operator_expression;
  }
};

} // namespace client

int main()
{
  std::string const input{"1 == 1 != 0"};
  using iterator_type = std::string::const_iterator;
  using expression_grammar = client::expression_grammar<iterator_type>;
  namespace qi = boost::spirit::qi;

  expression_grammar program;
  iterator_type iter{input.begin()};
  iterator_type const end{input.end()};
  ast::expression out;
  bool r = qi::phrase_parse(iter, end, program, qi::space, out);

  if (!r || iter != end)
  {
    std::cerr << "Parsing failed." << std::endl;
    return 1;
  }
  std::cout << "Parsed: " << out << std::endl;
}

EDIT:

I tried simplifying things to just two rules (and two 'ast's):

struct expression;
using unary_expression = boost::variant<boost::recursive_wrapper<expression>, int>;

enum operators { op_eq, op_ne };
struct expression
{
  unary_expression first;
  using second_type = std::vector<std::pair<operators, expression>>;
  second_type second;
};

BOOST_FUSION_ADAPT_STRUCT(
  ast::expression,
  (ast::unary_expression, first),
  (ast::expression::second_type, second)
)

[...]

  unary_expression =
    ( '(' > expression > ')' )
    | int_;

  expression =
    unary_expression >> *(operators > expression);

but also this result in an infinite loop.

#18 0x00000000004646f2 in ast::expression::expression
#19 0x00000000004669ac in boost::recursive_wrapper<ast::expression>::recursive_wrapper
#20 0x0000000000465821 in boost::variant<boost::recursive_wrapper<ast::expression>, int>::variant
...
sehe
  • 374,641
  • 47
  • 450
  • 633
Carlo Wood
  • 5,648
  • 2
  • 35
  • 47

1 Answers1

1

Variants default-construct to their first element type.

This indeed directly leads to an infinite loop. (Demo)

The way to solve it is to make the default variant element not re-entrant or to make it lazily constructed. In this case, you can simply re-arrange to make int the first element.

Better yet, there doesn't seem to be a need to reflect the operator precedence hieararchy (as it is expressed in the rules) in the resultant tree, so why not simplify to:

struct unary_expression;
struct binary_expression;

enum operators { op_eq, op_ne };

using expression = boost::variant<
    int, 
    boost::recursive_wrapper<unary_expression>,
    boost::recursive_wrapper<binary_expression>
>;

struct unary_expression {
    expression expr;
};

struct binary_expression {
    expression                                    first;
    std::vector<std::pair<operators, expression>> other;
};

This no longer crashes and seems a bit simpler in adaptation and usage.

Simplified Full Demo

This full demo uses that AST, but adds a true unary expression. A few style things have been fixed:

  • don't expose the skipper unless you intend for the caller to change it
  • make the parser const
  • show unparsed trailing data (or instead assert >> qi::eoi)

Note: I might have changed the precedence rules (specifically, associativity of binary operators). I'm not sure which version you require.

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace ast {
    struct unary_expression;
    struct binary_expression;

    enum operators { op_eq, op_ne };

    using expression = boost::variant<
        int, 
        boost::recursive_wrapper<unary_expression>,
        boost::recursive_wrapper<binary_expression>
    >;

    struct unary_expression {
        bool negated = false;
        expression expr;
    };

    struct binary_expression {
        expression                                    first;
        std::vector<std::pair<operators, expression>> other;
    };

}

BOOST_FUSION_ADAPT_STRUCT(ast::unary_expression, negated, expr)
BOOST_FUSION_ADAPT_STRUCT(ast::binary_expression, first, other)

namespace ast {
    static inline std::ostream& operator<<(std::ostream& os, operators op) { return os << (op==op_eq?"==":"!="); }
    static inline std::ostream& operator<<(std::ostream& os, binary_expression const& e) { 
        os << e.first;
        for (auto& oe : e.other)
            os << " " << oe.first << " " << oe.second;
        return os;
    }
    static inline std::ostream& operator<<(std::ostream& os, unary_expression const& e) {
        return os << (e.negated?"!":"") << "(" << e.expr << ")";
    }
}

namespace client
{
    namespace qi = boost::spirit::qi;

    template <typename Iterator>
    class expression_grammar : public qi::grammar<Iterator, ast::expression()> {
      private:
        qi::symbols<char, ast::operators> operators;
        qi::rule<Iterator, ast::expression()> start;

        qi::rule<Iterator, ast::expression(),        qi::space_type> simple_expression;
        qi::rule<Iterator, ast::unary_expression(),  qi::space_type> unary_expression;
        qi::rule<Iterator, ast::binary_expression(), qi::space_type> binary_expression;
        qi::rule<Iterator, ast::expression(),        qi::space_type> expression;

      public:
        expression_grammar() : expression_grammar::base_type(start, "expression") {
            using namespace qi;

            operators.add
                ("==", ast::op_eq)
                ("!=", ast::op_ne)
                ;

            simple_expression =
                ( '(' > expression > ')' )
                | int_;

            unary_expression = 
                matches['!'] >> simple_expression;

            binary_expression =
                unary_expression >> *(operators > expression);

            expression = binary_expression;

            start = skip(space) [ expression ];

            BOOST_SPIRIT_DEBUG_NODES((expression)(binary_expression)(unary_expression)(simple_expression))
        }
    };

} // namespace client

int main() {
    using It = std::string::const_iterator;
    client::expression_grammar<It> const program;

    std::string const input{"1 == !(1 != 0)"};
    It iter = input.begin(), end = input.end();
    ast::expression out;

    if (parse(iter, end, program, out)) {
        std::cout << "Parsed: " << out << std::endl;
    } else {
        std::cerr << "Parsing failed." << std::endl;
        return 1;
    }

    if (iter != end) {
        std::cout << "Remaining unparsed input: '" << std::string(iter, end) << "'\n";
    }
}

Prints

Parsed: (1) == !((1) != (0))
sehe
  • 374,641
  • 47
  • 450
  • 633
  • In fact, I'd vouch to remove `std::pair<>` from the AST: [only one more line of code](http://coliru.stacked-crooked.com/a/3cd39da5fcd2b745) – sehe Jun 08 '17 at 22:31
  • Man you rock! Thank you so much! I got it to work now :). I tried to follow all your advise, although not everything was as easy because the real code is -again- different from this minimal example. Especially the part about not exposing the skipper is not clear to me, as I have a custom skipper and multiple grammars that use eachother :/. [The actual code is online](https://github.com/CarloWood/memorymodel/tree/master/src) by the way. The above question has to do with grammar_statement.cxx – Carlo Wood Jun 09 '17 at 00:13
  • Like I said, do expose the skipper if it's supposed to be exposed. It looked like "end-user interface" and you probably don't want the caller to change the skipper, or have to spell it out for no reason. Just keep it and drop the start rule again :) – sehe Jun 09 '17 at 00:37