1

I have also asked this question on the Boost.Spirit mailing list: http://boost.2283326.n4.nabble.com/Spirit-X3-Boost-1-59-Compilation-never-finishes-for-a-recursive-grammar-td4693813.html

I am working on an XPath 2.0 parser as per the RFC. It is a subproject of another project that I am working on.

After some initial success, I made the mistake of writing a whole batch of grammar rules and AST types at once instead of compiling and testing after each step. As a result I ended up with a novel's worth of template error messages to read (my own fault).

Below I present a reduced grammar for XPath (not strictly as per the RFC). It never finishes compiling: I had to kill the compiler after about 7 minutes, when my Mac started slowing down.

#include <iostream>
#include <string>
#include <vector>
#include <boost/optional.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>

namespace x3 = boost::spirit::x3;

namespace ast {

  // Forward declarations: both types are referenced (wrapped in
  // x3::forward_ast) by primary_expression before they are defined below.
  struct or_expression;
  struct function_call;

  // One "<operator> <operand>" step that follows the first operand of a
  // binary expression. `op` holds the operator text produced by the grammar,
  // `expr` the operand parsed after it.
  template <typename T>
  struct operation_sequence_entry
  {
    std::string op;
    T expr;
  };

  // Innermost expression node. The std::string alternative is shared by
  // variable references ('$' qname) and quoted string literals: the grammar
  // produces a string attribute for both. The original list named
  // std::string twice; the second occurrence could never be selected by type
  // (visitors and assignment see a single std::string), so it was dropped.
  // NOTE(review): if callers must tell variables from literals, introduce
  // distinct wrapper types instead of re-adding a duplicate alternative.
  struct primary_expression: x3::variant<
                              std::string,
                              x3::forward_ast<or_expression>,
                              int32_t,
                              uint32_t,
                              double,
                              x3::forward_ast<function_call>
                             >
  {
    using base_type::base_type;
    using base_type::operator=;
  };

  // A primary expression followed by zero or more '[' ... ']' predicates.
  struct filter_expression
  {
    primary_expression prim_expr;
    std::vector<x3::forward_ast<or_expression>> predicates;
  };

  // Attribute of the `path_expr` rule.
  // NOTE(review): the grammar defines path_expr as a *sequence* of an
  // optional filter expression and an optional primary expression, which
  // looks like it wants a struct with two optional members rather than a
  // variant of two optionals -- confirm the intended shape against the
  // grammar before relying on this type.
  struct path_expression: x3::variant<
                            boost::optional<filter_expression>,
                            boost::optional<primary_expression>
                          >
  {
    using base_type::base_type;
    using base_type::operator=;
  };

  // The path expressions collected by `path_expr % '/'`.
  using union_expression = std::vector<path_expression>;

  // Wrapper around a union expression. The optional leading '-' accepted by
  // the grammar carries no attribute, so it is not recorded here.
  struct unary_expression
  {
    union_expression expr;
  };

  // unary ( ("=" | "!=") unary )*
  struct eq_expression
  {
    using expr_seq_type = operation_sequence_entry<unary_expression>;
    unary_expression lhs_expr;
    std::vector<expr_seq_type> rhs_expr;
  };

  // equality ( "and" equality )*
  struct and_expression
  {
    using expr_seq_type = operation_sequence_entry<eq_expression>;
    eq_expression lhs_expr;
    std::vector<expr_seq_type> rhs_expr;
  };

  // and ( "or" and )*
  struct or_expression
  {
    using expr_seq_type = operation_sequence_entry<and_expression>;
    and_expression lhs_expr;
    std::vector<expr_seq_type> rhs_expr;
  };

  // name '(' arg (',' arg)* ')'
  struct function_call
  {
    std::string func_name;
    std::vector<or_expression> args;
  };
}

// Adapts every instantiation of ast::operation_sequence_entry<T> as a Fusion
// sequence with the members (op, expr), in that order, so a parser that
// yields an operator string followed by an operand can fill it directly.
BOOST_FUSION_ADAPT_TPL_STRUCT(
  (T),
  (ast::operation_sequence_entry)(T),
  (std::string, op),
  (T, expr)
);

// Fusion adaptations for the non-template AST nodes. Only the member names
// are listed, in declaration order; Fusion deduces each member's type from
// the struct itself, so the lists cannot drift from the AST definitions.
BOOST_FUSION_ADAPT_STRUCT(ast::unary_expression, expr)

BOOST_FUSION_ADAPT_STRUCT(ast::eq_expression, lhs_expr, rhs_expr)

BOOST_FUSION_ADAPT_STRUCT(ast::and_expression, lhs_expr, rhs_expr)

BOOST_FUSION_ADAPT_STRUCT(ast::or_expression, lhs_expr, rhs_expr)

BOOST_FUSION_ADAPT_STRUCT(ast::function_call, func_name, args)

BOOST_FUSION_ADAPT_STRUCT(ast::filter_expression, prim_expr, predicates)

namespace grammar {
  // Bring in the spirit parsers
  using x3::lexeme;
  using x3::alpha;
  using x3::alnum;
  using x3::ascii::char_;
  using x3::ascii::string;
  using x3::lit;
  using x3::ascii::digit;
  using x3::int_;
  using x3::uint_;
  using x3::double_;

  template<typename T>
  auto as = [](auto p) { return x3::rule<struct _, T>{} = as_parser(p); };

  auto str_ = [](const char* lit) { return x3::string(lit); };

  x3::rule<class path_expr, ast::path_expression> path_expr = "path-expr";

  auto ncname = x3::rule<class ncname, std::string>{"ncname"}
              = x3::lexeme[+(char_ - ':')]
              ;

  auto qname = x3::rule<class qname, std::string>{"qname"}
             = as<std::string>(ncname >> char_(':') >> ncname)
             | as<std::string>(ncname)
             ;

  auto union_expr = x3::rule<class union_expr, ast::union_expression>{"union-expr"}
                  = path_expr % '/'
                  ;

  auto unary_expr = x3::rule<class unary_expr, ast::unary_expression>{"unary-expr"}
                  = -x3::lit('-') >> union_expr
                  ;

  auto equality_expr = x3::rule<class eq_expr, ast::eq_expression>{"equality-expr"}
                     =  unary_expr
                     >> *(as<ast::operation_sequence_entry<ast::unary_expression>>
                          ( (str_("=") | str_("!=")) > unary_expr )
                         )
                     ;

  auto and_expr = x3::rule<class and_expr, ast::and_expression>{"and-expr"}
                =  equality_expr
                >> *(as<ast::operation_sequence_entry<ast::eq_expression>>
                     ( str_("and") > equality_expr )
                    )
                ;

  auto or_expr = x3::rule<class or_expr, ast::or_expression>{"or-expr"}
               =  and_expr 
               >> *(as<ast::operation_sequence_entry<ast::and_expression>>
                    ( str_("or") >> and_expr )
                   )
               ;

  auto function_name = as<std::string>(qname);

  auto function_arg = or_expr;

  auto function_call = x3::rule<class func_call, ast::function_call>{"func-call"}
                     = function_name > '(' > (or_expr % ',') > ')'
                     ;

  auto prim_expr = x3::rule<class prim_expr, ast::primary_expression>{"prim-expr"}
                 = ('$' > qname)
                 | ('"' > *(char_ - '"') > '"')
                 | ('(' > or_expr > ')')
                 | (int_ | uint_ | double_)
                 | function_call
                 ;


  auto predicate = '[' > or_expr > ']';

  auto filter_expr = x3::rule<class filter_expr, ast::filter_expression>{"filter-expr"}
                   = prim_expr >> *(predicate)
                   ;

  auto path_expr_def = -(filter_expr) >> -(lit("/") | lit("//")) >> -(prim_expr);

  BOOST_SPIRIT_DEFINE (path_expr);
}

int main() {
  using x3::space;
  using grammar::or_expr;

  ast::or_expression oexpr;
  std::string input = "$ab/$cd or $ef";

  bool res = phrase_parse(input.begin(),
                          input.end(),
                          or_expr,
                          space,
                          oexpr);

  if (!res) {
    std::cout << "Parsing failed miserably!\n";
    return 1;
  }
  return 0;
}

Compiling it as

g++ -std=c++14 -ftemplate-depth=1024 -o rec_ex rec_ex.cc

Compiler : Clang 3.8

Boost Version : 1.59

Based on the template instantiation errors I get with a lower template depth, I am pretty sure that deep recursion is happening somewhere. Is there any way to restructure the above grammar so that it does not cause this issue?

Thanks.

Cœur
  • 37,241
  • 25
  • 195
  • 267
Arunmu
  • 6,837
  • 1
  • 24
  • 46
  • I also tried splitting the code base into separate files, as suggested on one of the Boost.Spirit mailing lists, but I hit the same issue. – Arunmu Apr 21 '17 at 06:37
  • Got a patch from Larry in the boost spirit mailing list. Check the link posted in the question for the details. – Arunmu Apr 22 '17 at 14:30

0 Answers0