This is a much too delayed follow on question to something I have previously asked. Basically, I have a fairly parameterized parser for a series of languages written in boost spirit x3. @sehe greatly helped me get even that far: Boost Spirit x3 -- Parameterizing Parsers with other Parsers
Now that I am there, though, I have some strange (to me) exploding compilation times. I cannot post the original code, so have been working on a 'minimal' example and have finally found at least one of the culprits. Here's my code:
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
using x3::char_;
using x3::int_;
using x3::lexeme;
struct boring1 { std::string name; int i; };
struct boring2 { std::string name; int i; };
struct sub1 { std::string name; int i; };
struct sub2 { std::string name; int i; };
struct sub3 { std::string name; int i; };
struct sub4 { std::string name; int i; };
struct sub : x3::variant<sub1,sub2,sub3,sub4> {
sub()=default;
sub(const sub&)=default;
sub& operator=(const sub&)=default;
using base_type::base_type;
using base_type::operator=;
};
struct leaf1 { char tag; int i; };
struct leaf2 { char tag; int i, j, k; };
struct leaf3 { std::string name; int i, j; };
struct leaf23 : x3::variant<leaf2, leaf3> {
leaf23(const leaf23&)=default;
leaf23()=default;
leaf23& operator=(const leaf23&)=default;
using base_type::base_type;
using base_type::operator=;
};
template <typename T> struct node1 { std::string name; T leaf; };
template <typename T> struct node2 { std::string name; T leaf; };
template <typename T> struct node3 { std::string name; T leaf; };
template <typename T> struct node4 { std::string name; std::vector<T> leafs; };
template <typename T> struct node5 { std::string name; std::vector<T> leafs; };
template <typename T> struct node6 { std::string name; std::vector<T> leafs; };
template <typename T> struct cont1 { int i; std::string name; std::vector<T> ts; };
template <typename T> struct cont2 { unsigned i; char t; std::string name; std::vector<T> ts; };
template <typename T, typename U> struct cont3 {
std::vector<T> ts; std::string name; std::vector<U> us;
};
template <typename T, typename U> struct cont4 {
std::string name; std::vector<U> us; std::vector<T> ts;
};
template <typename T> struct program { int name; std::vector<T> stmts; };
BOOST_FUSION_ADAPT_STRUCT(boring1, name, i)
BOOST_FUSION_ADAPT_STRUCT(boring2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub1, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub3, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub4, name, i)
BOOST_FUSION_ADAPT_STRUCT(leaf1, tag, i)
BOOST_FUSION_ADAPT_STRUCT(leaf2, tag, i, j, k)
BOOST_FUSION_ADAPT_STRUCT(leaf3, name, i, j)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node1)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node2)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node3)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node4)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node5)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node6)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont1)(T), i, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont2)(T), i, t, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont3)(T)(U), ts, name, us)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont4)(T)(U), name, us, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (program)(T), name, stmts)
template <typename T> struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
template <typename T> static const as_type<T> as = {};
namespace Generic {
x3::rule<class boring1, boring1> const boring1_p = "boring 1 parser";
auto const boring1_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class boring2, boring2> const boring2_p = "boring 2 parser";
auto const boring2_p_def = lexeme[ +x3::alnum ] >> int_;
BOOST_SPIRIT_DEFINE( boring1_p, boring2_p )
x3::rule<class sub1, sub1> const sub1_p = "sub 1 parser";
auto const sub1_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub2, sub2> const sub2_p = "sub 2 parser";
auto const sub2_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub3, sub3> const sub3_p = "sub 3 parser";
auto const sub3_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub4, sub4> const sub4_p = "sub 4 parser";
auto const sub4_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub, sub> const sub_p = "combined sub parser";
auto const sub_p_def = sub1_p | sub2_p | sub3_p | sub4_p;
BOOST_SPIRIT_DEFINE( sub1_p, sub2_p, sub3_p, sub4_p, sub_p )
template <typename T> auto leaf_p = x3::eps(false);
template <typename T> auto leaves() {
return as<std::vector<T>>[ leaf_p<T> % ',' ];
}
template <> auto leaf_p<leaf1> = as<leaf1>[lexeme[ (char_('t') | char_('u')) >> int_ ]];
template <> auto leaf_p<leaf2> = as<leaf2>[lexeme[ char_('t') >> int_ >> "_" >> int_ >> "_" >> int_]];
template <> auto leaf_p<leaf3> = as<leaf3>[lexeme[ (+x3::alnum) >> "_" >> int_ >> "_" >> int_]];
template <> auto leaf_p<leaf23> = as<leaf23>[leaf_p<leaf2> | leaf_p<leaf3>];
template <typename T> auto parseNode1() {
return as<node1<T>>[ (x3::string("xx") | x3::string("yy") | x3::string("zz")) >> leaf_p<T>];
}
template <typename T> auto parseNode2() {
return as<node2<T>>[ x3::string("node1") >> leaf_p<T>];
}
template <typename T> auto parseNode3() {
return as<node3<T>>[ x3::string("node3") >> leaf_p<T>];
}
template <typename T> auto parseNode4() {
return as<node4<T>>[ x3::string("node4") >> leaves<T>()];
}
template <typename T> auto parseNode5() {
return as<node5<T>>[ x3::string("node5") >> leaves<T>()];
}
template <typename T> auto parseNode6() {
return as<node6<T>>[ x3::string("node6") >> leaves<T>()];
}
struct With1 {};
struct With2 {};
x3::rule<class header1,std::string> const header1 = "header 1";
auto const header1_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::_attr(ctx);
x3::get<With1>(ctx) = v;
x3::_val(ctx) = v;
})];
x3::rule<class footer1,std::string> const footer1 = "footer 1";
auto const footer1_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::get<With1>(ctx);
x3::_pass(ctx) = v == x3::_attr(ctx);
})];
x3::rule<class header2,std::string> const header2 = "header 2";
auto const header2_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::_attr(ctx);
x3::get<With2>(ctx) = v;
x3::_val(ctx) = v;
})];
x3::rule<class footer2,std::string> const footer2 = "footer 2";
auto const footer2_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::get<With2>(ctx);
x3::_pass(ctx) = v == x3::_attr(ctx);
})];
BOOST_SPIRIT_DEFINE(header1, footer1, header2, footer2);
template <typename T>
auto const parseCont1(x3::rule<class c1, T> const& parser) {
return as<cont1<T>>[ int_ >> x3::string("cont1") >> "{" >> +parser >> "}" ];
}
template <typename T>
auto const parseCont2(x3::rule<class c1, T> const& parser) {
return as<cont2<T>>[ x3::bin >> char_ >> header1 >> "{" >> +parser >> "}" >> x3::omit[footer1] ];
}
template <typename T, typename U>
auto const parseCont3(x3::rule<class c1, U> const& parser) {
return as<cont3<T,U>>[ leaves<T>() >> x3::string("cont3") >> "{" >> +parser >> "}" ];
}
auto vectorize = [](auto &ctx){ _val(ctx) = { _attr(ctx) }; };
template <typename stmt>
auto cont4_simple_body(x3::rule<class c1, stmt> const& parser) {
return as<std::vector<stmt>>[parser[ vectorize ]];
}
/* What follows are three different possible implementations of cont4_body, which
* is used in the parseCont4 rule below.
* - the first parses wrong, but is fast to compile (I don't understand why this is
* the case, but in tests from my real language, it produces empty vectors,
* always
* - the second is correct, but slower (maybe 2x)
* - the third is also correct, but slower than the above (another 2-3x)
*/
// wrong but fast compile: ~64s when just Language1 is enabled below in main
// template <typename stmt>
// auto cont4_body(x3::rule<class c1, stmt> const& parser) {
// return
// as<std::vector<stmt>>
// [ "{" >> +parser >> "}"
// | parser[vectorize]
// ];
// }
// correct, slower compile: ~189s when just Language1 is enabled below in main
// template <typename stmt>
// auto cont4_body(x3::rule<class c1, stmt> const& parser) {
// return
// as<std::vector<stmt>>
// [ "{" >> +parser >> "}"
// | cont4_simple_body(parser)
// ];
// }
// correct, even slower compile: ~424s when just Language1 is enabled below in main
template <typename stmt>
auto cont4_body(x3::rule<class c1, stmt> const& parser) {
return
as<std::vector<stmt>>["{" >> + parser >> "}"]
| as<std::vector<stmt>>[cont4_simple_body(parser)]
;
}
template <typename T, typename U>
auto const parseCont4(x3::rule<class c1, U> const& parser) {
return as<cont4<T,U>>[ header2 >> cont4_body(parser) >> leaves<T>() >> x3::omit[footer2]];
}
}
namespace Language1 {
struct lang1_stmt;
struct lang1_stmt : x3::variant<
boring1,
boring2,
sub,
node1<leaf1>,
node2<leaf1>,
node3<leaf1>,
node4<leaf1>,
node5<leaf1>,
node6<leaf1>,
x3::forward_ast<cont1<lang1_stmt>>,
x3::forward_ast<cont2<lang1_stmt>>,
x3::forward_ast<cont3<leaf1, lang1_stmt>>,
x3::forward_ast<cont4<leaf1, lang1_stmt>>
> {
lang1_stmt(const lang1_stmt&)=default;
lang1_stmt()=default;
lang1_stmt& operator=(const lang1_stmt&)=default;
using base_type::base_type;
using base_type::operator=;
};
x3::rule<class Generic::c1, lang1_stmt> lang1_stmt_p = "lang1 stmt parser";
auto const lang1_stmt_p_def =
Generic::boring1_p
| Generic::boring2_p
| Generic::sub_p
| Generic::parseNode1<leaf1>()
| Generic::parseNode2<leaf1>()
| Generic::parseNode3<leaf1>()
| Generic::parseNode4<leaf1>()
| Generic::parseNode5<leaf1>()
| Generic::parseNode6<leaf1>()
| Generic::parseCont1<lang1_stmt>( lang1_stmt_p )
| Generic::parseCont2<lang1_stmt>( lang1_stmt_p )
| Generic::parseCont3<leaf1, lang1_stmt>( lang1_stmt_p )
| Generic::parseCont4<leaf1, lang1_stmt>( lang1_stmt_p )
;
BOOST_SPIRIT_DEFINE( lang1_stmt_p );
std::string with1;
std::string with2;
auto const stmts
= x3::rule<class grammar_stmts1, std::vector<lang1_stmt>> { "Lang1 statements" }
= x3::with<Generic::With1>(with1)
[x3::with<Generic::With2>(with2)[ +lang1_stmt_p ]];
auto const grammar
= x3::rule<class grammar1, program<lang1_stmt>> { "Lang1 grammar" }
= x3::skip( x3::space )[x3::int_ >> stmts];
}
namespace Language2 {
struct lang2_stmt;
struct lang2_stmt : x3::variant<
boring1,
boring2,
sub,
node1<leaf23>,
node2<leaf23>,
node3<leaf23>,
node4<leaf23>,
node5<leaf23>,
node6<leaf23>,
x3::forward_ast<cont1<lang2_stmt>>,
x3::forward_ast<cont2<lang2_stmt>>,
x3::forward_ast<cont3<leaf23, lang2_stmt>>,
x3::forward_ast<cont4<leaf23, lang2_stmt>>
> {
lang2_stmt(const lang2_stmt&)=default;
lang2_stmt()=default;
lang2_stmt& operator=(const lang2_stmt&)=default;
using base_type::base_type;
using base_type::operator=;
};
x3::rule<class Generic::c1, lang2_stmt> lang2_stmt_p = "lang2 stmt parser";
auto const lang2_stmt_p_def =
Generic::boring1_p
| Generic::boring2_p
| Generic::sub_p
| Generic::parseNode1<leaf23>()
| Generic::parseNode2<leaf23>()
| Generic::parseNode3<leaf23>()
| Generic::parseNode4<leaf23>()
| Generic::parseNode5<leaf23>()
| Generic::parseNode6<leaf23>()
| Generic::parseCont1<lang2_stmt>( lang2_stmt_p )
| Generic::parseCont2<lang2_stmt>( lang2_stmt_p )
| Generic::parseCont3<leaf23, lang2_stmt>( lang2_stmt_p )
| Generic::parseCont4<leaf23, lang2_stmt>( lang2_stmt_p )
;
BOOST_SPIRIT_DEFINE( lang2_stmt_p );
std::string with1;
std::string with2;
auto const stmts
= x3::rule<class grammar_stmts2, std::vector<lang2_stmt>> { "Lang2 statements" }
= x3::with<Generic::With1>(with1)
[x3::with<Generic::With2>(with2)[ +lang2_stmt_p ]];
auto const grammar
= x3::rule<class grammar2, program<lang2_stmt>> { "Lang2 grammar" }
= x3::skip( x3::space )[x3::int_ >> stmts];
}
void test(auto const& grammar, std::string text, auto ast) {
auto f = text.begin(), l = text.end();
std::cout << "\nParsing: " << std::quoted(text, '\'') << "\n";
if (boost::spirit::x3::parse(f, l, grammar, ast)) {
std::cout << "Success!\n";
} else {
std::cout << " -- Failed " << std::quoted(text, '\'') << "\n";
}
}
int main() {
test( Language1::grammar,
"",
program<Language1::lang1_stmt>{});
// test(
// Language2::grammar,
// "",
// program<Language2::lang2_stmt>{});
}
You can see in this example that I have defined two languages. They are only different in the leaves of the ast, where targets of the language in Language2 are more complex than in Language1. I have replicated all of the features I am using in my real parser, but have created stupid fake parsers for different statement types. Of note: the performance problems I am seeing are greatly exaggerated in Language2. I have effectively only linked to Language1 in my main function to save time in testing.
The problem seems to be in my encoding of conditional statements, in particular cond4. Here, a conditional can optionally have braces. If there are none, then only a single statement is enclosed. Notice the 3 different implementations of the parser for the conditional body I have included. The first actually incorrectly parses (I mean, the parse succeeds, but the vectors are always empty!). The next two parse correctly but greatly increase compile times.
Any ideas what is going on here?