-3

I would like to implement a parser, using boost::spirit, for a nested (and optional) structure such as "namespace" in the C++ language.

What is the simplest way to do it?

Claudio La Rosa
  • 141
  • 1
  • 2
  • 7

1 Answer

2

You can create a recursive rule like so:

// Recursive rule: the `namespace` keyword followed by a brace pair that may
// contain any number of nested occurrences of this same rule.
namespace_ = lexeme["namespace"] >> '{' >> *namespace_ >> '}';

To also allow optional name identifiers:

// Identifier: a letter or underscore followed by any number of letters,
// digits or underscores. raw[] yields the matched input range, which becomes
// the rule's std::string attribute.
rule<It, std::string()> identifier_ =     //
    raw[(alpha | '_') >> *(alnum | '_')]; //

// Same recursive rule as before, with an optional name (`-identifier_`)
// between the keyword and the opening brace.
// NOTE(review): there is no word-boundary check after the keyword, so input
// such as `namespaceX {}` is presumably accepted as well — confirm whether
// that is intended.
namespace_ =
    lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';

To also account for C++-style comments:

// Skipper: matches one unit of ignorable input, which is either
//   - a whitespace character,
//   - a `//` comment running up to the end of the line (or end of input), or
//   - a `/* ... */` block comment.
using Skip = rule<It>;
Skip skip  = space                           //
    | "//" >> *~char_("\r\n") >> (eol | eoi) //
    | "/*" >> *(char_ - "*/") >> "*/"        //
    ;

// The namespace rule is declared with the skipper type so that ignorable
// input is skipped between its tokens.
rule<It, Skip> namespace_;

// Declared without a skipper, so nothing is skipped inside an identifier.
rule<It, std::string()> identifier_ =     //
    raw[(alpha | '_') >> *(alnum | '_')]; //

// Keyword, optional name, then a brace pair holding nested namespaces.
// NOTE(review): there is no word-boundary check after the keyword, so input
// such as `namespaceX {}` is presumably accepted as well — confirm whether
// that is intended.
namespace_ =
    lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';

Demo:

Live On Coliru

#include <boost/spirit/include/qi.hpp>

// Checks that `input` consists solely of (possibly nested, optionally named)
// namespace blocks, ignoring whitespace and C++-style comments.
//
// Nothing is returned. The top-level expression uses expectation points (`>`),
// so input that does not match to the very end is reported by Spirit throwing
// qi::expectation_failure rather than through a return value.
auto parse(std::string_view input) {
    using namespace boost::spirit::qi;
    using It = std::string_view::const_iterator;

    // Skipper: whitespace, `//` line comments and `/* ... */` block comments.
    using Skip = rule<It>;
    Skip skip  = space                           //
        | "//" >> *~char_("\r\n") >> (eol | eoi) //
        | "/*" >> *(char_ - "*/") >> "*/"        //
        ;

    rule<It, Skip> namespace_;

    // Declared without a skipper, so nothing is skipped inside an identifier.
    rule<It, std::string()> identifier_ =     //
        raw[(alpha | '_') >> *(alnum | '_')]; //

    // The negative lookahead makes `namespace` a whole word: without it,
    // `namespaceX {}` would be accepted as if it were `namespace X {}`.
    namespace_ =
        lexeme["namespace" >> !(alnum | '_')] >> -identifier_ >> '{' >>
        *namespace_ >> '}';

    phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip);
}

// Entry point: runs parse() over a small sample of nested namespaces.
int main() {
    constexpr std::string_view sample = R"(
        namespace X { namespace Y {
            namespace Z1 {
            }
            namespace Z2 {
            }
        }} // namespace X::Y
    )";

    parse(sample);
    return 0;
}

BONUS

Adding AST representation and debug output of the parsed data: Live On Coliru (http://coliru.stacked-crooked.com/a/58542397b7f751e0)

#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>

// Syntax tree produced by the parser: a forest of namespaces.
namespace Ast {
    // Namespace names are plain strings.
    using Id = std::string;

    // Forward declaration so the child list can be named before the struct
    // that contains it is defined.
    struct Namespace;
    using Namespaces = std::vector<Namespace>;

    // One namespace block.
    struct Namespace {
        boost::optional<Id> id;       // empty when no name was given
        Namespaces          children; // nested namespaces, in source order
    };
}

// Exposes Namespace to Boost.Fusion as the sequence (id, children); the
// members are listed in the order the grammar rule produces them.
BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace, id, children)

// Parses `input` as a sequence of (possibly nested, optionally named)
// namespace blocks and returns them as Ast::Namespaces, ignoring whitespace
// and C++-style comments.
//
// The top-level expression uses expectation points (`>`), so input that does
// not match to the very end is reported by Spirit throwing
// qi::expectation_failure.
auto parse(std::string_view input) {
    using namespace boost::spirit::qi;
    using It = std::string_view::const_iterator;

    // Skipper: whitespace, `//` line comments and `/* ... */` block comments.
    using Skip = rule<It>;
    Skip skip  = space                           //
        | "//" >> *~char_("\r\n") >> (eol | eoi) //
        | "/*" >> *(char_ - "*/") >> "*/"        //
        ;

    rule<It, Ast::Namespace(), Skip> namespace_;

    // Declared without a skipper, so nothing is skipped inside an identifier.
    rule<It, Ast::Id()> identifier_ =     //
        raw[(alpha | '_') >> *(alnum | '_')]; //

    // The negative lookahead makes `namespace` a whole word: without it,
    // `namespaceX {}` would be accepted as if it were `namespace X {}`.
    // The keyword part exposes no attribute, so the rule still synthesizes
    // (optional id, children) for Ast::Namespace.
    namespace_ =
        lexeme["namespace" >> !(alnum | '_')] >> -identifier_ >> '{' >>
        *namespace_ >> '}';

    Ast::Namespaces parsed;
    phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip, parsed);
    return parsed;
}

namespace Ast {
    void print(std::ostream& os, Namespace const& ns, unsigned indent = 0) {
        os << std::setw(indent) << ""
           << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
        for (auto& child : ns.children) {
            print(os, child, indent+2);
        }
        os << std::setw(indent) << "" << "}\n";
    }
}

// Entry point: parses a sample with nested and anonymous namespaces and
// prints every top-level namespace to standard output.
int main() {
    constexpr std::string_view sample = R"(
        namespace X { namespace Y {
            namespace Z1 {
            }
            namespace Z2 {
            }
        }} // namespace X::Y
        namespace { }
    )";

    auto const forest = parse(sample);

    for (auto const& top_level : forest)
        print(std::cout, top_level);

    return 0;
}

Prints

namespace X {
  namespace Y {
    namespace Z1 {
    }
    namespace Z2 {
    }
  }
}
namespace /*anonymous*/ {
}

UPDATE

In response to the comments, I made a more involved example that parses input where a struct can appear at global or namespace level (or, indeed, inside another struct), like:

namespace Math {
    long factorial(int x);
}

struct GlobalA {
    int foo();
    double bar(string stuff, int i, bool flag);
    struct Nested {
        /* todo implementation */
    };
};

namespace X { namespace Y {
    struct Mixin{};
    namespace Z1 {
        struct Derived : GlobalA, Mixin {
            void qux();
        };
    }
    namespace Z2 {
    }
}} // namespace X::Y
namespace { }

See it Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>

// Syntax tree for the extended grammar: namespaces, structs, function
// declarations and variable declarations.
namespace Ast {
    // Identifier type; inherits std::string's constructors and assignment.
    // NOTE(review): presumably derived (rather than aliased) so that Id is a
    // distinct type from std::string — confirm.
    struct Id : std::string {
        using std::string::string;
        using std::string::operator=;
    };

    struct Namespace;
    // NOTE(review): this alias is not used by the code shown here.
    using Namespaces = std::vector<Namespace>;

    // `type name`; also used for function parameters.
    struct VariableDeclaration { Id type, name; };
    using VariableDeclarations = std::vector<VariableDeclaration>;

    // `return_type name(args...)`.
    struct FunctionDeclaration {
        Id return_type, name;
        VariableDeclarations args;
    };

    // StructDeclaration is only forward-declared at this point and itself
    // contains Declarations, so the variant holds it via recursive_wrapper.
    struct StructDeclaration;
    using Declaration = boost::variant< //
        VariableDeclaration,            //
        FunctionDeclaration,            //
        boost::recursive_wrapper<StructDeclaration>>;

    using Bases   = std::list<Id>;
    using Members = std::vector<Declaration>;

    // `struct name : bases { members };`
    struct StructDeclaration {
        Id      name;
        Bases   bases;   // base names in source order; may be empty
        Members members; // nested declarations in source order
    };

    // A namespace may contain declarations and further namespaces.
    using NsMember  = boost::variant<Declaration, Namespace>;
    using NsMembers = std::vector<NsMember>;

    struct Namespace {
        boost::optional<Id> id;      // empty when no name was given
        NsMembers           members; // contents in source order
    };

    // A whole program is the list of members at global scope.
    using Program = NsMembers;
} // namespace Ast

// Fusion adaptations: each lists the members in the order the corresponding
// grammar rule produces them.
BOOST_FUSION_ADAPT_STRUCT(Ast::VariableDeclaration, type,        name)
BOOST_FUSION_ADAPT_STRUCT(Ast::FunctionDeclaration, return_type, name,    args)
BOOST_FUSION_ADAPT_STRUCT(Ast::StructDeclaration,   name,        bases,   members)
BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace,           id,          members)

///// BEGIN DEBUG OUTPUT FACILITIES
namespace Ast {
    static std::ostream& operator<<(std::ostream& os, Namespace const& ns) {
        os << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
        for (auto& mem : ns.members)
            os << mem;
        return os << "}\n";
    }

    static std::ostream& operator<<(std::ostream& os, FunctionDeclaration const& fd) {
        os << fd.return_type << " " << fd.name << "(";
        for (bool first = true; auto& arg : fd.args) {
            os << (std::exchange(first, false) ? "" : ", ") //
               << arg.type << " " << arg.name;
        }

        return os << ");";
    }

    static std::ostream& operator<<(std::ostream& os, VariableDeclaration const& vd) {
        return os << vd.type << " " << vd.name << ";";
    }

    static std::ostream& operator<<(std::ostream& os, StructDeclaration const& sd) {
        os << "struct " << sd.name;
        if (sd.bases.size())
            for (bool first = true; auto const& base : sd.bases)
                os << (std::exchange(first, false) ? " : " : ", ") << base;

        os << " {\n";
        for (auto& mem : sd.members)
            os << mem << "\n";

        return os << "};\n";
    }
}
///// END DEBUG OUTPUT FACILITIES

namespace qi = boost::spirit::qi;

// Qi grammar that turns source text into an Ast::Program.
//
// The grammar is declared without a skipper; the start rule installs the
// comment/whitespace skipper itself, so callers use qi::parse rather than
// qi::phrase_parse. Because the start rule ends in an expectation point
// (`> eoi`), unparsed trailing input is reported by Spirit throwing
// qi::expectation_failure.
template <typename It> struct ProgramParser : qi::grammar<It, Ast::Program()> {
    ProgramParser() : ProgramParser::base_type(start) {
        using namespace qi;

        // Reserved words that must not be accepted as identifiers.
        keywords_ += "if", "do", "for", "else", "while", "not", "and", "or",
            "xor", "continue", "break", "case", "goto", "struct", "class",
            "enum", "namespace";

        // A keyword only counts when it is not the prefix of a longer word.
        kw_lexeme = keywords_ >> !(alnum|'_');

        // Skipper: whitespace, `//` line comments and `/* ... */` comments.
        skipper_ = space                             //
            | "//" >> *~char_("\r\n") >> (eol | eoi) //
            | "/*" >> *(char_ - "*/") >> "*/"        //
            ;

        identifier_ = !kw_lexeme >> raw[(alpha | '_') >> *(alnum | '_')];
        // `type name`, without a terminator, so it can double as a function
        // parameter.
        vardecl_    = identifier_ >> identifier_;
        fundecl_    = identifier_ >> identifier_ >> //
            '(' >> -(vardecl_ % ',') >> ')' >> ';';

        // A variable used as a declaration must end in ';', like functions and
        // structs do. Without it, `int x;` left the ';' unconsumed and the
        // enclosing struct or namespace failed to parse.
        decl_ = fundecl_ | (vardecl_ >> ';') | struct_;

        // With no ':' present, the base list is an empty list.
        Ast::Bases const no_bases;
        baselist_ = ':' >> identifier_ % ',' | attr(no_bases);

        struct_ =                                     //
            lexeme["struct" >> !graph] >> identifier_ //
            >> baselist_ >> '{'                       //
            >> *decl_                                 //
            >> '}' >> ';';

        nsmember_ = namespace_ | decl_;

        // The keyword must be a whole word. The lookahead consumes nothing,
        // so the '{' of an unnamed `namespace{` is still there for the rest of
        // the rule; matching '{' inside the lexeme consumed it and made
        // `namespace{}` fail.
        namespace_ = lexeme["namespace" >> !(alnum | '_')] >>
            -identifier_ >> '{' >> *nsmember_ >> '}';

        program_  = *nsmember_;
        start     = skip(skipper_.alias())[program_ > eoi];

        BOOST_SPIRIT_DEBUG_NODES((start)(program_)(nsmember_)(namespace_)(
            struct_)(decl_)(vardecl_)(fundecl_)(baselist_)(identifier_))
    }

  private:
    qi::symbols<char> keywords_;
    qi::rule<It>      kw_lexeme;

    // Rules declared without a skipper: nothing is skipped inside them.
    qi::rule<It, Ast::Program()> start;
    qi::rule<It, Ast::Id()>      identifier_;

    using Skip = qi::rule<It>;
    Skip skipper_;

    // Rules that skip whitespace and comments between their tokens.
    qi::rule<It, Ast::Bases(),               Skip> baselist_;
    qi::rule<It, Ast::Declaration(),         Skip> decl_;
    qi::rule<It, Ast::FunctionDeclaration(), Skip> fundecl_;
    qi::rule<It, Ast::Namespace(),           Skip> namespace_;
    qi::rule<It, Ast::NsMember(),            Skip> nsmember_;
    qi::rule<It, Ast::Program(),             Skip> program_;
    qi::rule<It, Ast::StructDeclaration(),   Skip> struct_;
    qi::rule<It, Ast::VariableDeclaration(), Skip> vardecl_;
};

// Parses `input` with ProgramParser and returns the resulting syntax tree.
// The grammar object is built once, on first use, and reused afterwards.
// The boolean result of the parse call is not inspected.
Ast::Program parse_program(std::string_view input) {
    using Iterator = std::string_view::const_iterator;
    static ProgramParser<Iterator> const grammar;

    Ast::Program tree;
    Iterator       cursor = input.begin();
    Iterator const stop   = input.end();
    qi::parse(cursor, stop, grammar, tree);
    return tree;
}

int main() {
    auto program = parse_program(R"(
            namespace Math {
                long factorial(int x);
            }

            struct GlobalA {
                int foo();
                double bar(string stuff, int i, bool flag);
                struct Nested {
                    /* todo implementation */
                };
            };

            namespace X { namespace Y {
                struct Mixin{};
                namespace Z1 {
                    struct Derived : GlobalA, Mixin {
                        void qux();
                    };
                }
                namespace Z2 {
                }
            }} // namespace X::Y
            namespace { }
        )");

    for (auto& member : program)
        std::cout << member << '\n';
}

The output (not pretty-printed):

namespace Math {
long factorial(int x);}

struct GlobalA {
int foo();
double bar(string stuff, int i, bool flag);
struct Nested {
};

};

namespace X {
namespace Y {
struct Mixin {
};
namespace Z1 {
struct Derived : GlobalA, Mixin {
void qux();
};
}
namespace Z2 {
}
}
}

namespace /*anonymous*/ {
}
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Adding AST representation and debug output of the parsed data: **[Live On Coliru](http://coliru.stacked-crooked.com/a/58542397b7f751e0)** – sehe Mar 28 '22 at 21:23
  • If I understand correctly, this code works well, but I would like the namespace structure itself to be optional, not the namespace's name! In the "source code" I am parsing, the "namespace" construct may or may not be present. That is what I mean by "optional" – Claudio La Rosa Mar 28 '22 at 21:30
  • Just write the corresponding rules. The only notable complexity in your question was around _nesting_ the namespaces. If you get stuck later with more specific things, come back *showing what you already have, and what you need help with* – sehe Mar 28 '22 at 21:33
  • Maybe my English is bad, but my question was about nesting and the optionality of the structure "namespace" itself. – Claudio La Rosa Mar 28 '22 at 21:35
  • (seeing your comment, you were probably not interested, but here's an X3 version: http://coliru.stacked-crooked.com/a/51192d9904bf9e7f) – sehe Mar 28 '22 at 21:36
  • @ClaudioLaRosa Your English is fine, your specificity is lacking. If there is no namespace, just don't parse it. However, if it optionally surrounds structs, you will have to parse it first. So, e.g: http://coliru.stacked-crooked.com/a/128078cf26fc3fd0 – sehe Mar 28 '22 at 23:46
  • If the answer was at all helpful, please remember to vote (https://meta.stackexchange.com/questions/5234/how-does-accepting-an-answer-work) – sehe Mar 29 '22 at 15:58
  • (And for sheer completeness, the X3 version of that too http://coliru.stacked-crooked.com/a/823bca99736728c9) – sehe Mar 29 '22 at 16:25