1

I extended the Mini XML example from the spirit manual.
The grammar describes an XML tag that is either closed with '/>' and has no child nodes, or is closed, as in the example, with a closing tag '</name>' and can optionally have children.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant.hpp>
#include <boost/variant/recursive_variant.hpp>

// Forward declaration: mini_xml_node refers to XmlTree before XmlTree is defined.
struct XmlTree;

// A single node of the parsed document: either a nested element (XmlTree,
// held through boost::recursive_wrapper because XmlTree is still incomplete
// at this point) or a std::string.
typedef boost::variant<boost::recursive_wrapper<XmlTree>, std::string>
    mini_xml_node;

// Sequence of nodes; used as the type of XmlTree::childs.
typedef std::vector<mini_xml_node> Children;

// Attribute type of the xml rule: one element with its name and child nodes.
struct XmlTree
{
    // Element name; first member of the Fusion sequence, so it receives the
    // attribute of the startTag rule.
    std::string name;
    // Child nodes (nested elements or strings). For an element closed with
    // "/>" the grammar supplies an empty Children here.
    Children childs;
};

// Adapt XmlTree as a Boost.Fusion sequence of (name, childs), in that order.
// The xml rule is declared with XmlTree() as its attribute and defined with
// %=, so its sub-parsers' attributes are mapped onto these members.
BOOST_FUSION_ADAPT_STRUCT(
XmlTree,
(std::string, name)
(Children, childs)
)

// Iterator type the grammar and every rule in it are instantiated with.
typedef std::string::const_iterator Iterator;

// Short aliases for the Boost namespaces used by the grammar.
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Spirit.Qi grammar for a minimal XML dialect, using ascii::space as skipper.
//
// An element is a start tag followed either by "/>" (no children) or by ">",
// any number of child nodes and a closing tag that repeats the element name.
// The synthesized attribute is an XmlTree. The single rule-local is a pointer
// to the name produced by startTag; it is dereferenced and handed to endTag
// so the closing tag can be matched against that name.
class XmlParserGrammar
    : public qi::grammar<Iterator, XmlTree(), qi::locals<std::string*>, ascii::space_type>
{
public:
    // Wires up all rules; `xml` is the start rule and is reported as "xml".
    XmlParserGrammar()
        : XmlParserGrammar::base_type(xml, "xml")
    {
        // Character data: one or more characters up to (not including) '<'.
        text %= qi::lexeme[+(ascii::char_ - '<')];

        // "<name": the '<' must not be followed by '/', which would be a
        // closing tag. The name itself is parsed without skipping.
        startTag %=
                '<'
            >>  !qi::lit('/')
            >>  qi::lexeme[+(ascii::alnum - (ascii::space | '>' | "/>"))]
            ;

        // "</name>", where name is the inherited attribute (_r1).
        endTag = "</" > qi::lit(qi::_r1) > '>';

        // A child is either a nested element or character data.
        node_ %= xml | text;

        // Remember where the parsed name lives (_a), then accept either the
        // self-closing form or the open/children/close form.
        xml %=
                startTag[qi::_a = &qi::_1]
            >>  (   // attr() supplies an empty Children so this branch yields
                    // the same attribute type as the branch with child nodes.
                    (qi::lit("/>") > qi::attr(Children()))
                |   (qi::lit(">") >> *node_ > endTag(*qi::_a))
                )
            ;
    }

private:
    // Skipper shared by every rule of this grammar.
    typedef ascii::space_type Skipper;

    qi::rule<Iterator, XmlTree(), qi::locals<std::string*>, Skipper> xml;
    qi::rule<Iterator, std::string(), Skipper> startTag;
    qi::rule<Iterator, mini_xml_node(), Skipper> node_;
    qi::rule<Iterator, void(std::string&), Skipper> endTag;
    qi::rule<Iterator, std::string(), Skipper> text;
};

Is it possible to write this rule without the attr(Children()) parser? I think it is more or less a performance penalty. I need it to avoid the optional attribute of the alternative parser. If there are no child tags, the attribute should simply be an empty vector.

hkaiser
  • 11,403
  • 1
  • 30
  • 35
MarcoH
  • 33
  • 4

1 Answer

1

You should be able to write:

// Variant of the xml rule that avoids attr(Children()): the first alternative
// is written as the sequence "/>" >> eps instead of a bare literal.
// NOTE(review): assumes qi::_a, qi::_1 and qi::eps are brought into scope by
// using-declarations — confirm.
// NOTE(review): `attributes` and `node` are not among the rules declared in
// the question's grammar (which declares `node_`); presumably they come from
// a different revision of the code — confirm before pasting.
xml %= startTag[_a = &_1] 
       >> attributes 
       >> (  "/>" >> eps
          |  ">" >> *node > endTag(*_a) 
          )
    ;

That leaves the vector attribute unchanged (and empty).

hkaiser
  • 11,403
  • 1
  • 30
  • 35
  • Sorry, but your grammar does not seem to compile. I think it's because ( "/>" | ">" >> *node > endTag(*_a) ) generates an optional attribute but my 'childs' member is not an optional. I added the XmlTree struct in the question. – MarcoH Oct 01 '10 at 18:15
  • I would need to see a minimal, self-contained example to be able to see what's wrong. – hkaiser Oct 01 '10 at 22:29
  • I added an example in the question. It compiles on VS 2008 – MarcoH Oct 02 '10 at 16:23
  • I edited my answer above to make it compile and work in your case. The trick is to convert the first alternative into an artificial sequence, as sequences do the right thing almost all the time, while alternatives, OTOH, are rather tricky to get right. – hkaiser Oct 02 '10 at 18:32