1

I'm trying to write my first boost spirit parser for a specific messaging format and I encountered some problems. The boost library version used is 1.49.0!

#include <iostream>
#include <sstream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>

namespace qi = boost::spirit::qi;

struct message
{
  std::string title;
  std::string sender;
  std::string receiver;
  unsigned int seqNo;
  std::string senderRef;
  std::string receiverRef;
  unsigned int seqNoRef;
  std::string id;
};

BOOST_FUSION_ADAPT_STRUCT(
    message,
    (std::string, title)
    (std::string, sender)
    (std::string, receiver)
    (unsigned int, seqNo)
    (std::string, senderRef)
    (std::string, receiverRef)
    (unsigned int, seqNoRef)
    (std::string, id)
)

template<typename Iterator>
struct MyQiGrammar : qi::grammar<Iterator, message(), qi::space_type>
{
  MyQiGrammar() : MyQiGrammar::base_type(start) {
    qi::uint_parser<unsigned int, 10, 3, 3> uint_3p;

    delim     = qi::char_("-/");            // some values are delimited by '-' or '/'

    title    %= qi::repeat(3)[qi::upper];   // exactly 3 upper case letters
    sender   %= +qi::upper;                 // at least one upper case letter
    receiver %= +qi::upper;                 // at least one upper case letter
    seqNo    %= uint_3p;                    // exactly 3 digits (e.g. 001)
    id       %= qi::repeat(1,7)[qi::alnum]; // at least 1 upper case letter and maximum 7

    start    %=
        '('
        >> title
        >> sender >> delim >> receiver >> seqNo
        >> -(sender >> delim >> receiver >> seqNo)
        >> delim >> id
        >>
        ')';
  }

  qi::rule<Iterator> delim;
  qi::rule<Iterator, std::string(), qi::space_type> title;
  qi::rule<Iterator, std::string(), qi::space_type> sender, receiver, id;
  qi::rule<Iterator, unsigned int(), qi::space_type> seqNo;
  qi::rule<Iterator, message(), qi::space_type> start;
};

int
main(int args, char** argv)
{
  typedef std::istreambuf_iterator<char> base_iterator_type;
  typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
  typedef boost::spirit::classic::position_iterator2<forward_iterator_type> pos_iterator_type;
  typedef MyQiGrammar<pos_iterator_type> qi_parser;

  std::string rawMsg = "(ABCZ/Y002-GWI4576)";
  qi_parser myGrammarParser;
  message msg;

  std::istringstream iss(rawMsg);
  base_iterator_type in_begin(iss);
  forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
  forward_iterator_type fwd_end;
  pos_iterator_type pos_begin(fwd_begin, fwd_end);
  pos_iterator_type pos_end;

  std::cout << rawMsg << std::endl;

  try {
    bool msgRes = qi::phrase_parse(pos_begin, pos_end,
                                   myGrammarParser,
                                   qi::space,
                                   msg);

    if(msgRes) {
      std::cout << "Parsing succeeded!" << std::endl;

      if(pos_begin == pos_end) {
        std::cout << "Full match!" << std::endl;
        std::cout << "Title                  : " << msg.title << std::endl;
        std::cout << "Sender                 : " << msg.sender << std::endl;
        std::cout << "Receiver               : " << msg.receiver << std::endl;
        std::cout << "Sequence number        : " << msg.seqNo << std::endl;
        std::cout << "Sender (ref.)          : " << msg.senderRef << std::endl;
        std::cout << "Receiver (ref.)        : " << msg.receiverRef << std::endl;
        std::cout << "Sequence number (ref.) : " << msg.seqNoRef << std::endl;
        std::cout << "Message Identifier     : " << msg.id << std::endl;
      }
    } else {
      std::cout << "Parsing failed!" << std::endl;
      std::cout << "Stopped at: " << pos_begin.get_position().line
                << ":" << pos_begin.get_position().column << std::endl;
    }
  } catch(qi::expectation_failure<pos_iterator_type>& e) {
    const boost::spirit::classic::file_position_base<std::string>& pos = e.first.get_position();
    std::stringstream ss;

    ss << "Parse error at line " << pos.line << " column " << pos.column
       << "\n\t" << e.first.get_currentline()
       << "\n\t" << std::string(pos.column, ' ') << "^--here";

    std::cerr << ss.str() << std::endl;
  }

  return 0;
}

In general, the message format looks like this:

'('<TITLE><SENDER>'/'<RECEIVER><SEQNO>[<SENDERREF>'/'<RECEIVERREF><SEQNOREF>]'-'<MID>')'

The output obviously shows that I'm doing something wrong with the optional message reference part:

Parsing succeeded!
Full match!
Title                  : ABC
Sender                 : Z
Receiver               : Y
Sequence number        : 2
Sender (ref.)          :
Receiver (ref.)        : GWI4576             <--- Message identifier
Sequence number (ref.) : 3072563792          <--- uninitialized, can be neglected
Message Identifier     :

There are more wrong member assignments if rawMsg includes a message reference, for example "(ABCZ/Y002Y/Z001-GWI4576)":

Parsing succeeded!
Full match!
Title                  : ABC
Sender                 : Z
Receiver               : Y
Sequence number        : 2
Sender (ref.)          : YZ                  <--- Sender and receiver!?
Receiver (ref.)        : GWI4576             <--- Message identifier
Sequence number (ref.) : 3214704440          <--- uninitialized, but should be 1
Message Identifier     :

What did I wrong in my rule(s)?

In addition, I observed that if an incorrect message format is parsed, then the line and column of the iterator is always set to 1 independent of the error position:

Parsing failed!
Stopped at: 1:1

Why is that?

janr
  • 3,664
  • 3
  • 24
  • 36
  • 1
    [This question](http://stackoverflow.com/questions/15998059/boostspiritqi-permutation-parser-and-synthesized-attributes) presents a similar (simpler to analyze) problem. [This post](http://boost.2283326.n4.nabble.com/Unexpected-behavior-using-versus-tp4662087p4662134.html) also explains how to solve another similar one. Using the method described in them you can get [this](http://coliru.stacked-crooked.com/a/e659b53773c259fc) that I think obtains the result that you expect. (Adding the tags [tag:boost] and/or [tag:boost-spirit] may help your question get more exposure) – llonesmiz Jul 11 '14 at 21:15
  • If you want `seqNoRef` initialized to 1 you should do so in a default constructor, something like [this](http://coliru.stacked-crooked.com/a/1f60348aaa22f030) could work. – llonesmiz Jul 12 '14 at 07:23
  • Thanks a lot! It's interesting that a new struct can solve this problem, but not an enclosing brackets with optional operator. Why don't add this in an answer? – janr Jul 14 '14 at 08:27

0 Answers0