5

In Boost.Spirit, I added error-handling code based on the Roman numerals example.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/foreach.hpp>

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Spirit.Qi grammar with no attribute and no skipper. Its single rule,
// `start`, accepts one or more 'M' characters followed by the literal "</>".
// An on_error<fail> handler is attached to `start`; it writes a fixed
// message to std::cout.
template <typename Iterator>
struct roman : qi::grammar<Iterator>
{
  roman() : roman::base_type(start)
  {
    using qi::eps;
    using qi::lit;
    using qi::lexeme;
    using qi::_val;
    using qi::_1;
    using ascii::char_;

    // for on_error
    using qi::on_error;
    using qi::fail;
    using phoenix::construct;
    using phoenix::val;

    // The operands are joined with the sequence operator `>>` only; the rule
    // contains no expectation operator `>`.
    start = +(lit('M') )  >> "</>";

    // Error handler for `start`. The two placeholder-based pieces (_4, and
    // the string built from _3 and _2) are commented out, so only the
    // literal text is printed.
    on_error<fail>
    (
        start
      , std::cout
            << val("Error! Expecting ")
            // << _4                            // what failed?
            << val(" here: \"")
            // << construct<std::string>(_3, _2)   // iterators to error-pos, end
            << val("\"")
            << std::endl
    );
  }
  // Entry rule handed to base_type in the constructor.
  qi::rule<Iterator> start;
};

// Interactive driver for the roman grammar.
//
// Prints a banner, then reads standard input line by line. Each line is
// parsed with roman_parser; the loop ends on EOF, on an empty line, or on a
// line whose first character is 'q' or 'Q'. A line counts as a success only
// when parse() returns true AND the whole line was consumed.
//
// Returns 0.
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "\t\tRoman Numerals Parser\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type a Roman Numeral ...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef roman<iterator_type> roman;

    roman roman_parser; // Our grammar

    std::string str;
    // The grammar is declared as qi::grammar<Iterator>, i.e. without a
    // synthesized attribute, so nothing is expected to be stored in `result`.
    // Initialize it so the success branch never prints an indeterminate
    // value (reading an uninitialized unsigned is undefined behavior).
    unsigned result = 0;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        //[tutorial_roman_grammar_parse
        // parse() advances `iter` past whatever it managed to consume.
        bool r = parse(iter, end, roman_parser, result);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            // Whatever is left between iter and end is the unparsed tail.
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \": " << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
        //]
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}

my questions are:

  1. The "on_error" handler isn't triggered. Why?
  2. I commented out the "<< _4" line. If I want to print the part that failed, how do I do that?
sehe
  • 374,641
  • 47
  • 450
  • 633
Rui Zhou
  • 209
  • 1
  • 3
  • 9

1 Answers1

8

Three steps:

  1. Qualify the placeholders:

    // The placeholders are spelled with an explicit qi:: qualification.
    // _4 is "what failed"; _3 and _2 are the iterators to the error position
    // and to the end of the input.
    on_error<fail>(start, 
            std::cout
               << val("Error! Expecting ")
               << qi::_4
               << val(" here: \"")
               << construct<std::string>(qi::_3, qi::_2)
               << val("\"")
               << std::endl
        );
    
  2. You'll also need to make sure you have expectation points to trigger the error handler.

    start = eps > +(lit('M') ) >> "</>";
    

    See e.g. Boost.Spirit.Qi - Errors at the beginning of a rule for explanation

  3. (optionally) Name your rules

    start.name("start");
    

    Using BOOST_SPIRIT_DEBUG_NODE(S) is another way to implicitly name your rules.

See it Live on Coliru (cleaned up and simplified in places)

Now it prints (input iv):

Error! Expecting <sequence>"M""</>" here: 'iv'
Parsing failed
stopped at: 'iv'

Full code

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

#include <iostream>
#include <fstream>

namespace qi  = boost::spirit::qi;
namespace phx = boost::phoenix;

// Spirit.Qi grammar with no attribute and no skipper, used to demonstrate
// on_error. Its single rule accepts one or more 'M' characters followed by
// the literal "</>", and reports expectation failures on std::cout.
template <typename Iterator>
struct roman : qi::grammar<Iterator>
{
    roman() : roman::base_type(start)
    {
        using namespace qi;

        // `eps` is here only to give `>` the left-hand operand it needs;
        // the `>` is what creates the expectation point that lets the error
        // handler below be invoked.
        start = eps > +lit('M') >> "</>";
        // Give the rule a name.
        start.name("start");

        // Error handler for `start`: _4 is what was expected, and
        // std::string(_3, _2) is the input from the error position to the
        // end. The placeholders are qualified with qi:: explicitly.
        on_error<fail>(start, 
                phx::ref(std::cout)
                   << "Error! Expecting "
                   << qi::_4
                   << " here: '"
                   << phx::construct<std::string>(qi::_3, qi::_2)
                   << "'\n"
            );
    }
    // Entry rule handed to base_type in the constructor.
    qi::rule<Iterator> start;
};

// Interactive driver for the roman grammar.
//
// Reads standard input line by line and parses each line with roman_parser.
// The loop ends on EOF, on an empty line, or on a line whose first character
// is 'q' or 'Q'. A line counts as a success only when parse() returns true
// AND the whole line was consumed; otherwise the unparsed tail is printed.
int main()
{
    typedef std::string::const_iterator iterator_type;
    roman<iterator_type> roman_parser; // Our grammar

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        iterator_type iter = str.begin(), end = str.end();
        // The grammar is declared as qi::grammar<Iterator>, i.e. without a
        // synthesized attribute, so nothing is expected to be stored in
        // `result`. Initialize it so the success branch never prints an
        // indeterminate value (reading an uninitialized unsigned is
        // undefined behavior).
        unsigned result = 0;
        // parse() advances `iter` past whatever it managed to consume.
        bool r = parse(iter, end, roman_parser, result);

        if (r && iter == end)
        {
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
        }
        else
        {
            // Whatever is left between iter and end is the unparsed tail.
            std::string rest(iter, end);
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: '" << rest << "'\n";
        }
    }
}

In addition to the comment: This is something I've been testing with - haven't exactly made it to work yet, but the error handler is getting invoked and eating input as it should. Maybe it could be of help?

// Experimental recovery handler (the author notes it does not fully work yet).

// Lazy predicate: the character at iterator _1 is '\r' or '\n'.
static auto const at_eol = (*_1 == '\r') || (*_1 == '\n');
// Lazy predicate: iterator _1 has reached iterator _2 (the end of the input).
static auto const at_eoi = (_1 == _2);

// Handler attached to `start` with the `retry` action. In order, it:
//   1. prints what was expected (_4) and the text from the error position
//      (_3) to the end (_2), passed through `escape_`
//      (NOTE(review): `escape_` is not defined in this snippet; presumably
//      it escapes non-printable characters - confirm);
//   2. advances _1 until a line-end character or the end of input;
//   3. advances _1 past the line-end characters;
//   4. sets _pass to fail if the end of input has been reached.
on_error<retry>(start, 
    (
        (phx::ref(std::cout) << "rule start: expecting " << _4 << " here: '" << escape_(_3, _2) << "'\n"),
        phx::while_ (!at_eoi && !at_eol) [ ++_1, phx::ref(std::cout) << "\nadvance to newline\n" ],
        phx::while_ (!at_eoi && at_eol)  [ ++_1, phx::ref(std::cout) << "\neat newline\n" ],
        phx::if_ (at_eoi)                [ _pass = fail ]
    )
);

See also the note under Important in the documentation for multi_pass<>

Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • hi sehe, start = eps > +(lit('M') ) >> ">"; do you mean we need "eps" to trigger the on_error? but I don't see "eps" in mini_xml3.cpp? – Rui Zhou Oct 15 '13 at 13:08
  • 1
    You need `operator>` to indicate an _expectation point_. And since the operator takes 2 operands, you need a _left-hand-side_ operand; `qi::eps` takes that role. – sehe Oct 15 '13 at 13:10
  • The fact that the leading expectation point is absent from the mini_xml samples is probably an oversight and has been [reported on the spirit-general list](http://boost.2283326.n4.nabble.com/mini-xml-example-does-not-report-an-error-if-the-xml-has-no-start-tag-td4652557.html) after having been asked [for the second time on SO](http://stackoverflow.com/q/19215938/85371) – sehe Oct 15 '13 at 13:13
  • can I do some recover jobs inside on_error,e.g. move the pointer to the next line(after \r\n) so that the parser will ignore the current one and continue the parsing? – Rui Zhou Oct 15 '13 at 13:42
  • I think you can. That's unusual though. Be sure to test this thoroughly. Have a look at [the `seek[]` directive in the Spirit Repository](http://www.boost.org/doc/libs/1_54_0/libs/spirit/repository/doc/html/spirit_repository/qi_components/directives/seek.html) – sehe Oct 15 '13 at 13:46
  • @RuiZhou I've added something I've been testing with to the answer - haven't exactly made it to work yet, but the error handler is getting invoked and eating input as it should. Maybe it could be of help? I'm still not sure whether this is actually a "typical" usage of Spirit. Perhaps you should ask on the user list? – sehe Oct 15 '13 at 14:50
  • 1
    hi sehe, one more question, can you explain why _4 means the expecting token and _3 to _2 is the current input? why is in this order? thanks! – Rui Zhou Oct 16 '13 at 05:32
  • No, other than what the documentation says about it. I can guess though: [_1, _2) is the raw input range for the current rule. _3 points to the error location (in that range). I assume that _4 was just added later for convenience/easier debugging (note that this information required much more work because parsers had to be made to be self-describing. This makes it likely that a "version 0.1" of `on_error` might not have had this) – sehe Oct 16 '13 at 08:34