3

I have the following snippet.

#include <cassert>
#include <chrono>
#include <iostream>
#include <sstream>
#include <string>

#include <boost/spirit/include/classic.hpp>
#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;
namespace classic = boost::spirit::classic;

// Prints the time elapsed between `begin` and `end` to std::cout,
// truncated to whole seconds and followed by a newline (with flush).
// `T` is expected to be a std::chrono time_point type.
template<typename T>
void output_time(const T& end, const T& begin)
{
   typedef std::chrono::seconds whole_seconds;
   const whole_seconds elapsed =
      std::chrono::duration_cast<whole_seconds>(end - begin);
   std::cout << elapsed.count() << std::endl;
}

// Spirit Qi grammar that matches zero or more double-quoted strings written
// back to back (e.g. "name""name"...). Neither rule declares an attribute
// signature, so nothing is synthesized for the matched text here.
template<typename Iter>
struct qi_grammar : public qi::grammar<Iter>
{
   // rule_ is registered with the grammar base class as the start rule.
   qi_grammar():qi_grammar::base_type(rule_)
   {
      // Start rule: any number of quoted strings (string_ is defined on the next line).
      rule_ = *string_;
      // Quoted string: opening quote, any run of non-quote characters, closing quote.
      string_ = qi::char_('"') >> *(qi::char_ - '"') >> qi::char_('"');
   }
   // Start rule: a sequence of quoted strings.
   qi::rule<Iter> rule_;
   // A single quoted string.
   qi::rule<Iter> string_;
};

// Spirit Classic counterpart of the Qi grammar: matches zero or more
// double-quoted strings written back to back.
// NOTE: the Iter template parameter is not used inside this struct; it only
// distinguishes instantiations.
template<typename Iter>
struct classic_grammar : public classic::grammar<classic_grammar<Iter>>
{
   // Classic grammars supply their rules through a nested definition<ScannerT>
   // template that exposes start().
   template<typename ScannerT>
   struct definition
   {
      // The grammar instance passed in is not used; the rules carry no state from it.
      definition(const classic_grammar&)
      {
         // Start rule: any number of quoted strings (string_ is defined on the next line).
         rule = *string_;
         // Quoted string: opening quote, any run of non-quote characters, closing quote.
         string_ = classic::ch_p('"') >> *(classic::anychar_p - '"') >> classic::ch_p('"');
      }
      // rule is the start rule; string_ matches a single quoted string.
      classic::rule<ScannerT> rule, string_;
      // Returns the start rule.
      const classic::rule<ScannerT>& start() const { return rule; }
   };
};

// Benchmarks the Qi grammar: parses [first, last) 100 times with `prs`,
// asserting on every pass that the parse succeeds and consumes the whole
// input, then prints the total elapsed time via output_time().
template<typename Iter>
void parse(Iter first, Iter last, const qi_grammar<Iter>& prs)
{
   typedef std::chrono::system_clock clock_type;

   const auto started = clock_type::now();
   int remaining = 100;
   while (remaining-- > 0)
   {
      // qi::parse advances the iterator it is given, so work on a copy.
      Iter cursor = first;
      const bool matched = qi::parse(cursor, last, prs);
      const bool consumed_all = matched && cursor == last;
      if (!consumed_all)
      {
         assert(false);
      }
   }
   const auto stopped = clock_type::now();
   output_time(stopped, started);
}

// Benchmarks the Spirit Classic grammar: parses [first, last) 100 times with
// `prs`, asserting on every pass that the whole input was matched, then
// prints the total elapsed time via output_time().
template<typename Iter>
void parse_c(Iter first, Iter last, const classic_grammar<Iter>& prs)
{
   auto start = std::chrono::system_clock::now();
   for (int i = 0; i < 100; ++i)
   {
      auto info = classic::parse(first, last, prs);
      // Check `full`, not just `hit`: the start rule is a Kleene star, which
      // succeeds even on zero matches, so `hit` alone can never fail. `full`
      // additionally requires that all input was consumed, which mirrors the
      // `next != last` check in the Qi driver.
      if (!info.full)
      {
         assert(false);
      }
   }
   auto finish = std::chrono::system_clock::now();
   output_time(finish, start);
}

// Builds the benchmark input (the 6-character token "name" in double quotes,
// repeated 1024 * 500 times), prints its size, then times the Qi and the
// Classic grammar over the same input.
int main()
{
   typedef std::string::const_iterator iterator_type;

   std::stringstream buffer;
   const int repetitions = 1024 * 500;
   for (int n = repetitions; n > 0; --n)
   {
      buffer << "\"name\"";
   }
   const std::string s = buffer.str();

   qi_grammar<iterator_type> qi_lexeme;
   classic_grammar<iterator_type> classic_lexeme;

   std::cout << "Size: " << s.size() << std::endl;

   std::cout << "Qi" << std::endl;
   parse(s.begin(), s.end(), qi_lexeme);

   std::cout << "Classic" << std::endl;
   parse_c(s.begin(), s.end(), classic_lexeme);
}

The results are:

forever@pterois:~/My_pro1/cpp_pro$ ./simple_j 
Size: 3072000
Qi
0
Classic
1

So Qi parses faster than Classic. But when I change the attribute of the string_ rule to std::string() (i.e. qi::rule<Iter, std::string()> string_;), I get:

forever@pterois:~/My_pro1/cpp_pro$ ./simple_j 
Size: 3072000
Qi
19
Classic
1

It's very, very slow. Am I doing something wrong? Thanks.

Compiler: gcc 4.6.3. Boost: 1.48.0. Flags: -std=c++0x -O2. On LWS the results are the same.

Using semantic actions for char_, i.e.

string_ = qi::char_('"') >> *(qi::char_[boost::bind(&some_f, _1)] - '"')
 >> qi::char_('"')[boost::bind(&some_clear_f, _1)];

improves performance, but I'm looking for another solution too, if one exists.

Cœur
  • 37,241
  • 25
  • 195
  • 267
ForEveR
  • 55,233
  • 2
  • 119
  • 133
  • What compiler? What compiler flags (optimization etc)? – Igor R. Jan 18 '13 at 14:27
  • @IgorR. gcc 4.6.3. -std=c++0x and -O2 – ForEveR Jan 18 '13 at 14:27
  • 3
    Seems like your question is similar to this one: http://stackoverflow.com/questions/13343874/boost-spirit-qi-slow – Igor R. Jan 18 '13 at 14:42
  • @IgorR. hm... mb, but answers for its question cannot help me. since i'm already using -O2 + i'm trying to use qi::repeat - results are same... handle strings as chars is not solution for me. – ForEveR Jan 18 '13 at 14:49
  • @IgorR. mm... i'm not right, i think... semantic actions for store this chars looks like one of possible solution. – ForEveR Jan 18 '13 at 15:20

1 Answer

4

I think I answered a very similar question once before on SO. Sadly, I can't find it.

In short, you might prefer to use iterators into the source data instead of allocating (and copying) strings on each match.

When using

qi::rule<Iter, boost::iterator_range<Iter>()> string_;
string_ = qi::raw [ qi::char_('"') >> *(qi::char_ - '"') >> qi::char_('"') ];

I got (with a considerably (16x) larger data set):

Size: 49152000
Qi
12
Classic
11

In fact, after changing the rule itself to

  string_ = qi::raw [ qi::lit('"') >> *~qi::char_('"') >> '"' ];

I got

Size: 49152000
Qi
7
Classic
11

So... that's pretty decent, I suppose. See it on LWS: http://liveworkspace.org/code/opA5s$0

For completeness, obviously you can get a string from the iterator_range by doing something like

const std::string dummy("hello world");
auto r = boost::make_iterator_range(begin(dummy), end(dummy));
std::string asstring(r.begin(), r.end());

The trick is to delay actual string construction to when it's needed. You might want to have this trick happen automatically. This is what Spirit Lex does for token attributes. You might want to look into that.

sehe
  • 374,641
  • 47
  • 450
  • 633
  • **edited** with a clarification and an optimization that saves ~40% of the runtime, making the `Qi` version ***vastly*** superior (1/3rd faster than `classic`). (Note I used gcc 4.7.2, 64bit, -O3 -march=native [-ltcmalloc](http://code.google.com/p/gperftools/?redir=1) on a Q9550 with 8Mb of RAM) - see also [other answers](http://stackoverflow.com/search?q=user%3A85371+tcmalloc) for info on `libtcmalloc` – sehe Jan 19 '13 at 18:54
  • oh... i don't know about raw directive. thank you very much. I think, this will be answer for my question. – ForEveR Jan 19 '13 at 21:10