I'm trying to parse a URL query string with special rules. So far it works, with one exception described below. The URL is parsed as a set of key-value pairs using the following:
// key: one character from [a-zA-Z_] followed by any number of characters from the second set.
// NOTE(review): "\\-" and "\\." look intended as escapes for '-' and '.'; confirm that Qi's char_ set syntax treats a backslash that way.
const qi::rule<std::string::const_iterator, std::string()> key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9/%\\-_~\\.");
// value: zero or more characters, stopping before the next '=' or '&' (may be empty).
const qi::rule<std::string::const_iterator, std::string()> value = *(qi::char_ - '=' - '&');
// pair: a key, optionally followed by '=' and a value.
const qi::rule<std::string::const_iterator, std::pair<std::string, std::string>()> pair = key >> -('=' >> value);
// query: one pair followed by any number of '&'-prefixed pairs, collected into a map.
const qi::rule<std::string::const_iterator, std::unordered_map<std::string, std::string>()> query = pair >> *(('&') >> pair);
So far, so good. One of the special cases is that the ampersand can be presented in the form of an XML entity - &amp; - so the query rule was upgraded to
// query: pairs may be separated either by the XML entity "&amp;" or by a bare '&'.
// The entity is listed first so that it is consumed as a whole before the single-character alternative is tried.
const qi::rule<std::string::const_iterator, std::unordered_map<std::string, std::string>()> query = pair >> *((qi::lit("&amp;")|'&') >> pair);
and it worked as expected. Then an additional special case appeared: a quoted value, which can contain unescaped equal signs and ampersands, something in the form of a=b&d=e&f=$$g=h&i=j$$&x=y&z=def, which should parse into
- a => b
- d => e
- f => g=h&i=j
- x => y
- z => def
So I've added additional rule for "quoted" values
// key: one character from [a-zA-Z_] followed by any number of characters from the second set.
// NOTE(review): "\\-" and "\\." look intended as escapes for '-' and '.'; confirm that Qi's char_ set syntax treats a backslash that way.
const qi::rule<std::string::const_iterator, std::string()> key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9/%\\-_~\\.");
// escapedValue: text enclosed in "$$" delimiters. The delimiters are omitted from the attribute and the content cannot contain '$'.
const qi::rule<std::string::const_iterator, std::string()> escapedValue = qi::omit["$$"] >> *(qi::char_ - '$') >> qi::omit["$$"];
// value: any sequence of quoted segments and plain characters (anything except '=' and '&').
// NOTE: the Kleene star wraps the whole alternative, so after a closing "$$" the rule keeps collecting plain characters until the next '=' or '&'.
const qi::rule<std::string::const_iterator, std::string()> value = *(escapedValue | (qi::char_ - '=' - '&'));
// pair: a key, optionally followed by '=' and a value.
const qi::rule<std::string::const_iterator, std::pair<std::string, std::string>()> pair = key >> -('=' >> value);
// query: pairs separated by the XML entity "&amp;" or a bare '&'; the entity is tried first so it is consumed as a whole.
const qi::rule<std::string::const_iterator, std::unordered_map<std::string, std::string>()> query = pair >> *((qi::lit("&amp;")|'&') >> pair);
which, once again, worked as expected until the next case - a=b&d=e&f=$$g=h&i=j$$x=y&z=def. Note that there is no ampersand between the closing "$$" and the next key name. It looks like it can be easily solved by adding a Kleene operator, like
// query with an inner Kleene star around the separator, so that the separator between two pairs may be absent or repeated.
const qi::rule<std::string::const_iterator, std::unordered_map<std::string, std::string>()> query = pair >> *(*(qi::lit("&amp;")|'&') >> pair);
but for some reason it does not do the trick. Any suggestions will be appreciated!
EDIT: Sample code
#include <iostream>
#include <string>
#include <unordered_map>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
// Grammar for the query string: key[=value] pairs separated by "&amp;" or '&', where a value may contain $$...$$ quoted segments.
namespace rulez
{
using namespace boost::spirit::qi;
using It = std::string::const_iterator;
// key: one character from [a-zA-Z_] followed by any number of characters from the second set.
// NOTE(review): "\\-" and "\\." look intended as escapes for '-' and '.'; confirm that Qi's char_ set syntax treats a backslash that way.
const rule<It, std::string()> key = boost::spirit::qi::char_("a-zA-Z_") >> *boost::spirit::qi::char_("a-zA-Z_0-9/%\\-_~\\.");
// escapedValue: text enclosed in "$$" delimiters. The delimiters are omitted from the attribute and the content cannot contain '$'.
const rule<It, std::string()> escapedValue = boost::spirit::qi::omit["$$"] >> *(boost::spirit::qi::char_ - '$') >> boost::spirit::qi::omit["$$"];
// value: any sequence of quoted segments and plain characters (anything except '=' and '&').
// NOTE: the Kleene star wraps the whole alternative, so after a closing "$$" the rule keeps collecting plain characters until the next '=' or '&'.
const rule<It, std::string()> value = *(escapedValue | (boost::spirit::qi::char_ - '=' - '&'));
// pair: a key, optionally followed by '=' and a value.
const rule<It, std::pair<std::string, std::string>()> pair = key >> -('=' >> value);
// query: pairs separated by zero or more separators; the entity "&amp;" is tried before the bare '&' so it is consumed as a whole.
const rule<It, std::unordered_map<std::string, std::string>()> query = pair >> *(*(boost::spirit::qi::lit("&amp;")|'&') >> pair);
}
// Parses the sample query string with rulez::query and prints every collected key/value pair on stdout.
// A failed parse and any unconsumed input are reported on stderr, so stdout carries only the pairs.
// Returns 0 when the parser matched, 1 otherwise.
int main()
{
    std::unordered_map<std::string, std::string> keyVal;
    //std::string const paramString = "a=b&d=e&f=$$g=h&i=j$$&x=y&z=def";
    std::string const paramString = "a=b&d=e&f=$$g=h&i=j$$x=y&z=def";
    // Use a named iterator: qi::parse advances it past the consumed input, which makes a partial match visible.
    std::string::const_iterator first = paramString.begin();
    std::string::const_iterator const last = paramString.end();
    bool const ok = boost::spirit::qi::parse(first, last, rulez::query, keyVal);
    if (!ok)
        std::cerr << "parse failed\n";
    if (first != last)
        std::cerr << "unparsed remainder: \"" << std::string(first, last) << "\"\n";
    // The loop variable is not called `pair`, to avoid confusion with std::pair and rulez::pair.
    for (const auto& entry : keyVal)
        std::cout << "(\"" << entry.first << "\",\"" << entry.second << "\")" << '\n';
    return ok ? 0 : 1;
}
Output for "a=b&d=e&f=$$g=h&i=j$$x=y&z=def" (erroneous, should be the same as for "a=b&d=e&f=$$g=h&i=j$$&x=y&z=def")
("a", "b"),("d", "e"),("f", "g=h&i=jx")
Output for "a=b&d=e&f=$$g=h&i=j$$&x=y&z=def" (as expected)
("a", "b"),("d", "e"),("f", "g=h&i=j"),("x", "y"),("z", "def")
EDIT: Somewhat simpler parsing rules, just to make stuff easier to understand
// Simplified grammar: same structure as the sample rules, with a looser key and only a bare '&' as separator.
// The unqualified names (rule, char_, omit, lit, pair, unordered_map) rely on using-directives that are not shown here.
namespace rulez
{
// key: one or more characters other than '&' and '='.
const rule<std::string::const_iterator, std::string()> key = +(char_ - '&' - '=');
// escapedValue: text enclosed in "$$" delimiters. The delimiters are omitted from the attribute and the content cannot contain '$'.
const rule<std::string::const_iterator, std::string()> escapedValue = omit["$$"] >> *(char_ - '$') >> omit["$$"];
// value: any sequence of quoted segments and plain characters (anything except '&' and '=').
// NOTE: the Kleene star wraps the whole alternative, so after a closing "$$" the rule keeps collecting plain characters.
const rule<std::string::const_iterator, std::string()> value = *(escapedValue | (char_ - '&' - '='));
// pair: a key, optionally followed by '=' and a value.
const rule<std::string::const_iterator, pair<std::string, std::string>()> pair = key >> -('=' >> value);
// query: one pair followed by any number of further pairs, each preceded by zero or more '&'.
const rule<std::string::const_iterator, unordered_map<std::string, std::string>()> query = pair >> *(*(lit('&')) >> pair);
}