0

#include <fstream>
#include <iostream>
#include <string>
#include<string>
#include<boost/algorithm/string.hpp>
#include<boost/regex.hpp>
#include <boost/algorithm/string/trim.hpp>
using namespace std;
using namespace boost;


int main() {
    string robotsfile="User-Agent: *"
            "Disallow: /";

    regex exrp( "^Disallow:(.*)$");

            match_results<string::const_iterator> what;

            if( regex_search( robotsfile, what, exrp ) )

            {

                string s( what[1].first, what[1].second );


                cout<< s;
            }

    return 0;
}

I need to extract the disallowed path `/` from the line `Disallow: /`. What is wrong with my regex?

Community
  • 1
  • 1
llal
  • 13
  • 3
  • as a small BTW, I think that `what[1]` acts as a string, so you can do `string s(what[1])`, or probably even `cout << what[1]` – davka Sep 20 '10 at 13:16

1 Answer

5
string robotsfile = "User-Agent: *"
    "Disallow: /";

The adjacent string literals above are concatenated into "User-Agent: *Disallow: /"; no newline is inserted between them, as you might have expected. Since your regular expression requires "Disallow:" to appear at the start of a line, it does not match. The logically correct code would be something like this:

string robotsfile = "User-Agent: *\n"
    "Disallow: /";

or

string robotsfile = "User-Agent: *\nDisallow: /";
  • If the input is `User-Agent: *\nDisallow: /posts/\nDisallow: /tags/\nDisallow: /unanswered/\n`, I need to get only `/posts/`, but with this regex I get `/posts/ Disallow: /tags/ Disallow: /unanswered/`. How can I get `/posts/` alone? – llal Sep 20 '10 at 13:01
  • @llal: I am not that good at regular expressions, so I'd recommend that you post a separate question to get the exact expression that will match what you want. –  Sep 22 '10 at 13:01