4

I need to split a URL into host, port and resource. I searched a lot of references but couldn't find anything that could help me. This is what I want:

e.g. if the URL is 1.2.3.4:5678/path1/path2.html, the required output is: Host - 1.2.3.4, Port - 5678, Resource - /path1/path2.html

This is what I tried:

#include <iostream>
 #include <cstddef>
 #include <string>
 using namespace std;

// Attempts to split a hard-coded URL into protocol, host, port and resource
// and prints each piece.
// NOTE(review): none of the searches below is given a start position, so for
// this URL all three of them stop at the same ':' that follows "http".
int main()
{
   string url="http://qwert.mjgug.ouhnbg:5678/path1/path2.html";
   // find_first_of() returns the index of the first character that is ANY of
   // ':' or '/'; it does not search for the substring "://".
   size_t found = url.find_first_of("://");
   cout<<found<<endl;
   string protocol=url.substr(0,found);
   // Searches from index 0 again, so this is the scheme's ':' rather than the
   // ':' in front of the port.
   size_t found1 =url.find_first_of(":");
   cout<<found1<<endl;
   // Starts 3 characters past the scheme's ':'; the length is computed from
   // found1 and found, which refer to the same position here.
   string host =url.substr(found+3,found1-found+1);
   // First ':' or '/' from index 0 - once more the scheme's ':'.
   size_t found2 = url.find_first_of(":/");
   // Both the start offset and the length are derived from the index of the
   // scheme's ':', not from where the port actually sits in the string.
   string port1 =url.substr(found1+7,found2+found1-1);
   // Everything after found2, i.e. the rest of the URL beginning at "//".
   string port =url.substr(found2+1);
   cout<<protocol<<endl;
   cout<<host<<endl;
   cout<<port1<<endl;
   cout<<port;
   return 0;
}

My expected result is:

Protocol - http
Host - qwert.mjgug.ouhnbg
Port - 5678
Resource - path1/path2.html

But my result is:

http:                                                                                                                                                  
qwert.mj                                                                                                                                               
t.mjgug                                                                                                                                                
//qwert.mjgug.ouhnbg:5678/path1/path2.html

What should I change?

Electronic Brat
  • 133
  • 1
  • 2
  • 11
  • write the string into a `std::stringstream` and carve it up with `std::getline(stream, hoststr, ':')` and `std::getline(stream, portstr, '/')`. The remnant in the stream only needs prepending a '/' to give you the path. – user4581301 May 11 '17 at 05:15

2 Answers

4

Use string.find_first_of(":") to get the index of the first occurrence of any of the given characters, and use string.substr(pos,len) to get the substring that starts at index pos and has length len:

 #include <iostream>
 #include <cstddef>
 #include <string>
 using namespace std;

// Splits a "host:port/resource" string at its first ':' and its first '/'
// and prints host, port and resource (no newline after the resource).
int main()
{
    const std::string address = "1.2.3.4:5678/path1/path2.html";

    // The host ends at the first colon; the resource begins at the first slash.
    const std::string::size_type colon = address.find(':');
    const std::string::size_type slash = address.find('/');

    const std::string host = address.substr(0, colon);
    // The port is whatever sits strictly between the colon and the slash.
    const std::string port = address.substr(colon + 1, slash - colon - 1);
    // The resource keeps its leading '/'.
    const std::string resource = address.substr(slash);

    std::cout << host << std::endl
              << port << std::endl
              << resource;
    return 0;
}

With http or https in url

// Splits a "scheme://host:port/path" URL into its four parts and prints them
// on separate lines (the path comes last, without a trailing newline).
int main()
{
    const std::string url = "http://qwert.mjgug.ouhnbg:5678/path1/path2.html";

    // The scheme is everything in front of the first ':'.
    const std::string::size_type scheme_end = url.find(':');
    const std::string protocol = url.substr(0, scheme_end);

    // Skip the three characters of "://" to reach the start of the host.
    const std::string remainder = url.substr(scheme_end + 3);
    const std::string::size_type colon = remainder.find(':');
    const std::string::size_type slash = remainder.find('/');

    const std::string host = remainder.substr(0, colon);
    // The port lies strictly between the colon and the first slash.
    const std::string port = remainder.substr(colon + 1, slash - colon - 1);
    // The path keeps its leading '/'.
    const std::string path = remainder.substr(slash);

    std::cout << protocol << std::endl;
    std::cout << host << std::endl;
    std::cout << port << std::endl;
    std::cout << path;
    return 0;
}
Chandini
  • 540
  • 2
  • 11
  • Thank you so much Chandini. It really was a great help! I mean it. – Electronic Brat May 11 '17 at 05:38
  • Happy to help :) @SSK – Chandini May 11 '17 at 05:38
  • I've a small doubt. What if my url is of this way : `http://1.2.3.4:5678/path1/path2.html` or `https://1.2.3.4:5678/path1/path2.html` and I wanna separate `http://` or `https:` too?? Like: `Protocol - http, Host - 1.2.3.4, Port - 5678, Resource - /path1/path2.html`. Can you help me with this? – Electronic Brat May 11 '17 at 06:47
  • It's the same way. url=`https://1.2.3.4:5678/path1/path2.html`. If you have http or https at the start, first find the occurrence of "/"; let that index be i, get the protocol part with protocol=url.substr(0,i+1), and then search for host, port and resource in url_excluding_protocol = url.substr(i+2) – Chandini May 11 '17 at 07:39
  • I tried it, but didn't get the expected one. Didn't get what's wrong. Check the que above, I've edited it. – Electronic Brat May 11 '17 at 08:54
  • I tried a lot, I really don't understand what's wrong. Can you help me with the above code? – Electronic Brat May 11 '17 at 09:15
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/143954/discussion-between-chandini-and-ssk). – Chandini May 11 '17 at 09:19
0

Putting the two together:

string url = "http://qwert.mjgug.ouhnbg:5678/path1/path2.html";
size_t found = 0;
string protocol;
if (url.rfind("http", 0) == 0) {
    // URL starts with http[s]
    found = url.find_first_of(":");
    protocol = url.substr(0, found);
    found += 3; // Step over colon and slashes
}
size_t found1 = url.find_first_of(":", found);
string host;
string port;
string path;
if (string::npos != found1) {
    // Port found
    host = url.substr(found, found1 - found);
    size_t found2 = url.find_first_of("/", found1);
    port = url.substr(found1 + 1, found2 - found1 - 1);
    path = url.substr(found2);
} else {
    // No port
    found1 = url.find_first_of("/", found);
    host = url.substr(found, found1 - found);
    path = url.substr(found1);
}
cout << "protocol = [" << protocol << "]";
cout << "host = [" << host << "]";
cout << "port = [" << port << "]";
cout << "path = [" << path << "]";
parsley72
  • 8,449
  • 8
  • 65
  • 98