
I'm trying to download remote HTML pages with my C++ program, but with some URLs a timeout occurs and I don't know how to handle it, so the program just hangs indefinitely.

virtual void downloadpage(string pageaddress) {
    CURL *curl;
    CURLcode informationdownloaded;
    curl = curl_easy_init();
    if (curl) {
        curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13");
        curl_easy_setopt(curl, CURLOPT_URL, pageaddress.c_str());
        curl_easy_setopt(curl, CURLOPT_HEADER, 0);
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writepageinformation);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, &pageinformation);
        informationdownloaded = curl_easy_perform(curl);
        curl_easy_cleanup(curl);
    }
}

Above is my function for downloading the HTML source of a page into a string variable called "pageinformation" via the "writepageinformation" callback.
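
For reference, writepageinformation is just a standard CURLOPT_WRITEFUNCTION callback that appends each received chunk to pageinformation. A minimal sketch of what such a callback typically looks like, assuming pageinformation is a std::string (names chosen to match the code above; the exact implementation may differ):

size_t writepageinformation(char *buffer, size_t size, size_t nmemb, void *userdata) {
    // userdata is the pointer passed via CURLOPT_WRITEDATA (&pageinformation here)
    std::string &page = *static_cast<std::string*>(userdata);
    page.append(buffer, size * nmemb);
    return size * nmemb; // returning less than size * nmemb makes libcurl abort the transfer
}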

– Ben Clooney

3 Answers

informationdownloaded = curl_easy_perform(curl);

You can also specify a timeout for the download:

curl_easy_setopt(hCurl, CURLOPT_TIMEOUT, iTimeoutSeconds); // maximum time in seconds the whole transfer may take

curl_easy_perform is a blocking call: it does not return until the transfer has finished, failed, or timed out. If you want to be able to interrupt the blocked call (for example, when a kill signal arrives), install a progress callback, like below:

curl_easy_setopt(hCurl, CURLOPT_NOPROGRESS, 0);
curl_easy_setopt(hCurl, CURLOPT_PROGRESSFUNCTION, progress_callback);
curl_easy_setopt(hCurl, CURLOPT_PROGRESSDATA, this);

static int progress_callback(void *clientp,
                             double dltotal,
                             double dlnow,
                             double ultotal,
                             double ulnow)
{
    CLASS &obj = *(CLASS*)clientp;

    if (obj.exit)
        return 1; // non-zero return makes curl abort and return from curl_easy_perform

    return 0; // zero lets the transfer continue
}
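
Putting it together, the callback only needs access to a flag that some other part of the program (a signal handler, another thread, a UI cancel button) can set. A minimal sketch, assuming a hypothetical Downloader class with an exit flag (CLASS/obj.exit above are placeholders for whatever your own class provides):

#include <atomic>
#include <string>
#include <curl/curl.h>

class Downloader {
public:
    std::atomic<bool> exit{false}; // set to true from elsewhere to abort the transfer

    void downloadpage(const std::string &pageaddress) {
        CURL *curl = curl_easy_init();
        if (curl) {
            curl_easy_setopt(curl, CURLOPT_URL, pageaddress.c_str());
            curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); // enable the progress callback
            curl_easy_setopt(curl, CURLOPT_PROGRESSFUNCTION, progress_callback);
            curl_easy_setopt(curl, CURLOPT_PROGRESSDATA, this);
            CURLcode res = curl_easy_perform(curl);
            if (res == CURLE_ABORTED_BY_CALLBACK) {
                // the progress callback returned non-zero and aborted the transfer
            }
            curl_easy_cleanup(curl);
        }
    }

private:
    static int progress_callback(void *clientp, double, double, double, double) {
        Downloader &obj = *static_cast<Downloader*>(clientp);
        return obj.exit ? 1 : 0; // non-zero aborts curl_easy_perform
    }
};

Note that on libcurl 7.32.0 and later the same idea is usually expressed with CURLOPT_XFERINFOFUNCTION / CURLOPT_XFERINFODATA, whose callback takes curl_off_t counters instead of doubles.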
– rajeshk
Note that CURLOPT_TIMEOUT limits the whole transfer, while there is a separate option for the connection phase hanging on its own, which happens more often: see CURLOPT_CONNECTTIMEOUT. – DragonLord Jun 19 '19 at 15:51
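
Applied to the downloadpage function from the question, setting both timeouts and checking the result might look like the following sketch (the 10 and 30 second values are arbitrary examples):

curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 10L); // give up if connecting takes longer than 10 s
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30L);        // give up if the whole transfer takes longer than 30 s
informationdownloaded = curl_easy_perform(curl);
if (informationdownloaded == CURLE_OPERATION_TIMEDOUT) {
    // handle the timeout instead of hanging
}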

Along with the other suggestions to use CURLOPT_TIMEOUT, which lets you define a timeout, you should also check the return value of curl_easy_perform, since it is a blocking call. Here is a slightly modified version of libcurl's doc/examples/getinfo.c:

#include <stdio.h>
#include <stdlib.h>
#include <curl/curl.h>

int main(int argc, char* argv[])
{
  CURL *curl;
  CURLcode res;

  if (argc != 2) {
    printf("Usage: %s <timeoutInMs>\n", argv[0]);
    return 1;
  }

  curl = curl_easy_init();
  if (!curl)
    return 1;

  curl_easy_setopt(curl, CURLOPT_URL, "http://httpbin.org/ip");
  curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, atol(argv[1])); /* timeout in milliseconds */

  res = curl_easy_perform(curl); /* blocks until finished, failed, or timed out */

  if (CURLE_OK == res)
    printf("Success.\n");
  else if (CURLE_OPERATION_TIMEDOUT == res)
    printf("Operation timed out.\n");

  curl_easy_cleanup(curl);
  return 0;
}
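
To try it out, compile with something like cc getinfo.c -lcurl and run it once with a generous timeout (e.g. ./a.out 5000) and once with a tiny one (e.g. ./a.out 1); the second run should print "Operation timed out." almost immediately.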
– Baris Demiray

Use the CURLOPT_TIMEOUT option.

Use the CURLOPT_PROGRESSFUNCTION callback and make the operation stop whenever you think it has gone on long enough.

Use the CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME options (or similar) to make it depend on the transfer rate; see the sketch below.
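
For the last option, a minimal sketch of a low-speed abort, assuming you want to give up when the transfer stays below 100 bytes/s for 30 seconds (both thresholds are arbitrary examples):

// Abort if the average transfer speed is below CURLOPT_LOW_SPEED_LIMIT
// (bytes per second) for CURLOPT_LOW_SPEED_TIME seconds.
curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 100L);
curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 30L);

CURLcode res = curl_easy_perform(curl);
if (res == CURLE_OPERATION_TIMEDOUT) {
    // aborted for being too slow (or for hitting CURLOPT_TIMEOUT, if set)
}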

– Daniel Stenberg