I use libcurl in my C code to download files given their URLs. My code looks similar to this:
#include <stdio.h>
#include <curl/curl.h>
#include <pthread.h>
static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
    size_t written = fwrite(ptr, size, nmemb, (FILE *)stream);
    return written;
}
int progress_func(void *ptr, double TotalToDownload, double NowDownloaded,
                  double TotalToUpload, double NowUploaded)
{
    struct my_custom_struct *my_dummy_data = (struct my_custom_struct *) ptr;
    // do some stuff here
    return 0;
}
void *download_with_curl(void *data)
{
    char *url = (char *) data;
    int res = 0;
    // My custom struct to store data
    struct my_custom_struct my_dummy_data;
    char errbuff[CURL_ERROR_SIZE] = {0};
    CURL *curl_handle;
    /* init the curl session */
    curl_handle = curl_easy_init();
    /* set URL to get here */
    curl_easy_setopt(curl_handle, CURLOPT_URL, url);
    /* enable the progress callback (set to 1L to disable the progress meter) */
    curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0L);
    /* send all data to this function */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);
    curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_TIME, RESPOND_TIME);
    curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_LIMIT, 30L);
    /* set the progress function */
    curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, progress_func);
    /* set the progress data */
    curl_easy_setopt(curl_handle, CURLOPT_PROGRESSDATA, &my_dummy_data);
    /* provide a buffer to store errors in */
    curl_easy_setopt(curl_handle, CURLOPT_ERRORBUFFER, errbuff);
    FILE *pagefile = fopen(path_to_where_I_want_to_store_the_file, "wb");
    /* write the page body to this file handle */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, pagefile);
    /* get the file */
    int status = curl_easy_perform(curl_handle);
    res = 0;
    long response_code;
    curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &response_code);
    fclose(pagefile);
    if (status != 0) {
        log_warn("CURL ERROR %d: %s", status, errbuff);
        response_code = -status;
    }
    /* cleanup curl stuff */
    curl_easy_cleanup(curl_handle);
    return NULL;
}
int main()
{
    // sockfd = create a sockfd
    // bind, listen
    do {
        // accept new connection
        char *url;
        // receive the url from client
        pthread_t tid;
        pthread_create(&tid, NULL, download_with_curl, url);
    } while (1);
}
When I send a single download request, the code works fine. "Works fine" means that the md5sum values of the original file and the downloaded file are equal. However, when I send multiple requests to download multiple files, only the first file that is downloaded has the correct md5sum value. To be clear, if I send requests to download files A (200MB), B (5MB) and C (50MB) in that order, only file B is correctly downloaded because it is finished first. Files A and C will have incorrect md5sum values. Moreover, when I check the content of files A and C, it looks like curl just inserts random segments of data into them. If the original file content is
This is the content of a file
then the downloaded file is like
This is the #$%!@#%@% content of $%(#(!)$()$%||@#$%*&) a file
After spending two days debugging, I finally solved the problem (I hope so). All I did was flush the data after calling fwrite. The function write_data now looks like this:
static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
    size_t written = fwrite(ptr, size, nmemb, (FILE *)stream);
    fflush((FILE *) stream);
    return written;
}
I do not know whether this completely solves the problem or not. Could anyone explain why it behaves this way and give me a solution?
UPDATE 1
It seems to have something to do with fwrite()'s internal buffer. Changing fwrite(ptr, size, nmemb, stream) to write(fileno(stream), ptr, size * nmemb) gives the same result as using fflush().
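For reference, this is roughly what the unbuffered variant of the write callback looks like (a minimal sketch; it assumes write() transfers the whole block in a single call and needs unistd.h in addition to the headers above):

#include <unistd.h>

static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
    /* bypass stdio buffering and write straight to the underlying file descriptor */
    ssize_t written = write(fileno((FILE *) stream), ptr, size * nmemb);
    if (written < 0)
        return 0; /* returning a count different from size * nmemb makes libcurl abort the transfer */
    return (size_t) written;
}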
UPDATE 2
Using libcurl's default write function (i.e. removing the CURLOPT_WRITEFUNCTION option) gives the same problem.
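A minimal sketch of that variant, showing only the lines of download_with_curl that change (the CURLOPT_WRITEFUNCTION call is simply dropped, so libcurl's built-in callback fwrite()s the body to whatever FILE * is set as CURLOPT_WRITEDATA):

    /* no CURLOPT_WRITEFUNCTION: libcurl's default callback writes the body itself */
    FILE *pagefile = fopen(path_to_where_I_want_to_store_the_file, "wb");
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, pagefile);
    int status = curl_easy_perform(curl_handle);
    fclose(pagefile);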