0

I'm having an issue regarding some parallel I/O I've been attempting to do. The output response from the code varies depending on the run. Essentially, I'm creating a producer/consumer program that receives as input one directory and completely copies that directory to some destination. One thread will place the various file descriptors inside a buffer (queue or any other data structure). The code should run as follows.

./task1 threadcount srcdir destdir

Essentially, the problem is random and so I believe it may be related to some sort of race condition. I'd say once in every 4 times it does the job right. I just need some fresh eyes to catch my mistake.

Header

NOTE: The code posted below is in very, very poor style and will be corrected. I apologize for this.

#include <queue>
#include <pthread.h>
#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <string>
#include <iostream>
#include <unistd.h>
#include <vector>
#include <fstream>
#include <mutex>
#include <thread> 
using namespace std;
const int isFile = 0x8;
/* The task for the consumer thread to execute. */ 
struct task {
    int infd;
    int outfd;
    bool end;
    string filename;
    task& operator =(const task& tsk)
    {
        infd = tsk.infd;
        outfd = tsk.outfd;
        end = tsk.end;
        return *this;
    }

 };
/*The argument for the producer thread */
struct arguments {
    vector<string> files;
    string src_dir;
    string dest_dir;
};
mutex output_lock;
mutex queue_lock;
queue<task> filequeue;

/* Get a list of all the files in a directory */
int get_files(vector<string>& files, string dir_nm ) {
    int file_count = 0;
    DIR* directory;
    struct dirent* dir_str;
    directory = opendir (dir_nm.c_str());
    if(directory == NULL) {
        cout << "Error with directory." << endl;
        return -1; 
    }
    while ((dir_str = readdir(directory)) != NULL) {
        if (dir_str->d_type == isFile) {
            string file_nm(dir_str->d_name);
            files.push_back(file_nm);
        }
    }
    closedir(directory);
    return file_count;
}

void copy(task tsk_to_copy) {
    int src_file = tsk_to_copy.infd;
    int dest_file = tsk_to_copy.outfd;
    char buf[8000];
    if(src_file < 0 || dest_file < 0) {
        cout << "Error with files." << endl;
        return;
    }
    while (true) {
        cout << "The error occurs here. " << endl;
        long int to_write = read(src_file, &buf[0], sizeof(buf));
        if(!to_write) break;
        write(dest_file, &buf[0], to_write);
    }
    cout << "The error occurs here " << endl;
    close(src_file);
    close(dest_file);
}

void create_task(string src, string dest, task& new_task) {
    unlink(dest.c_str());
    int src_file = open(src.c_str(), O_RDONLY);
    int dest_file = creat(dest.c_str(), 0700);
    new_task.infd = src_file;
    new_task.outfd = dest_file;
    new_task.end = false;
}

string pathname(string file_name, string dir) {
    return dir + file_name; 
}

void pdc_thd(void* arg) {
    /*Get arguments from struct */
    arguments* args = (arguments*) arg;
    string src = args->src_dir;
    string dest = args->dest_dir;
    vector<string> files = args->files;
    /* Start the buffer */
    for(int i = 0; i < files.size(); ++i) {
        task new_task;
        create_task(pathname(files[i], src), pathname(files[i], dest), new_task);
        new_task.filename = files[i];
        queue_lock.lock();
        filequeue.push(new_task);
        queue_lock.unlock();
        output_lock.lock();
        cout << "Written file " << files[i] << " to the buffer." << endl;
        output_lock.unlock();
    }
    /*Put a NULL file at the end for the threads to join. */
    queue_lock.lock();
    task final_task;
    final_task.infd = 0;
    final_task.outfd = 0;
    final_task.end = true;
    filequeue.push(final_task);
    queue_lock.unlock();

}


void cons_thd(void* arg) {

    while(true) {
        if(filequeue.empty()) {
            usleep(1);
        }
        else {
            queue_lock.lock();
            task job = filequeue.front();
            if(job.end) {
                queue_lock.unlock();
                break;
            }
            filequeue.pop();
            queue_lock.unlock();
            copy(job);
            output_lock.lock();
            cout << "Finished writing file: " << job.filename << endl;
            output_lock.unlock();
        }
    }
}

main

#include "task1.h"

int main(int argc, char* argv[]) {

    /*Check for valid argument count. */
    if(argc != 4) {
        cout << "Argument error." << endl;
        return -1;
    }
    pthread_attr_t attr;
    void* status;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    /* Get the source and destination directory */
    int total_threads = 1 + atoi(argv[1]);
    string src_dir_nm(argv[2]);
    string dest_dir_nm(argv[3]);
    vector<string> files;
    get_files(files, src_dir_nm);
    thread myThreads[total_threads];
    arguments arg;
    arg.files = files;
    arg.src_dir = src_dir_nm;
    arg.dest_dir = dest_dir_nm;
    myThreads[0] = thread(pdc_thd, &arg);
    for(int i = 1; i < total_threads; ++i) {
        myThreads[i] = thread(cons_thd, &arg);
    }
     for (int i = 0; i < total_threads; i++){
        myThreads[i].join();
    }
    return 0;
}

There's a sort of fling between pthread and standard C++ threads. They're virtually interchangeable here with two minutes of editing. One place I believe the error might be is in the write/read, but again I'm not 100% sure.

EOF
  • 6,273
  • 2
  • 26
  • 50

1 Answers1

0

Your consumer threads are calling filequeue.empty without holding the queue lock.

This can result in multiple threads accessing filequeue at the same time.

user253751
  • 57,427
  • 7
  • 48
  • 90