I am a beginner in CUDA. I am using an NVIDIA GeForce GTX 1070 with CUDA Toolkit 11.3 on Ubuntu 18.04. As shown in the code below, I use two CPU threads to launch two kernels on a GPU, each kernel in its own stream. I want these two kernels to be submitted to the GPU at exactly the same time. Is there a way to do this?
If not, is there at least a better approach than what I did?
Thank you in advance.
My code:
//Headers
#include <stdio.h>
// Shared state for the two-thread rendezvous in threadsSynchronize().
// The condition variable and mutex are statically initialized, as POSIX
// requires before first use; a zero-filled object is not guaranteed to be
// a valid mutex/condition variable on every platform.
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
// Number of threads that have arrived at the rendezvous; guarded by cond_mutex.
unsigned int waiting = 0;
// One stream per launching thread; created and destroyed in main().
cudaStream_t streamZero, streamOne;
// Kernel zero (body elided in this listing).
// Takes no arguments; launched by threadZero() on streamZero.
__global__ void kernelZero(){...}
// Kernel one (body elided in this listing).
// Takes no arguments; launched by threadOne() on streamOne.
__global__ void kernelOne(){...}
// Two-party rendezvous used to line up the host threads right before they
// launch their kernels: each caller blocks until the other one has arrived.
//
// The barrier is reusable. When the second thread arrives it resets the
// arrival counter and advances a generation number; waiters sleep until the
// generation changes. Waiting on the generation (rather than on the counter
// value) keeps the wait correct across spurious wakeups and across repeated
// rounds, where a counter that is never reset would block forever.
void threadsSynchronize(void) {
    // Round number of the barrier; only read or written with cond_mutex held.
    static unsigned int generation = 0;

    pthread_mutex_lock(&cond_mutex);
    const unsigned int arrivalGeneration = generation;
    if (++waiting == 2) {
        // Last arrival: open the barrier for this round and re-arm it.
        waiting = 0;
        ++generation;
        pthread_cond_broadcast(&cond);
    } else {
        // First arrival: sleep until the partner advances the generation.
        while (generation == arrivalGeneration)
            pthread_cond_wait(&cond, &cond_mutex);
    }
    pthread_mutex_unlock(&cond_mutex);
}
// Host thread entry point that launches kernelZero on streamZero.
//
// The thread first meets the other launcher thread in threadsSynchronize()
// so that both launches are issued as close together as possible, then
// launches the kernel and blocks until streamZero has finished.
// The argument is unused. Always returns NULL; CUDA failures are reported
// on stderr.
void *threadZero(void *_) {
    // ...
    threadsSynchronize();
    kernelZero<<<blocksPerGridZero, threadsPerBlockZero, 0, streamZero>>>();
    // A launch has no return value, so configuration errors must be queried
    // explicitly. Peek (instead of Get) leaves the error state untouched for
    // any later checks in this thread.
    cudaError_t status = cudaPeekAtLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "threadZero: error after kernelZero launch: %s\n",
                cudaGetErrorString(status));
    }
    // Errors raised while the kernel runs surface at the synchronizing call.
    status = cudaStreamSynchronize(streamZero);
    if (status != cudaSuccess) {
        fprintf(stderr, "threadZero: cudaStreamSynchronize(streamZero) failed: %s\n",
                cudaGetErrorString(status));
    }
    // ...
    return NULL;
}
// Host thread entry point that launches kernelOne on streamOne.
//
// The thread first meets the other launcher thread in threadsSynchronize()
// so that both launches are issued as close together as possible, then
// launches the kernel and blocks until streamOne has finished.
// The argument is unused. Always returns NULL; CUDA failures are reported
// on stderr.
void *threadOne(void *_) {
    // ...
    threadsSynchronize();
    kernelOne<<<blocksPerGridOne, threadsPerBlockOne, 0, streamOne>>>();
    // A launch has no return value, so configuration errors must be queried
    // explicitly. Peek (instead of Get) leaves the error state untouched for
    // any later checks in this thread.
    cudaError_t status = cudaPeekAtLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "threadOne: error after kernelOne launch: %s\n",
                cudaGetErrorString(status));
    }
    // Errors raised while the kernel runs surface at the synchronizing call.
    status = cudaStreamSynchronize(streamOne);
    if (status != cudaSuccess) {
        fprintf(stderr, "threadOne: cudaStreamSynchronize(streamOne) failed: %s\n",
                cudaGetErrorString(status));
    }
    // ...
    return NULL;
}
// Creates the two streams, starts one launcher thread per kernel, waits for
// both threads, then destroys the streams.
//
// Returns 0 on success and 1 if any CUDA or pthread call fails; each failure
// is reported on stderr.
int main(void) {
    pthread_t zero, one;
    cudaError_t cudaStatus;
    int rc;
    int exitCode = 0;

    cudaStatus = cudaStreamCreate(&streamZero);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamCreate(streamZero) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        return 1;
    }
    cudaStatus = cudaStreamCreate(&streamOne);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamCreate(streamOne) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        cudaStreamDestroy(streamZero);
        return 1;
    }
    // ...
    rc = pthread_create(&zero, NULL, threadZero, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create(threadZero) failed with error %d\n", rc);
        return 1;
    }
    rc = pthread_create(&one, NULL, threadOne, NULL);
    if (rc != 0) {
        // Thread zero is (or will be) parked in threadsSynchronize() waiting
        // for a partner that will never arrive, so joining it would hang.
        // Returning from main ends the process and with it that thread.
        fprintf(stderr, "pthread_create(threadOne) failed with error %d\n", rc);
        return 1;
    }
    // ...
    rc = pthread_join(zero, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join(zero) failed with error %d\n", rc);
        exitCode = 1;
    }
    rc = pthread_join(one, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_join(one) failed with error %d\n", rc);
        exitCode = 1;
    }

    cudaStatus = cudaStreamDestroy(streamZero);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamDestroy(streamZero) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        exitCode = 1;
    }
    cudaStatus = cudaStreamDestroy(streamOne);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaStreamDestroy(streamOne) failed: %s\n",
                cudaGetErrorString(cudaStatus));
        exitCode = 1;
    }
    return exitCode;
}