I would like to pass a class object to a __global__ kernel and call its member functions inside it. All my efforts so far have been unsuccessful. Below is a detailed description of what I am trying to do.
I have a class PDE_Parabolic_Num_GPU declared like this:
// Numerical parabolic-PDE solver derived from PDE_Parabolic_GPU. All listed
// member functions are compiled for both host and device.
//
// NOTE(review): the original listing declared Solve() and Setup() without a
// return type and omitted the ';' that must terminate a class definition; both
// are required for the declaration to compile. `void` is used here because the
// kernels that call these methods discard the results -- adjust if the real
// methods return a status.
class PDE_Parabolic_Num_GPU: public PDE_Parabolic_GPU
{
public:
    __host__ __device__ PDE_Parabolic_Num_GPU();
    // NOTE(review): if the destructor releases an/bn/cn, it must only run in
    // the same memory space that allocated them -- confirm against the
    // implementation.
    __host__ __device__ ~PDE_Parabolic_Num_GPU();

    // Runs the solver; called after Setup() in the pdegpu kernel.
    __host__ __device__ void Solve();

    // Initializes an, bn and cn using "new" (per the comment in the pdegpu
    // kernel). When called from device code, the storage comes from the device
    // heap: it cannot be accessed or freed from host code or through the CUDA
    // runtime API (cudaMemcpy / cudaFree).
    __host__ __device__ void Setup();

    // ... (further members elided in the original listing)

    //data
    gdd_real* an;
    gdd_real* bn;
    gdd_real* cn;
};
where gdd_real is:
// A pair of doubles packed into a double2: val.x stores the `hi` argument and
// val.y stores the `lo` argument (presumably the high and low words of a
// double-double number -- confirm against the arithmetic that uses it).
// All constructors are usable from both host and device code.
struct gdd_real
{
    double2 val;

    // Default construction leaves val uninitialized.
    __host__ __device__ gdd_real() {}

    // Builds the value from a single double; the second component is zero.
    __host__ __device__ gdd_real(double h)
    {
        val = make_double2(h, 0.);
    }

    // Builds the value from explicit hi / lo components.
    __host__ __device__ gdd_real(double hi, double lo)
    {
        val = make_double2(hi, lo);
    }
};
In my main(), I am doing the usual cudaMalloc and cudaMemcpy in order to pass my class object to the global kernel:
// Host-side driver: copy a host-constructed solver object to the device, run
// the pdegpu kernel on it, copy the object back, then release the device.
//
// Each step runs only if every previous step succeeded, and the first failure
// is reported. Kernel launches return no status themselves, so launch errors
// are read with cudaGetLastError() and execution faults surface at
// cudaDeviceSynchronize().
PDE_Parabolic_Num_GPU pdes_host;
PDE_Parabolic_Num_GPU *pdes_dev = 0;
cudaError_t cudaStatus;

cudaStatus = cudaMalloc((void**)&pdes_dev, 1 * sizeof(PDE_Parabolic_Num_GPU));
if (cudaStatus == cudaSuccess)
    cudaStatus = cudaMemcpy(pdes_dev, &pdes_host, sizeof(PDE_Parabolic_Num_GPU), cudaMemcpyHostToDevice);

if (cudaStatus == cudaSuccess)
{
    pdegpu<<<1,1>>>(pdes_dev);
    cudaStatus = cudaGetLastError();          // invalid launch configuration etc.
}

// cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
if (cudaStatus == cudaSuccess)
    cudaStatus = cudaDeviceSynchronize();     // faults raised while the kernel ran

// NOTE(review): this is a bitwise copy. If the kernel stored device-heap
// pointers in an/bn/cn (Setup() is said to use "new"), pdes_host now holds
// pointers that host code must neither dereference nor delete -- confirm what
// the destructor does with them.
if (cudaStatus == cudaSuccess)
    cudaStatus = cudaMemcpy(&pdes_host, pdes_dev, sizeof(PDE_Parabolic_Num_GPU), cudaMemcpyDeviceToHost);

if (cudaStatus != cudaSuccess)
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(cudaStatus));

// Release the device copy; cudaFree on a null pointer performs no operation,
// so this is safe even when cudaMalloc failed.
cudaFree(pdes_dev);
pdes_dev = 0;

// cudaDeviceReset() replaces the deprecated cudaThreadExit().
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess)
    fprintf(stderr, "cudaDeviceReset failed: %s\n", cudaGetErrorString(cudaStatus));

system("pause");
The pdegpu kernel is as follows:
// Kernel entry point: calls Setup() and then Solve() on the solver object that
// `pdes` points to. Setup() initializes the class members an, bn and cn using
// "new". There is no thread-index guard, so every launched thread performs
// both calls on the same object.
__global__ void pdegpu(PDE_Parabolic_Num_GPU *pdes)
{
    PDE_Parabolic_Num_GPU &solver = *pdes;
    solver.Setup();
    solver.Solve();
}
My first problem: the program crashes at pdes->Setup() during debugging.
The second problem: if I change the kernel to use a local object, as shown below, the program crashes after the system("pause") statement in main() during debugging.
// Kernel variant that takes no arguments: each thread default-constructs its
// own solver object inside the kernel, runs Setup() followed by Solve() on it,
// and the object is destroyed when the kernel function returns.
__global__ void pdegpu()
{
    PDE_Parabolic_Num_GPU pdes; // thread-local object
    PDE_Parabolic_Num_GPU *self = &pdes;
    self->Setup();
    self->Solve();
}
These are the two cases in which the program crashes while debugging with Nsight. However, when I run the program without debugging, the second version of pdegpu (which uses the local object) crashes at pdes.Solve(). Can anyone help me resolve these problems? Thank you.