I wrote the following code to learn how to use texture memory for a 1D array, but the tex1D function is not fetching the value from the array for the corresponding thread ID. Please correct this code and tell me how to use texture memory for a 1D array efficiently and effectively.
// Copies one texel per thread from a 1D texture into b.
//
// Launch layout: a single 1D block; thread i writes b[i], so b must hold at
// least blockDim.x floats and the texture at least blockDim.x texels.
//
// Precondition: texObj must be created with unnormalized coordinates
// (normalizedCoords = 0) over a cudaArray. tex1Dfetch is not a substitute
// here: it only applies to textures bound to linear device memory.
__global__ void sum(float *b,cudaTextureObject_t texObj)
{
    const int i = threadIdx.x;
    // In unnormalized texture space texel i covers [i, i + 1), so its centre
    // is i + 0.5. Sampling the centre yields texel i exactly under point
    // filtering, and also under linear filtering (the blend weight is zero
    // there), whereas sampling at the integer i under linear filtering
    // averages texels i - 1 and i.
    b[i] = tex1D<float>(texObj, i + 0.5f);
}
// Evaluates a CUDA runtime call and aborts with file/line context on failure.
// Unchecked failures are sticky and make every later call fail confusingly.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

// Fills a 5-element host array with 0..4, uploads it into a 1D cudaArray,
// reads it back through a texture object with the `sum` kernel (one thread per
// element), and prints the values copied back from the device.
// Returns 0 on success, 1 if a host allocation fails; CUDA errors abort via
// CUDA_CHECK.
int main()
{
    const int n = 5;
    const size_t bytes = n * sizeof(float);

    float *a = (float*)malloc(bytes);
    float *b = (float*)malloc(bytes);
    if (a == NULL || b == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(a);
        free(b);
        return 1;
    }
    for (int i = 0; i < n; i++)
        a[i] = (float)i;

    // Single-channel 32-bit float texels.
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

    // Height 0 makes this a 1D CUDA array of n texels.
    cudaArray* cuArray = NULL;
    CUDA_CHECK(cudaMallocArray(&cuArray, &channelDesc, n, 0));

    // cudaMemcpyToArray is deprecated; the 2D variant with a single row of
    // `bytes` bytes performs the same upload.
    CUDA_CHECK(cudaMemcpy2DToArray(cuArray, 0, 0, a, bytes, bytes, 1,
                                   cudaMemcpyHostToDevice));

    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    // The kernel addresses texels by element index, so coordinates must be
    // unnormalized. Wrap addressing is only defined for normalized
    // coordinates, hence clamp. Point filtering returns the stored value
    // instead of an interpolation between neighbouring texels.
    texDesc.addressMode[0] = cudaAddressModeClamp;
    texDesc.filterMode = cudaFilterModePoint;
    texDesc.readMode = cudaReadModeElementType;
    texDesc.normalizedCoords = 0;

    // Create texture object
    cudaTextureObject_t texObj = 0;
    CUDA_CHECK(cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL));

    float *d_b = NULL;
    CUDA_CHECK(cudaMalloc(&d_b, bytes));

    // One block, one thread per element.
    sum<<<1, n>>>(d_b, texObj);
    CUDA_CHECK(cudaGetLastError());  // launch-configuration errors

    // Copy back all n results (not just the first); this blocking copy also
    // waits for the kernel and surfaces any execution error.
    CUDA_CHECK(cudaMemcpy(b, d_b, bytes, cudaMemcpyDeviceToHost));

    for (int i = 0; i < n; i++)
        printf("%f\t", b[i]);

    // Release device resources, then host buffers.
    CUDA_CHECK(cudaDestroyTextureObject(texObj));
    CUDA_CHECK(cudaFreeArray(cuArray));
    CUDA_CHECK(cudaFree(d_b));
    free(a);
    free(b);
    return 0;
}