I am trying to use the cuSOLVER library to solve a system of linear equations, but an exception is raised instead, which is very strange. The code uses only one function from the library; the rest is memory allocation and memory copies. The function is:
cusolverSpScsrlsvcholHost(
cusolverSpHandle_t handle, int m, int nnz,
const cusparseMatDescr_t descrA, const float *csrVal,
const int *csrRowPtr, const int *csrColInd, const float *b,
float tol, int reorder, float *x, int *singularity);
I think my problem may be in the tol, reorder, or singularity parameters, since the rest are the matrix parameters. Here is the code:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <cusparse.h>
#include <cublas_v2.h>
#include <stdio.h>
#include <cusolverSp.h>
int main()
{
//initialize our test cases
const int size = 3;
int nnz = 6 ;
int sing = -1 ;
//float values[] = {0,0,0,0} ;
float values[] = {1,2,3,4,5,6} ;
int colIdx[] = {0,0,1,0,1,2};
int rowPtr[] = {0, 1,3,7};
float x[] = {4,-6,7};
float y[3]= {0,0,0} ;
float *dev_values = 0 ;
int *dev_rowPtr = 0 ;
int *dev_colIdx = 0 ;
float *dev_x = 0 ;
float *dev_y = 0 ;
cusolverSpHandle_t solver_handle ;
cusolverSpCreate(&solver_handle) ;
cusparseMatDescr_t descr = 0;
cusparseCreateMatDescr(&descr);
cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
// Choose which GPU to run on, change this on a multi-GPU system.
cudaSetDevice(0);
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
// Allocate GPU buffers for three vectors (two input, one output) .
cudaMalloc((void**)&dev_x, size * sizeof(float));
cudaMalloc((void**)&dev_y, size * sizeof(float));
cudaMalloc((void**)&dev_values, nnz * sizeof(float));
cudaMalloc((void**)&dev_rowPtr, (size + 1) * sizeof(int));
cudaMalloc((void**)&dev_colIdx, nnz * sizeof(int));
//Memcpy
cudaMemcpyAsync(dev_x, x, size * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpyAsync(dev_values, values, nnz * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpyAsync(dev_rowPtr, rowPtr, (size + 1) * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpyAsync(dev_colIdx, colIdx, nnz * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpyAsync(dev_y, y, size * sizeof(float), cudaMemcpyHostToDevice);
cusolverSpScsrlsvluHost(solver_handle, size, nnz, descr, dev_values, dev_rowPtr, dev_colIdx, dev_y, 0,0, dev_x, &sing);
cudaMemcpyAsync(y, dev_y, size*sizeof(float), cudaMemcpyDeviceToHost );
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
printf ("Time for the kernel: %f ms\n", time);
printf("%f\n",y[0]);
printf("%f\n",y[1]);
printf("%f\n",y[2]);
// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaDeviceReset();
cudaFree(dev_x);
cudaFree(dev_y);
cudaFree(dev_values);
cudaFree(dev_rowPtr);
cudaFree(dev_colIdx);
return 1;
}