0

An object detection project I am working on involves adding a motion vector (MV) extractor on top of the AlexeyAB darknet YOLO module. Both the MV extractor and the YOLO module were tested separately and verified to work as expected on a system with the following specs:

Ubuntu OS: Ubuntu 20.04
CPU:       INTEL i7-3770
GPU:       RTX 3080 Ti
NV Driver: 510.108.03
CUDA:      11060 (11.6)
CuDNN:     8.4.1

I then incorporated the MV extractor into the YOLOv3 module, and during the initialization phase I encountered the error message below:

I! CuDNN (v8401) function cudnnConvolutionForward() called:
i!     handle: type=cudnnHandle_t; streamId=0x564585e158f0;
i!     alpha: type=CUDNN_DATA_FLOAT; val=1.000000;
i!     xDesc: type=cudnnTensorDescriptor_t:
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         nbDims: type=int; val=4;
i!         dimA: type=int; val=[1,3,608,608];
i!         strideA: type=int; val=[1108992,369664,608,1];
i!     xData: location=dev; addr=0x7f9a40c00000;
i!     wDesc: type=cudnnFilterDescriptor_t:
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         vect: type=int; val=0;
i!         nbDims: type=int; val=4;
i!         dimA: type=int; val=[32,3,3,3];
i!         format: type=cudnnTensorFormat_t; val=CUDNN_TENSOR_NCHW (0);
i!     wData: location=dev; addr=0x7f9b4ba00000;
i!     convDesc: type=cudnnConvolutionDescriptor_t:
i!         mode: type=cudnnConvolutionMode_t; val=CUDNN_CROSS_CORRELATION (1);
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         mathType: type=cudnnMathType_t; val=CUDNN_TENSOR_OP_MATH (1);
i!         reorderType: type=int; val=0;
i!         arrayLength: type=int; val=2;
i!         padA: type=int; val=[1,1];
i!         strideA: type=int; val=[1,1];
i!         dilationA: type=int; val=[1,1];
i!         groupCount: type=int; val=1;
i!     algo: type=cudnnConvolutionFwdAlgo_t; val=CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM (1);
i!     workSpace: location=dev; addr=0x7f9a36000000;
i!     workSpaceSizeInBytes: type=size_t; val=29577744;
i!     beta: type=CUDNN_DATA_FLOAT; val=0.000000;
i!     yDesc: type=cudnnTensorDescriptor_t:
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         nbDims: type=int; val=4;
i!         dimA: type=int; val=[1,32,608,608];
i!         strideA: type=int; val=[11829248,369664,608,1];
i!     yData: location=dev; addr=0x7f9b4c000000;
i! Time: 2023-04-28T11:24:27.776188 (0d+0h+0m+5s since start)
i! Process=294440; Thread=294440; GPU=0; Handle=0x564585e1bf30; StreamId=0x564585e158f0.


I! CuDNN (v8401) function cudnnBackendExecuteInternal() called:
i!     enable_tf32: type=bool; val=true;
i!     operation: type=internalType; val=CONV_FORWARD;
i!     xDesc: type=cudnnTensorDescriptor_t:
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         nbDims: type=int; val=4;
i!         dimA: type=int; val=[1,3,608,608];
i!         strideA: type=int; val=[1108992,369664,608,1];
i!     wDesc: type=cudnnFilterDescriptor_t:
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         vect: type=int; val=0;
i!         nbDims: type=int; val=4;
i!         dimA: type=int; val=[32,3,3,3];
i!         format: type=cudnnTensorFormat_t; val=CUDNN_TENSOR_NCHW (0);
i!     yDesc: type=cudnnTensorDescriptor_t:
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         nbDims: type=int; val=4;
i!         dimA: type=int; val=[1,32,608,608];
i!         strideA: type=int; val=[11829248,369664,608,1];
i!     convDesc: type=cudnnConvolutionDescriptor_t:
i!         mode: type=cudnnConvolutionMode_t; val=CUDNN_CROSS_CORRELATION (1);
i!         dataType: type=cudnnDataType_t; val=CUDNN_DATA_FLOAT (0);
i!         mathType: type=cudnnMathType_t; val=CUDNN_TENSOR_OP_MATH (1);
i!         reorderType: type=int; val=0;
i!         arrayLength: type=int; val=2;
i!         padA: type=int; val=[1,1];
i!         strideA: type=int; val=[1,1];
i!         dilationA: type=int; val=[1,1];
i!         groupCount: type=int; val=1;
i!     alpha: type=CUDNN_DATA_FLOAT; val=1.000000;
i!     beta: type=CUDNN_DATA_FLOAT; val=0.000000;
i!     engine_id: type=int; val=34;
i!     knobDesc: type=cudnnBackendDescriptor_t:
i!         CUDNN_KNOB_TYPE_TILE_SIZE: type=int; val=2;
i!         CUDNN_KNOB_TYPE_EDGE: type=int; val=1;
i!         CUDNN_KNOB_TYPE_KBLOCK: type=int; val=0;
i!         CUDNN_KNOB_TYPE_LDGA: type=int; val=0;
i!         CUDNN_KNOB_TYPE_LDGB: type=int; val=0;
i!         CUDNN_KNOB_TYPE_SLICED: type=int; val=0;
i!     numericalNotes: type=cudnnBackendDescriptor_t; val=NULL_PTR;
i!     behaviorNotes: type=cudnnBackendDescriptor_t; val=NULL_PTR;
i! Time: 2023-04-28T11:24:27.776425 (0d+0h+0m+5s since start)
i! Process=294440; Thread=294440; GPU=0; Handle=0x564585e1bf30; StreamId=0x564585e158f0.


E! CuDNN (v8401) function cudnnConvolutionForward() called:
e!     Info: Traceback contains 6 message(s)
e!         Error: CUDNN_STATUS_EXECUTION_FAILED; Reason: cstat != cudaSuccess
e!         Error: CUDNN_STATUS_EXECUTION_FAILED; Reason: engine.execute(vars_exec, stream)
e!         Error: CUDNN_STATUS_EXECUTION_FAILED; Reason: m_executor[0].execute_pipeline(remainder_engine, vars, stream)
e!         Error: CUDNN_STATUS_EXECUTION_FAILED; Reason: m_bpe(core_engine, aux_engine, vars, stream)
e!         Error: CUDNN_STATUS_EXECUTION_FAILED; Reason: ptr->execute(vars, handle->streamId)
e!         Error: CUDNN_STATUS_EXECUTION_FAILED; Reason: ConvForwardEngineMap_t::execute_wrapper(handle, algo, plan, vars)
e! Time: 2023-04-28T11:24:28.728591 (0d+0h+0m+6s since start)
e! Process=294440; Thread=294440; GPU=NULL; Handle=NULL; StreamId=NULL.


 cuDNN status Error in: file: ./src/convolutional_kernels.cu : () : line: 551 : build time: Apr 27 2023 - 16:48:39 

I! CuDNN (v8401) function cudnnGetErrorString() called:
i!     status: type=int; val=8;
i! Time: 2023-04-28T11:24:28.728719 (0d+0h+0m+6s since start)
i! Process=294440; Thread=294440; GPU=NULL; Handle=NULL; StreamId=NULL.

I traced and identified the line of code in ./src/convolutional_kernels.cu to be:

CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(),
            &alpha, //&one,
            l.srcTensorDesc,
            state.input,
            l.weightDesc,
            l.weights_gpu,
            l.convDesc,
            l.fw_algo,
            state.workspace,
            l.workspace_size,
            &beta,  //&one,
            l.dstTensorDesc,
            l.output_gpu));

I looked into the cudnn_handle() function inside ./src/dark_cuda.c and verified that it does initialize a cuDNN handle. Beyond that, I am unsure what causes this CUDNN_STATUS_EXECUTION_FAILED error or how I can resolve it.

Any help will be greatly appreciated.

0 Answers0