Although the other (good) answer suggests keeping the C++ and CUDA code separate, there is an alternative: OpenCV containers can be used directly in .cu
files:
CMakeLists.txt
# project() below lists CUDA as a language, which relies on CMake's built-in CUDA support.
cmake_minimum_required(VERSION 3.8)
# Enable both the C++ and the CUDA compilers so .cpp and .cu sources can be built into one target.
project(test LANGUAGES CXX CUDA)
# Locate OpenCV, requesting version 3.0; REQUIRED makes configuration fail if it is not found.
find_package(OpenCV 3.0 REQUIRED)
# Build a single executable from the C++ entry point and the CUDA source file.
add_executable(test_app main.cpp medianFilter.cu)
# Link the executable against the CUDA runtime and the libraries reported by find_package(OpenCV).
target_link_libraries(test_app PRIVATE cudart ${OpenCV_LIBS})
main.cpp
#include "opencv2/opencv.hpp"
#include "medianFilter.h"
int main()
{
// input data
cv::Mat inputMat(cv::Size(128, 128), CV_8UC3, cv::Scalar(100));
cv::Mat kernelMat(cv::Size(16, 16), CV_8UC1, cv::Scalar(1));
// call CUDA
cudaMedianCaller(inputMat, kernelMat);
return 0;
}
medianFilter.cu
#include "medianFilter.h"
/**
 * Placeholder median-filter kernel: it performs no work and returns
 * immediately.
 *
 * @param d_inputMat  device pointer to the input pixels (currently unused)
 * @param d_kernelMat device pointer to the kernel values (currently unused)
 */
__global__ void kernelMedianFilter(uchar3* d_inputMat, uchar* d_kernelMat)
{
    // Intentionally empty: no processing is performed.
}
namespace {

// Owns one raw device allocation and frees it when leaving scope, so the
// buffer is released even if an exception is thrown part-way through.
struct DeviceBuffer
{
    void* ptr;

    DeviceBuffer() : ptr(0) {}
    ~DeviceBuffer()
    {
        if (ptr)
            cudaFree(ptr);
    }

private:
    // Non-copyable: a copy would free the same allocation twice.
    DeviceBuffer(const DeviceBuffer&);
    DeviceBuffer& operator=(const DeviceBuffer&);
};

// Turns a failed CUDA runtime status into a cv::Exception that names the
// failing step and carries the CUDA error string.
void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        CV_Error(cv::Error::GpuApiCallError,
                 cv::format("%s failed: %s", what, cudaGetErrorString(status)));
    }
}

} // namespace

/**
 * Uploads the input image and the kernel matrix to the GPU and launches
 * kernelMedianFilter on them.
 *
 * @param inputMat  host image; must be of type CV_8UC3
 * @param kernelMat host kernel matrix; must be of type CV_8UC1
 *
 * Returns without doing anything when either matrix is empty.
 * Throws cv::Exception when a matrix has the wrong type or when any CUDA
 * runtime call or the kernel launch fails.
 */
void cudaMedianCaller(const cv::Mat& inputMat, cv::Mat& kernelMat)
{
    // Nothing to upload or process.
    if (inputMat.empty() || kernelMat.empty())
        return;

    // The buffers are reinterpreted as uchar3 / uchar below, so any other
    // element type would be silently misread.
    CV_Assert(inputMat.type() == CV_8UC3);
    CV_Assert(kernelMat.type() == CV_8UC1);

    // A single flat cudaMemcpy of total() elements is only valid when the rows
    // are stored back-to-back. ROIs or padded matrices are cloned first
    // (clone() produces a continuous copy); the header copy itself is cheap.
    cv::Mat hostInput = inputMat;
    if (!hostInput.isContinuous())
        hostInput = hostInput.clone();

    cv::Mat hostKernel = kernelMat;
    if (!hostKernel.isContinuous())
        hostKernel = hostKernel.clone();

    const size_t inputBytes = hostInput.total() * sizeof(uchar3);
    const size_t kernelBytes = hostKernel.total() * sizeof(uchar);

    // allocate device buffers (released automatically on every exit path)
    DeviceBuffer d_inputMat;
    DeviceBuffer d_kernelMat;
    checkCuda(cudaMalloc(&d_inputMat.ptr, inputBytes), "cudaMalloc(input)");
    checkCuda(cudaMalloc(&d_kernelMat.ptr, kernelBytes), "cudaMalloc(kernel)");

    // copy from host to device
    checkCuda(cudaMemcpy(d_inputMat.ptr, hostInput.ptr<uchar3>(0), inputBytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(input)");
    checkCuda(cudaMemcpy(d_kernelMat.ptr, hostKernel.ptr<uchar>(0), kernelBytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(kernel)");

    // call CUDA kernel
    kernelMedianFilter <<<1, 1>>> (static_cast<uchar3*>(d_inputMat.ptr),
                                   static_cast<uchar*>(d_kernelMat.ptr));

    // A launch does not return a status: configuration errors show up via
    // cudaGetLastError(), execution errors at the next synchronizing call.
    checkCuda(cudaGetLastError(), "kernelMedianFilter launch");
    checkCuda(cudaDeviceSynchronize(), "kernelMedianFilter execution");
}
medianFilter.h
#ifndef MEDIAN_FILTER_H
#define MEDIAN_FILTER_H

#include "opencv2/opencv.hpp"

/**
 * Host-side wrapper, implemented in medianFilter.cu, that copies both
 * matrices to the GPU and launches the median-filter kernel.
 *
 * @param inputMat  host image, read as 3-channel 8-bit data
 * @param kernelMat host kernel matrix, read as single-channel 8-bit data
 */
void cudaMedianCaller (const cv::Mat& inputMat, cv::Mat& kernelMat);

#endif // MEDIAN_FILTER_H
For the application to run, you may need to copy some of the required .dll files
into the folder that contains the executable. In my case, I copied opencv_core343.dll
from C:\Program Files\OpenCV\x64\vc15\bin
to the folder where test_app.exe
is located.