I'm new to SYCL and am trying to run a very simple vector addition program using ComputeCpp.
#include <sycl/sycl.hpp>
#include <iostream>
// Kernel name tags: these incomplete types are never defined; they are used only
// as the template argument of parallel_for to give each kernel a unique name.
class vector_addition;
class vector_initialization;
int main() {
constexpr int size = 10;
try {
cl::sycl::queue queue(cl::sycl::gpu_selector{});
int *result_device = cl::sycl::malloc_device<int>(size , queue);
int *vec1 = cl::sycl::malloc_shared<int>(size , queue);
int *vec2 = cl::sycl::malloc_shared<int>(size , queue);
queue.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<vector_initialization>(
size, [=](cl::sycl::id<1> idx) {
vec1[idx[0]] = idx[0];
vec2[idx[0]] = idx[0] * 2;
});
});
queue.wait();
//std::cout << "vec1[1] = " << vec1[1] << std::endl;
queue.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<class vector_addition>(
size, [=](cl::sycl::id<1> idx) {
result_device[idx[0]] = vec1[idx[0]] + vec2[idx[0]];
});
});
queue.wait();
int result[size];
queue.submit([&](cl::sycl::handler &cgh) {
cgh.memcpy(result, result_device, size * sizeof(int));
});
queue.wait();
for (int i = 0; i < size; i++)
std::cout << result[i] << " ";
}
catch (cl::sycl::exception& e) {
std::cerr << "SYCL exception caught: " << e.what() << std::endl;
return 1;
}
return 0;
}
As one can see, 3 arrays are being allocated using USM. The program runs without any errors, but cgh.memcpy
seems not to copy the results back to the host.
The output is:
0 0 0 0 0 0 0 0 0 0
However, it runs perfectly on the host device:
0 3 6 9 12 15 18 21 24 27
Also, if the line //std::cout << "vec1[1] = " << vec1[1] << std::endl;
is uncommented, I get a segmentation fault,
which, I believe, should not happen when accessing shared memory on the host.
Here's the output of computecpp_info:
********************************************************************************
ComputeCpp Info (CE 2.11.0 2022/08/08)
SYCL 1.2.1 revision 3
********************************************************************************
Device Info:
Discovered 1 devices matching:
platform : <any>
device type : <any>
--------------------------------------------------------------------------------
Device 0:
Device is supported : UNTESTED - Untested OS
Bitcode targets : spirv64 ptx64
CL_DEVICE_NAME : NVIDIA GeForce RTX 3050 Ti Laptop GPU
CL_DEVICE_VENDOR : NVIDIA Corporation
CL_DRIVER_VERSION : 525.116.03
CL_DEVICE_TYPE : CL_DEVICE_TYPE_GPU
If you encounter problems when using any of these OpenCL devices, please consult
this website for known issues:
https://developer.codeplay.com/products/computecpp/ce/guides/platform-support?version=2.11.0
********************************************************************************
Can someone explain what I'm doing wrong here?