0

I am writing this simple opencl code and everytime I execute the code it causes system to crash, only my mouse cursor is responsive and there is a one screen blank. Similar thing happened to me when I was using clinfo with up-to-date AMDGPU-PRO driver. I am currently using slightly older version which is 20.40, and I had not had any problem with this version for running other OpenCL codes. I did try downgrading the driver to even older one, 20.30, and I am having the same problem.

One edit that I made for my code is changing the arg_size paramters in kernel arguments, clSetKernelArg(), from sizeof(cl_mem) to sizeof(cl_int) for the first four variables and sizeof(cl_mem) to sizeof(cl_float) or sizeof(float) for the last four variables. I made the changes because I was getting CL_INVALID_KERNEL_ARGS error. After the changes were made, I've noticed that I was not getting any CL errors anymore, so I thought I finally fixed the code, but now it is crashing my system.... Looking at journalctl log, it seems that it has to do with some shared libraries. Other than that I have no idea what the log is saying.

Below is the main code, kernel code, and the journalctl log.

Main code

#include <stdio.h>
#include <stdlib.h>
#include <netcdf.h>

#define CL_TARGET_OPENCL_VERSION 120 
#include <CL/cl.h>
#include "cl_err.h"

// netCDF constants
#define err(e) {printf("Error: %s\n", nc_strerror(e)); return(2);}
#define clerrchk(arg, e) {printf("  %-40s : %s\n",arg, geterrstr(e));}


#define fname "leap3d.nc"

// Variable sizes and dimensions (constants)
#define ndims 4


void data_init(int in_x_siz, int in_y_siz, int in_z_siz, float *in_arr);
void pbndry(int in_x_siz, int in_y_siz, int in_z_siz, float *in_arr);




int main()
{
   int      i,j,k;

   int      nx       =  128,
            ny       =  128,
            nz       =  16,
            nt       =  1000;
  
   int      *p_nx    =  &nx,
            *p_ny    =  &ny,
            *p_nz    =  &nz,
            *p_nt    =  &nt;

   float    u        =  0.0,
            v        =  5.0,
            w        =  0.0,
            c        =  0.01;

   float    *p_u     =  &u,
            *p_v     =  &v,
            *p_w     =  &w,
            *p_c     =  &c;
   

   // p_tf : p at future
   // p_tn : p at now
   // p_tp : p at past
   float    q_tf[nz+2][ny+2][nx+2];
   float    q_tn[nz+2][ny+2][nx+2];
   float    q_tp[nz+2][ny+2][nx+2];
   
   float    (*p_tf)[ny+2][nx+2] = q_tf;
   float    (*p_tn)[ny+2][nx+2] = q_tn;
   float    (*p_tp)[ny+2][nx+2] = q_tp;

   size_t   p_siz =  sizeof(float) * (nx+2) * (ny+2) * (nz+2);
   size_t   n_siz =  sizeof(int)    ,
            c_siz =  sizeof(float)    ;


   int      ncid, retval, varid, x_dimid, y_dimid, z_dimid, t_dimid;
   int      dimids[ndims];
   size_t   start[ndims], count[ndims];


   // netCDF file operation
   // Creating netCDF file
   if ((retval = nc_create(fname, NC_CLOBBER, &ncid)))
      err(retval);

   // Define dimensions
   if ((retval = nc_def_dim(ncid, "z", nz+2, &z_dimid)))
      err(retval);
   if ((retval = nc_def_dim(ncid, "y", ny+2, &y_dimid)))
      err(retval);
   if ((retval = nc_def_dim(ncid, "x", nx+2, &x_dimid)))
      err(retval);
   if ((retval = nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid)))
      err(retval);


   // Dimension ids
   dimids[0] = t_dimid;
   dimids[1] = z_dimid;
   dimids[2] = y_dimid;
   dimids[3] = x_dimid;

   // Variable for writing netCDF data one timestep at a time
   count[0] = 1;       // For time dimension : 1 timestep
   count[1] = nz+2;    // For z              : write everything
   count[2] = ny+2;    // For y              : write everything
   count[3] = nx+2;    // For x              : write everything
   
   start[1] = 0;       // For z              : don't do anything
   start[2] = 0;       // For y              : don't do anything
   start[3] = 0;       // For x              : don't do anything
   

   if ((retval = nc_def_var(ncid, "data", NC_FLOAT, ndims, dimids, &varid)))
      err(retval);
   
   if ((retval = nc_enddef(ncid)))
      err(retval);


   data_init(nx,ny,nz,(float*)p_tf); 
   data_init(nx,ny,nz,(float*)p_tn); 
   data_init(nx,ny,nz,(float*)p_tp); 


   // Euler scheme for the first time step
   for(k=1;k<nz+1;k++)
      for(j=1;j<ny+1;j++)
         for(i=1;i<nx+1;i++)
         {
            p_tf[k][j][i] = p_tn[k][j][i]
                           - u * c * (p_tn[k][j][i] - p_tn[k][j][i-1])
                           - v * c * (p_tn[k][j][i] - p_tn[k][j-1][i])
                           - w * c * (p_tn[k][j][i] - p_tn[k-1][j][i]);
         }

   pbndry(nx,ny,nz,(float*)p_tf);

   p_tp  =  p_tn;
   p_tn  =  p_tf;

   start[0] = 0;
   
   if (retval = nc_put_vara_float(ncid, varid, start, count, &p_tf[0][0][0]))
      err(retval);



   // OpenCL part //

   // Use this to check the output of each API call
   cl_int status;

   // Retrieve the number of Platforms
   cl_uint numPlatforms = 0;
   status = clGetPlatformIDs(0, NULL, &numPlatforms);

   // Allocate enough space for each Platform
   cl_platform_id *platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));

   // Fill in the Platforms
   status = clGetPlatformIDs(numPlatforms, platforms, NULL);

   // Retrieve the number of Devices
   cl_uint numDevices = 0;
   status = clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
   clerrchk("get number of devices", status);

   // Allocate enough spaces for each Devices
   char name_data[100];
   int *comp_units;
   cl_device_fp_config cfg;
   cl_device_id *devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));

   // Fill in the Devices
   status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
   clerrchk("get device ids", status);

//   for(i=0;i<numDevices;i++)
//   {
//      status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name_data), name_data, NULL);
//
//      printf("Device Name #%d: %s\n", i, name_data);
//      status = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(comp_units), &comp_units, NULL);
// 
//      printf("Max Work-Group %d\n", comp_units);
//      status = clGetDeviceInfo(devices[i], CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cfg), &cfg, NULL);
//
//      printf("Double FP config = %llu, Support? = %d\n", cfg, status);
//   }

   // Create a context and associate it with the devices
   cl_context context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);
   clerrchk("create context", status);

   // Create a command queue and associate it with the devices
   cl_command_queue cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status);
   clerrchk("create cmd queue", status);

   cl_mem buf_p_tf    =  clCreateBuffer(context, CL_MEM_READ_WRITE, p_siz, NULL, &status);
   clerrchk("create buffer buf_p_tf", status);
   cl_mem buf_p_tn    =  clCreateBuffer(context, CL_MEM_READ_ONLY , p_siz, NULL, &status);
   clerrchk("create buffer buf_p_tn", status);
   cl_mem buf_p_tp    =  clCreateBuffer(context, CL_MEM_READ_ONLY , p_siz, NULL, &status);
   clerrchk("create buffer buf_p_tp", status);
   cl_mem buf_nx      =  clCreateBuffer(context, CL_MEM_READ_ONLY , n_siz, NULL, &status);
   clerrchk("create buffer buf_nx", status);
   cl_mem buf_ny      =  clCreateBuffer(context, CL_MEM_READ_ONLY , n_siz, NULL, &status);
   clerrchk("create buffer buf_ny", status);
   cl_mem buf_nz      =  clCreateBuffer(context, CL_MEM_READ_ONLY , n_siz, NULL, &status);
   clerrchk("create buffer buf_nz", status);
   cl_mem buf_nt      =  clCreateBuffer(context, CL_MEM_READ_ONLY , n_siz, NULL, &status);
   clerrchk("create buffer buf_nt", status);
   cl_mem buf_u       =  clCreateBuffer(context, CL_MEM_READ_ONLY , c_siz, NULL, &status);
   clerrchk("create buffer buf_u", status);
   cl_mem buf_v       =  clCreateBuffer(context, CL_MEM_READ_ONLY , c_siz, NULL, &status);
   clerrchk("create buffer buf_v", status);
   cl_mem buf_w       =  clCreateBuffer(context, CL_MEM_READ_ONLY , c_siz, NULL, &status);
   clerrchk("create buffer buf_w", status);
   cl_mem buf_c       =  clCreateBuffer(context, CL_MEM_READ_ONLY , c_siz, NULL, &status);
   clerrchk("create buffer buf_c", status);


   status   =  clEnqueueWriteBuffer(cmdQueue, buf_p_tf  , CL_FALSE, 0, p_siz, p_tf  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_p_tf", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_p_tn  , CL_FALSE, 0, p_siz, p_tn  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_p_tn", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_p_tp  , CL_FALSE, 0, p_siz, p_tp  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_p_tp", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_nx    , CL_FALSE, 0, n_siz, p_nx  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_nx", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_ny    , CL_FALSE, 0, n_siz, p_ny  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_ny", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_nz    , CL_FALSE, 0, n_siz, p_nz  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_nz", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_nt    , CL_FALSE, 0, n_siz, p_nt  ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_nt", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_u     , CL_FALSE, 0, c_siz, p_u   ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_u", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_v     , CL_FALSE, 0, c_siz, p_v   ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_v", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_w     , CL_FALSE, 0, c_siz, p_w   ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_w", status);
   status   =  clEnqueueWriteBuffer(cmdQueue, buf_c     , CL_FALSE, 0, c_siz, p_c   ,0, NULL, NULL);
   clerrchk("enqueue write buffer for buf_c", status);

  

   // Create Program with the source code
   cl_program program = NULL;
   size_t program_size;
   char *program_source;
   FILE *program_handle = fopen("leapfrog.cl","r");

   fseek(program_handle, 0, SEEK_END);
   program_size = ftell(program_handle);
   rewind(program_handle);
   program_source = (char*)malloc(program_size+1);
   program_source[program_size] = '\0';
   fread(program_source, sizeof(char), program_size, program_handle);
   fclose(program_handle);

   program = clCreateProgramWithSource(context, 1, (const char**)&program_source, &program_size, &status);
   clerrchk("create program", status);


   // Compile the Program for the Device
   status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
   if(status != CL_SUCCESS)
   {
      //printf("Code : %d\n",status);
      //printf("Program 1 %s\n",getErrorString(status));

      size_t log_size;
      clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

      char *log = (char *) malloc(log_size);

      clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

      printf("%s\n", log);
   }
   // Create a kernel
   cl_kernel kernel = NULL;
   kernel = clCreateKernel(program, "leapfrog3d", &status);
   clerrchk("create kernel", status);


   // Associate the input and output buffers with the kernel
   status = clSetKernelArg(kernel, 0, sizeof(cl_int), &buf_nx  );
   clerrchk("set kernel buf_nx", status);
   status = clSetKernelArg(kernel, 1, sizeof(cl_int), &buf_ny  );
   clerrchk("set kernel buf_ny", status);
   status = clSetKernelArg(kernel, 2, sizeof(cl_int), &buf_nz  );
   clerrchk("set kernel buf_nz", status);
   status = clSetKernelArg(kernel, 3, sizeof(cl_int), &buf_nt  );
   clerrchk("set kernel buf_nz", status);
   status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &buf_p_tf);
   clerrchk("set kernel buf_p_tf", status);
   status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &buf_p_tn);
   clerrchk("set kernel buf_p_tn", status);
   status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &buf_p_tp);
   clerrchk("set kernel buf_p_tp", status);
   status = clSetKernelArg(kernel, 7, sizeof(cl_float), &buf_u   );
   clerrchk("set kernel buf_u", status);
   status = clSetKernelArg(kernel, 8, sizeof(cl_float), &buf_v   );
   clerrchk("set kernel buf_v", status);
   status = clSetKernelArg(kernel, 9, sizeof(cl_float), &buf_w   );
   clerrchk("set kernel buf_w", status);
   status = clSetKernelArg(kernel,10, sizeof(cl_float), &buf_c   );
   clerrchk("set kernel buf_c", status);


   // Define index space (global work size) of work items for execution
   // A workgroup size (local work size) is not required, but can be used
   size_t glbworksiz[3] = {nx,ny,nz};


   // Execute the kernel for execution
   status = clEnqueueNDRangeKernel(cmdQueue, kernel, 3, NULL, glbworksiz, NULL, 0, NULL, NULL);
   clerrchk("enqueue nd range kernel", status);

   // Read the Device output buffer to the host output array
   status = clEnqueueReadBuffer(cmdQueue, buf_p_tf, CL_TRUE, 0, p_siz, p_tf, 0, NULL, NULL);
   clerrchk("enqueue read buffer", status);


   start[0] = 1;
   
   if (retval = nc_put_vara_float(ncid, varid, start, count, &p_tf[0][0][0]))
      err(retval);

   if ((retval = nc_close(ncid)))
      err(retval);


   clReleaseMemObject(buf_p_tf);
   clReleaseMemObject(buf_p_tn);
   clReleaseMemObject(buf_p_tp);
   clReleaseMemObject(buf_nx);
   clReleaseMemObject(buf_ny);
   clReleaseMemObject(buf_nz);
   clReleaseMemObject(buf_nt);
   clReleaseMemObject(buf_u);
   clReleaseMemObject(buf_v);
   clReleaseMemObject(buf_w);
   clReleaseMemObject(buf_c);
   clReleaseContext(context); 
   clReleaseKernel(kernel);
   clReleaseProgram(program);
   clReleaseCommandQueue(cmdQueue);

   printf("\nDone. . .\n");

   return 0;                     
}                             


void data_init(int in_x_siz, int in_y_siz, int in_z_siz, float *in_arr)
{
   int i,j,k;

   int i_min   =  50,
       i_max   =  70,
       j_min   =  50,
       j_max   =  70;

   for(k=0;k<in_z_siz+2;k++)
      for(j=0;j<in_y_siz+2;j++)
         for(i=0;i<in_x_siz+2;i++)
            in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) +i] = 0.0;

    for(k=1;k<in_z_siz+1;k++)
      for(j=j_min;j<j_max;j++)
         for(i=i_min;i<i_max;i++)
            in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) +i] = 3.0;

}


void pbndry(int in_x_siz, int in_y_siz, int in_z_siz, float *in_arr)
{
   int i,j,k;

   // Periodic boundary
   // x-direction
   for(k=1;k<in_z_siz+1;k++)
      for(j=1;j<in_y_siz+1;j++)
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 0] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + in_x_siz];

         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + (in_x_siz+1)] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 1];


   // y-direction
   for(k=1;k<in_z_siz+1;k++)
      for(i=1;i<in_x_siz+1;i++)
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 0 * (in_x_siz+2) + i] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + in_y_siz * (in_x_siz+2) + i];

         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + (in_y_siz+1) * (in_x_siz+2) + i] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 1 * (in_x_siz+2) + i];


   // z-direction
   for(j=1;j<in_y_siz+1;j++)
      for(i=1;i<in_x_siz+1;i++)
         in_arr[0 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
         in_arr[in_z_siz * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];

         in_arr[(in_z_siz+1) * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
         in_arr[1 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];

}

Kernel

void pbndry(int in_x_siz, int in_y_siz, int in_z_siz, global float *in_arr)
{
   int i,j,k;

   // Periodic boundary
   // x-direction
   for(k=1;k<in_z_siz+1;k++)
      for(j=1;j<in_y_siz+1;j++)
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 0] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + in_x_siz];

         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + (in_x_siz+1)] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 1];


   // y-direction
   for(k=1;k<in_z_siz+1;k++)
      for(i=1;i<in_x_siz+1;i++)
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 0 * (in_x_siz+2) + i] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + in_y_siz * (in_x_siz+2) + i];

         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + (in_y_siz+1) * (in_x_siz+2) + i] =
         in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 1 * (in_x_siz+2) + i];


   // z-direction
   for(j=1;j<in_y_siz+1;j++)
      for(i=1;i<in_x_siz+1;i++)
         in_arr[0 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
         in_arr[in_z_siz * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];

         in_arr[(in_z_siz+1) * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
         in_arr[1 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];
}


kernel void leapfrog3d(
                        const  int x_siz,
                        const  int y_siz,
                        const  int z_siz,
                        const  int t_siz,
                        global float *in_p_tf,
                        global float *in_p_tn,
                        global float *in_p_tp,
                        const  float u_vel,
                        const  float v_vel,
                        const  float w_vel,
                        const  float c
                      )
{
   int nx   =  x_siz;
   int ny   =  y_siz;
   int nz   =  z_siz;
   int nt   =  t_siz;
   float u  =  u_vel;
   float v  =  v_vel;
   float w  =  w_vel;
   float C  =  c    ;
   int i    =  get_global_id(0);
   int j    =  get_global_id(1);
   int k    =  get_global_id(2);

   int idx0, idx_i0, idx_i1, idx_j0, idx_j1, idx_k0, idx_k1;


   for(int t=1;t<t_siz;t++)
   {

      idx0     =  i + j * (nx+2) + k * (nx+2) * (ny+2);

      idx_i0   =  (i+1) + j * (nx+2) + k * (nx+2) * (ny+2);
      idx_j0   =  i + (j+1) * (nx+2) + k * (nx+2) * (ny+2);
      idx_k0   =  i + j * (nx+2) + (k+1) * (nx+2) * (ny+2);
      
      idx_i1   =  (i-1) + j * (nx+2) + k * (nx+2) * (ny+2);
      idx_j1   =  i + (j-1) * (nx+2) + k * (nx+2) * (ny+2);
      idx_k1   =  i + j * (nx+2) + (k-1) * (nx+2) * (ny+2);

      in_p_tf[idx0] = in_p_tp[idx0] 
                   - u_vel * C * (in_p_tn[idx_i0] - in_p_tn[idx_i1])
                   - v_vel * C * (in_p_tn[idx_j0] - in_p_tn[idx_j1])
                   - w_vel * C * (in_p_tn[idx_k0] - in_p_tn[idx_k1]);

      pbndry(nx,ny,nz,in_p_tf);

      in_p_tp = in_p_tn;
      in_p_tn = in_p_tf;
   }
}

journalctl log

Aug 19 13:17:12 Angke systemd-coredump[121514]: Process 121510 (lpf.gpu) of user 1000 dumped core.
                                                
                                                Found module /home/rangke/prog_works/opencl/adv/lpf.gpu with build-id: 8036638bab286ce6a1e81a4983bb68d19a7145fd
                                                Found module linux-vdso.so.1 with build-id: 27d9b8c0c25b172c86a29351e47701c1d0676035
                                                Found module libamdocl12cl64.so without build-id.
                                                Found module libgcc_s.so.1 with build-id: 7f8508bb914546ada778809b64b99d234337d835
                                                Found module libstdc++.so.6 with build-id: 8ab0e57054dd1dcba681f217016afc6a4e639783
                                                Found module libamd_comgr.so.1 with build-id: 438b1fbf7c58fd6a5b555a7283a58ee1eb1808f0
                                                Found module libdrm.so.2 with build-id: 3aeff5403ca8d7589eabc05752eb613937f454a1
                                                Found module libdrm_amdgpu.so.1 with build-id: a89ceb7c9082e5276f39023716eb4d194e75f6b8
                                                Found module libamdocl-orca64.so without build-id.
                                                Found module librt.so.1 with build-id: 75484da2d6f1515189eefa076e0a40328834cd16
                                                Found module libamdocl64.so without build-id.
                                                Found module libresolv.so.2 with build-id: c915c72668282861a813f7ea3c0780f37b681dc0
                                                Found module libkeyutils.so.1 with build-id: ac405ddd17be10ce538da3211415ee50c8f8df79
                                                Found module libkrb5support.so.0 with build-id: c4ee4ad1dc2da57487bc2419b88f1b6873184582
                                                Found module libcom_err.so.2 with build-id: eb61ef71c8b97846db759fb89a115405cff6dd30
                                                Found module libk5crypto.so.3 with build-id: 632a59ed7c35704d84645e6d1e9873348d1eb802
                                                Found module libkrb5.so.3 with build-id: c61cb4da63b8a839ee7df99eaf9dbd3d0968534c
                                                Found module libunistring.so.2 with build-id: 015ac6d6bcb60b7d8bea31a80d1941b06e8636ab
                                                Found module libpthread.so.0 with build-id: 07c8f95b4f3251d08550217ad8a1f31066229996
                                                Found module libzstd.so.1 with build-id: 4b10444c1560ebc574af4d5f488b7408b22d450e
                                                Found module libgssapi_krb5.so.2 with build-id: 9be9d3348399b72b76161a64e6d9fd760b77163a
                                                Found module libcrypto.so.1.1 with build-id: 81b77a8e0b6e1c0db19644a5f120890f02762021
                                                Found module libssl.so.1.1 with build-id: 99394a6653d9c107f2e9b730bbbfd18ed43ae3b9
                                                Found module libpsl.so.5 with build-id: 0229a201aaf5652186c9fdc192ebe52baf19d7f1
                                                Found module libssh2.so.1 with build-id: 7f6d9edd2e793b266cae4f22fc1ba7b6b401c08c
                                                Found module libidn2.so.0 with build-id: 1ce2b50ad9f9821c2c629b521cf5a3c99593d332
                                                Found module libnghttp2.so.14 with build-id: 5ca39b42e7cb2770878644d57e88677df6336815
                                                Found module libz.so.1 with build-id: 81bf6e728a6d6f5b105b0f8b25f6c614ce10452a
                                                Found module libsz.so.2 with build-id: c114ff6d6bb52989ad7099aacac51780e5ef418e
                                                Found module libcurl.so.4 with build-id: 49c0cb842d0e0dad11c435b7fb88b3d88b8a43ac
                                                Found module libm.so.6 with build-id: 2b8fd1f869ecab4e0b55e92f2f151897f6818acf
                                                Found module libhdf5.so.200 with build-id: 553f354452b0af3e7232580b8dff9e0c6584830b
                                                Found module libhdf5_hl.so.200 with build-id: 2b0926fbab5318a556eb524497edb6e78099ff60
                                                Found module ld-linux-x86-64.so.2 with build-id: 040cc3dd10461562f177df39e3be2f3704258c3c
                                                Found module libdl.so.2 with build-id: 5abc547e7b0949f89f3c0e21ab0c8331a7440a8a
                                                Found module libc.so.6 with build-id: 4b406737057708c0e4c642345a703c47a61c73dc
                                                Found module libnetcdf.so.18 with build-id: f68d6c8120acfeaaa265d3b0750d24b669671124
                                                Found module libOpenCL.so.1 with build-id: 4f566e048bc3b8112ba357ef29a3affd5858ccdf
                                                Stack trace of thread 121510:
                                                #0  0x00007efc82a9ad71 clEnqueueWriteBuffer (libamdocl64.so + 0xc8ad71)
                                                #1  0x00007efc88a2d28d clEnqueueWriteBuffer (libOpenCL.so.1 + 0x1228d)
                                                #2  0x000055d84a146dcd n/a (/home/rangke/prog_works/opencl/adv/lpf.gpu + 0x2dcd)
                                                #3  0x00007efc88726b25 __libc_start_main (libc.so.6 + 0x27b25)
                                                #4  0x000055d84a14528e n/a (/home/rangke/prog_works/opencl/adv/lpf.gpu + 0x128e)

Mind you that lpf.gpu is the executable that I ran.

Redshoe
  • 125
  • 5
  • this is a duplicate of this post https://stackoverflow.com/questions/16175837/opencl-computation-freezes-the-screen – Ethan Aug 20 '21 at 22:54
  • Comment out parts of the code and see if it still crashes then, see here: https://stackoverflow.com/a/67031414/9178992 – ProjectPhysX Aug 21 '21 at 10:07
  • Well, I did try using less problem size and it still crashed. I have APUs not dedicated GPUs, so maybe this would be the problem? When I ran the code in different machine with dedicated GPUs or on CPUs, it worked (without having to change the problem domain size). – Redshoe Aug 23 '21 at 15:58

0 Answers0