-3

I'm supposed to write a fast GPU solution for 1-bit images (C++). In my opinion my code is correct, but for some reason when I submit my answer the system says

/box/is.cu:3:10: fatal error: cudacheck.h: No such file or directory
3 | #include "cudacheck.h"
|          ^~~~~~~~~~~~~
compilation terminated.

The code is below. Can you see the possible mistake I'm making?

#include "is.h"
#include <cuda_runtime.h>
#include "cudacheck.h"
#include <vector>

// Integer division of `alku` by `loppu`, rounded up (for non-negative
// `alku` and positive `loppu`). Used to compute how many blocks are
// needed to cover a given number of elements.
static inline int jakolasku(int alku, int loppu) {
  const int numerator = alku + loppu - 1;
  return numerator / loppu;
}

// Builds a summed-area table from the first of every three consecutive
// floats in `data` (the input is read with a stride of 3).
//
// The returned table has pny*pnx entries, row-major with row stride pnx.
// Row 0 and column 0 stay zero; entry (y, x) with y >= 1, x >= 1 holds the
// sum of the sampled values over input rows [0, y) and columns [0, x).
// Callers in this file pass pny = ny + 1 and pnx = nx + 1.
std::vector<float> summienlasku(int ny, int nx, int pny, int pnx, const float* data){

  std::vector<float> table(pnx*pny, 0.f);

  for (int row = 1; row <= ny; ++row) {
    const int above = pnx * (row - 1);  // offset of the previous table row
    const int here  = pnx * row;        // offset of the table row being filled

    for (int col = 1; col <= nx; ++col) {
      const float pixel = data[3 * ((col - 1) + nx * (row - 1))];

      // Inclusion-exclusion: value + top + left - top-left.
      table[here + col] = pixel
                        + table[above + col]
                        + table[here + (col - 1)]
                        - table[above + (col - 1)];
    }
  }

  return table;
}

// For every rectangle size (korkeus x leveys) with 1 <= korkeus <= ny and
// 1 <= leveys <= nx, scans all positions of a rectangle of that size and
// stores the best score found in mitat[korkeus*pnx + leveys].
//
// Score of one placement:
//   inside^2 / (rectangle area) + outside^2 / (remaining area)
// where `inside` is the sum of the table over the rectangle and `outside`
// is the total sum minus `inside`.
//
// Launch layout: 2D grid of 2D blocks. Thread (i, j) handles width i + 1 and
// height j + 1, so the grid must provide at least nx threads along x and ny
// threads along y; surplus threads exit immediately. No shared memory is used.
//
// Parameters:
//   ny, nx    image height and width
//   size      total number of pixels (callers pass nx*ny)
//   pny, pnx  number of rows / row stride of `sums` (and row stride of `mitat`)
//   sums      zero-padded summed-area table, pny*pnx floats, row-major
//   mitat     output score table indexed as korkeus*pnx + leveys; row 0 and
//             column 0 are never written by this kernel
__global__ void nelio(int ny, int nx, int size, int pny, int pnx, const float* sums, float* mitat){

  // 1-based rectangle size. Using the raw thread index as the size would make
  // index 0 useless and would leave size nx (or ny) uncovered whenever it is
  // an exact multiple of the block dimension.
  int leveys = threadIdx.x + blockIdx.x * blockDim.x + 1;
  int korkeus = threadIdx.y + blockIdx.y * blockDim.y + 1;

  if (leveys > nx || korkeus > ny) return;

  int xSize = korkeus * leveys;   // pixels inside the rectangle (always >= 1)
  int ySize = size - xSize;       // pixels outside the rectangle

  float xluku = 1.0f / (float) xSize;
  // The rectangle may cover the whole image; then there is no outside part.
  float yluku = ySize == 0 ? 0.f : 1.0f / (float) ySize;

  float lk = sums[pnx*pny-1];     // last table entry = sum over the whole image
  float L = 0.f;                  // best score seen so far for this size

  for(int y0=0; y0<=ny-korkeus; ++y0){

    int y1 = y0 + korkeus;
    for(int x0=0; x0<=nx-leveys; ++x0){
      int x1 = x0 + leveys;

      // Four-corner lookup of the rectangle sum in the summed-area table.
      float s1 = sums[y1*pnx + x1];
      float s2 = sums[y1*pnx + x0];
      float s3 = sums[y0*pnx + x1];
      float s4 = sums[y0*pnx + x0];

      float xtoin = s1 - s2 - s3 + s4;  // sum inside
      float ytoin = lk - xtoin;         // sum outside

      float l = xtoin * xtoin * xluku + ytoin * ytoin * yluku;

      if(l > L) L = l;
    }
  }

  mitat[korkeus*pnx + leveys] = L;
}

// Dimensions of a rectangle. The member order matters: instances are
// created with positional aggregate initialization {width, height, size}.
struct Rectangle {
  int width;   // extent along x
  int height;  // extent along y
  int size;    // area; filled in as width*height by loydanelikulmio
};

// Scans the score table `rectdims` (row stride pnx, indexed as
// height*pnx + width) over heights 1..ny and widths 1..nx and returns the
// dimensions with the strictly largest score. On ties the first entry met in
// row-major order wins. If no score exceeds 0, width, height and size are 0.
Rectangle loydanelikulmio(int ny, int nx, int pnx, const float* rectdims){

  float bestScore = 0.f;
  int bestWidth = 0;
  int bestHeight = 0;

  for (int h = 1; h <= ny; ++h) {
    const float* row = rectdims + h * pnx;

    for (int w = 1; w <= nx; ++w) {
      const float score = row[w];
      if (!(score > bestScore)) continue;  // keep only strict improvements

      bestScore = score;
      bestWidth = w;
      bestHeight = h;
    }
  }

  Rectangle found;
  found.width = bestWidth;
  found.height = bestHeight;
  found.size = bestWidth * bestHeight;
  return found;
}

// Outcome of the position search: rectangle corners plus one colour triple
// for the area outside the rectangle and one for the area inside it.
// The member order matters: instances are created with positional aggregate
// initialization.
struct SegmentResult {
  int y0;          // first row of the rectangle
  int x0;          // first column of the rectangle
  int y1;          // y0 + rectangle height
  int x1;          // x0 + rectangle width
  float outer[3];  // colour outside the rectangle
  float inner[3];  // colour inside the rectangle
};

// Given a fixed rectangle size, finds the placement with the best score and
// returns its corners together with the mean value inside and outside.
//
// Score of one placement:
//   inside^2 / (rectangle area) + outside^2 / (remaining area)
//
// Parameters:
//   ny, nx    image height and width
//   pny, pnx  number of rows / row stride of `sums`
//   rect      rectangle dimensions to place (height, width, size are read)
//   sums      zero-padded summed-area table, pny*pnx floats, row-major
//
// Returns {y0, x0, y1, x1, outer, inner}, where y1 = y0 + height,
// x1 = x0 + width, and each colour triple repeats one mean value three
// times. If no placement scores above 0 (for example when the image sums to
// zero or the rectangle is empty), all fields are 0.
SegmentResult loydaSegmentti(int ny, int nx, int pny, int pnx, Rectangle* rect, const float* sums){

  const int size = nx*ny;
  const float vluku = sums[pnx*pny-1];  // last table entry = sum over the whole image

  const int korkeus = rect->height;
  const int leveys = rect->width;
  const int xkoko = rect->size;         // pixels inside the rectangle
  const int ykoko = size - xkoko;       // pixels outside the rectangle

  // Either region may be empty: an empty rectangle when no size scored above
  // zero, an empty outside when the rectangle covers the whole image. Use a
  // zero weight instead of dividing by zero (which would yield inf/NaN).
  const float xx = xkoko == 0 ? 0.f : 1.0f / (float) xkoko;
  const float yy = ykoko == 0 ? 0.f : 1.0f / (float) ykoko;

  float K = 0.f;                // best score so far
  float nL = 0.f, bL = 0.f;     // inside / outside sums of the best placement
  int xx0 = 0, xx1 = 0, yy0 = 0, yy1 = 0;

  for(int y0=0; y0<=ny-korkeus; ++y0){
    const int y1 = y0 + korkeus;

    for(int x0=0; x0<=nx-leveys; ++x0){
      const int x1 = x0 + leveys;

      // Four-corner lookup of the rectangle sum in the summed-area table.
      const float s1 = sums[y1*pnx + x1];
      const float s2 = sums[y1*pnx + x0];
      const float s3 = sums[y0*pnx + x1];
      const float s4 = sums[y0*pnx + x0];

      const float vlukuu = s1 - s2 - s3 + s4;  // sum inside
      const float ylukuu = vluku - vlukuu;     // sum outside
      const float k = vlukuu * vlukuu * xx + ylukuu * ylukuu * yy;

      if(k > K){
        K = k;
        nL = vlukuu;
        bL = ylukuu;
        xx0 = x0;
        xx1 = x1;
        yy0 = y0;
        yy1 = y1;
      }
    }
  }

  // Turn the sums into means.
  nL *= xx;
  bL *= yy;

  SegmentResult tulos = { yy0, xx0, yy1, xx1, { bL, bL, bL }, { nL, nL, nL } };
  return tulos;
}

// Entry point: finds the rectangle that best splits the image into an
// "inside" and an "outside" region and returns its corners and the two
// colours.
//
// Steps:
//   1. Build the zero-padded (ny+1) x (nx+1) summed-area table on the host.
//   2. On the GPU, compute the best achievable score for every rectangle size.
//   3. On the host, pick the best size, then search for its best position.
//
// Parameters:
//   ny, nx  image height and width
//   data    pixel data read with a stride of 3 floats per pixel; only the
//           first float of each pixel is used
Result segment(int ny, int nx, const float* data){

  const int laskux = nx+1, laskuy = ny+1;
  const size_t bytes = (size_t)laskux * (size_t)laskuy * sizeof(float);

  std::vector<float> summa = summienlasku(ny, nx, laskuy, laskux, data);

  float* smuuttuja = NULL;   // device copy of the summed-area table
  CHECK(cudaMalloc((void**)&smuuttuja, bytes));

  float* hmuuttuja = NULL;   // device table of best scores per rectangle size
  CHECK(cudaMalloc((void**)&hmuuttuja, bytes));

  CHECK(cudaMemcpy(smuuttuja, summa.data(), bytes, cudaMemcpyHostToDevice));

  {
    // One thread per entry of the padded table. A grid sized from nx and ny
    // alone stops short of index nx (or ny) when that is a multiple of the
    // block dimension; the kernel discards threads whose rectangle size falls
    // outside 1..nx / 1..ny.
    dim3 dimBlock(16, 16);
    dim3 dimGrid(jakolasku(laskux, dimBlock.x), jakolasku(laskuy, dimBlock.y));
    nelio<<<dimGrid, dimBlock>>>(ny, nx, nx*ny, laskuy, laskux, smuuttuja, hmuuttuja);
    CHECK(cudaGetLastError());
  }

  // The blocking copy also waits for the kernel to finish.
  std::vector<float> rectdims(laskux*laskuy);
  CHECK(cudaMemcpy(rectdims.data(), hmuuttuja, bytes, cudaMemcpyDeviceToHost));

  // Best rectangle size, then the best position for that size.
  Rectangle rect = loydanelikulmio(ny, nx, laskux, rectdims.data());

  SegmentResult sr = loydaSegmentti(ny, nx, laskuy, laskux, &rect, summa.data());

  Result result {
    sr.y0,
    sr.x0,
    sr.y1,
    sr.x1,
    { sr.outer[0], sr.outer[1], sr.outer[2] },
    { sr.inner[0], sr.inner[1], sr.inner[2] }
  };

  CHECK(cudaFree(smuuttuja));
  CHECK(cudaFree(hmuuttuja));

  return result;
}

I'm not sure whether the mistake is in the CUDA-related lines of my code. Why does it give an error with the `#include "cudacheck.h"` line when I try to submit my code?

talonmies
  • 70,661
  • 34
  • 192
  • 269
prghelps
  • 3
  • 2
  • Because the compiler cannot find `cudacheck.h`. The possible reasons for this are many. However none of them involve the code that follows the `#include` statement. – john Jun 04 '23 at 12:18
  • well, the answer to the question is literally in the error message. – Marcus Müller Jun 04 '23 at 12:18
  • To answer the question asked, the answer is no. The error you are making is in the way you are invoking the compiler or in the environment you are running the compiler in (e.g. where the relevant files actually are). None of these things are apparent from your question. – john Jun 04 '23 at 12:21

1 Answers1

0

From the error message, it looks like the compiler could not find the `cudacheck.h` header file, either in the directory that contains the C++ source file or in the paths specified by the INCLUDE environment variable.

Are you supposed to put the header file in the project directory or install it somehow?

Another possibility is that the filename or the path in the include statement is incorrect.

Leon
  • 23
  • 7