18

I am writing a rendering system in CUDA and want results to be quickly displayed via OpenGL, without touching main memory. I basically do the following:

Create and initialize OpenGL texture, and register it in CUDA as cudaGraphicsResource

GLuint viewGLTexture;
cudaGraphicsResource_t viewCudaResource;

void initialize() {
    glEnable(GL_TEXTURE_2D);
    glGenTextures(1, &viewGLTexture);

    glBindTexture(GL_TEXTURE_2D, viewGLTexture); 
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view.getWidth(), view.getHeight(), 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    } 
    glBindTexture(GL_TEXTURE_2D, 0);

    cudaGraphicsGLRegisterImage(&viewCudaResource, viewGLTexture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard)
}

Whenever view is resized I resize viewport and texture image appropriately:

void resize() {
    glViewport(0, 0, view.getWidth(), view.getHeight());

    glBindTexture(GL_TEXTURE_2D, viewGLTexture); 
    {
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view.getWidth(), view.getHeight(), 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    } 
    glBindTexture(GL_TEXTURE_2D, 0);
}

And then each frame I map graphicsResource as a cudaSurfaceObject via cudaArray, call rendering kernel on it, unmap and synchronize to let OpenGL draw a fullscreen quad with this texture:

void renderFrame() {
    cudaGraphicsMapResources(1, &viewCudaResource); 
    {
        cudaArray_t viewCudaArray;
        cudaGraphicsSubResourceGetMappedArray(&viewCudaArray, viewCudaResource, 0, 0);
        cudaResourceDesc viewCudaArrayResourceDesc;
        {
            viewCudaArrayResourceDesc.resType = cudaResourceTypeArray;
            viewCudaArrayResourceDesc.res.array.array = viewCudaArray;
        }
        cudaSurfaceObject_t viewCudaSurfaceObject;
        cudaCreateSurfaceObject(&viewCudaSurfaceObject, &viewCudaArrayResourceDesc); 
        {
            invokeRenderingKernel(viewCudaSurfaceObject);
        } 
        cudaDestroySurfaceObject(viewCudaSurfaceObject));
    } 
    cudaGraphicsUnmapResources(1, &viewCudaResource);

    cudaStreamSynchronize(0);

    glBindTexture(GL_TEXTURE_2D, viewGLTexture); 
    {
        glBegin(GL_QUADS); 
        {
            glTexCoord2f(0.0f, 0.0f); glVertex2f(-1.0f, -1.0f);
            glTexCoord2f(1.0f, 0.0f); glVertex2f(+1.0f, -1.0f);
            glTexCoord2f(1.0f, 1.0f); glVertex2f(+1.0f, +1.0f);
            glTexCoord2f(0.0f, 1.0f); glVertex2f(-1.0f, +1.0f);
        } 
        glEnd();
    }
    glBindTexture(GL_TEXTURE_2D, 0);

    glFinish();
}

The problem is: Whenever view is resized all CUDA calls start spewing out "unknown error"s and visually it looks like the texture is not in fact resized, just stretched across the whole view. Why is this happening and how do I fix it?

yuri kilochek
  • 12,709
  • 2
  • 32
  • 59

1 Answers1

15

It seems interop requires to re-register textures upon resize. The following works:

void resize() {
    glViewport(0, 0, view.getWidth(), view.getHeight());

        // unregister
    cudaGraphicsUnregisterResource(viewCudaResource);
        // resize
    glBindTexture(GL_TEXTURE_2D, viewGLTexture);
    {
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, view.getWidth(), view.getHeight(), 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    }
    glBindTexture(GL_TEXTURE_2D, 0);
        // register back
    cudaGraphicsGLRegisterImage(&viewCudaResource, viewGLTexture, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard);
}
yuri kilochek
  • 12,709
  • 2
  • 32
  • 59
  • is it able to write to the OpenGL default BackBuffer which is GL_BACK by cudaSurface_t? Thanks! – gpu Jun 20 '23 at 08:03
  • should use cudaGraphicsRegisterFlagsWriteSurfaceLoadStore instead of cudaGraphicsRegisterFlagsWriteDiscard ? – gpu Jun 21 '23 at 00:55
  • 1
    @gpu dude, it's been almost 10 years, and I haven't worked with cuda since. I have no idea if it's possible to write directly to framebuffer. As for `Discard` vs `SurfaceLoadStore`, I'm pretty sure that discard is correct for the use case I had (i.e. redraw the entire texture every frame). – yuri kilochek Jun 22 '23 at 10:01
  • https://stackoverflow.com/questions/16765895/what-is-the-use-case-of-cudagraphicsregisterflagswritediscard-in-cudagraphicsglr – gpu Jun 22 '23 at 12:43
  • https://stackoverflow.com/questions/76532086/which-is-faster-glblitframebuffer-a-renderbuffer-or-render-a-fullscreen-texture – gpu Jun 22 '23 at 12:48