
I have a 5-megapixel texture that I am having trouble updating. The texture is displayed on a rectangle, much like a video stream.

The OpenGL commands execute quickly, but the actual on-screen texture update rate is poor, perhaps only 3 frames per second. Using a smaller texture (500x500) helps a little, but not much.

The machine has an NVIDIA GTX 570.

My initial efforts used glTexSubImage2D and glBufferSubData, but these performed slightly worse than the memory-mapped scheme shown below.
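For reference, that earlier glBufferSubData path looked roughly like this (a simplified sketch, not the exact code; stagingBuff is a hypothetical client-side buffer holding one frame):

    // Fill a client-side buffer, copy it into the bound PBO with
    // glBufferSubData, then update the texture from the PBO
    // (the last argument of glTexSubImage2D is an offset of 0 into the PBO).
    device->fillBuffer(stagingBuff, heightGL, widthGL);
    glBindTexture(GL_TEXTURE_2D, textures[0]);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[0]);
    glBufferSubData(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
                    widthGL*heightGL*sizeof(GLubyte), stagingBuff);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, widthGL, heightGL,
                    GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);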

Is there any way to force the graphics card to update the texture? How is video streaming software written?

Render Loop

void glStream::paintGL()
{
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();
    program1.bind();
    program1.setUniformValue("texture", 0);
    program1.enableAttributeArray(vertexAttr1);
    program1.enableAttributeArray(vertexTexr1);
    program1.setAttributeArray(vertexAttr1, vertices.constData());
    program1.setAttributeArray(vertexTexr1, texCoords.constData());
    //
    glECheck();
    glBindTexture(GL_TEXTURE_2D, textures[0]);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB,pbos[0]);
    void* memory = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB,GL_WRITE_ONLY);
    device->fillBuffer((unsigned char *)memory,heightGL,widthGL); // takes 2ms (not long)
    glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
    glTexSubImage2D(GL_TEXTURE_2D,0,0,0,widthGL,heightGL,GL_LUMINANCE,GL_UNSIGNED_BYTE, NULL);
    glDrawArrays(GL_TRIANGLES, 0, vertices.size());
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB,0);
    glBindTexture(GL_TEXTURE_2D,0);
    //
    program1.disableAttributeArray(vertexTexr1);
    program1.disableAttributeArray(vertexAttr1);
    program1.release();
    glECheck();//no errors
}

Texture Reservation

void glStream::reserveTextures()
{
    assert(numGLFrames>0);
    assert(glGenBuffers);
    displayBuff = (GLubyte*) calloc(numGLFrames*widthGL*heightGL,sizeof(GLubyte));//GL_RGB8
    memset(displayBuff,100,numGLFrames*widthGL*heightGL*sizeof(GLubyte));
    glGenBuffers(1,&pbos[0]);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[0]);
    glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB,
                 numGLFrames*widthGL*heightGL*sizeof(GLubyte),
                 &displayBuff[0], GL_STREAM_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
    glGenTextures(1,&textures[0]);
    glBindTexture(GL_TEXTURE_2D,textures[0]);
    glTexImage2D(GL_TEXTURE_2D,0,GL_LUMINANCE,
                 widthGL,heightGL,0,GL_LUMINANCE,GL_UNSIGNED_BYTE,NULL);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_FALSE);
    glBindTexture(GL_TEXTURE_2D,0);
}

Initialization

void glStream::initializeGL()
{
    GLenum err = glewInit();
    if (GLEW_OK != err)
    {
        const char *message = (const char*)glewGetErrorString(err);
        QMessageBox::information(0, "OpenGL 3.x Context Example",
                                 message);
        exit(20);
    }
    glDisable(GL_DEPTH_TEST);
    QGLShader *vshader1 = new QGLShader(QGLShader::Vertex, this);
    const char *vsrc1 =
        "attribute vec2 coord2d;   \n"
        "attribute mediump vec4 texCoord;\n"
        "varying mediump vec4 texc;\n"
        "void main()                  \n"
        "{                            \n"
        "   gl_Position = vec4(coord2d, 0.0, 1.0); \n"
        "   texc = texCoord;\n"
        "}                          \n";
    vshader1->compileSourceCode(vsrc1);
    QGLShader *fshader1 = new QGLShader(QGLShader::Fragment, this);
    const char *fsrc1 =
        "uniform sampler2D texture;\n"
        "varying mediump vec4 texc;\n"
        "void main(void)\n"
        "{\n"
        "    gl_FragColor = texture2D(texture, texc.st);\n"
        "}\n";
    fshader1->compileSourceCode(fsrc1);
    program1.addShader(vshader1);
    program1.addShader(fshader1);
    program1.link();
    vertexAttr1 = program1.attributeLocation( "coord2d");
    vertexTexr1 = program1.attributeLocation( "texCoord");
    // Create the vertex buffer.
    vertices.clear();
    float u=1;
#define AVEC -u,u
#define BVEC -u,-u
#define CVEC u,u
#define DVEC u,-u
    vertices << QVector2D(AVEC);vertices << QVector2D(BVEC);
    vertices << QVector2D(CVEC);vertices << QVector2D(BVEC);
    vertices << QVector2D(DVEC);vertices << QVector2D(CVEC);
    // Create the texture vertex buffer
#define TAVEC 0,1
#define TBVEC 0,0
#define TCVEC 1,1
#define TDVEC 1,0
    texCoords << QVector2D(TAVEC);texCoords << QVector2D(TBVEC);
    texCoords << QVector2D(TCVEC);texCoords << QVector2D(TBVEC);
    texCoords << QVector2D(TDVEC);texCoords << QVector2D(TCVEC);
    glECheck();
    reserveTextures();
}

Edit 1

I am confident that fillBuffer comes up with a new texture each time, because in another part of the code I write this texture to a file and it is indeed different. It's a sad day when my file I/O is faster than an OpenGL texture upload.

Edit 2

I tried out FRAPS and verified that the render loop is running at ~18 FPS, but the visible updates are slower (maybe 3 FPS). What could account for such a discrepancy?

Mikhail
  • I'm not an expert on this myself, but since you mention that you have an NVIDIA card and want the graphics card to update your OpenGL buffer directly: have you thought about using CUDA? There are quite a number of example projects on NVIDIA's developer page that illustrate exactly that. – Yellow Feb 25 '13 at 16:13
  • 1
  • Your choice of words `real texture update rate` makes me wonder what exactly that statement means. How did you measure it? What code causes your repaints to happen? How often do they happen? In a comment below you said something about 20fps: are you saying `paintGL()` gets called about 20 times per second but you only see an updated texture on screen 3 times per second? If yes: are you sure `fillBuffer()` comes up with a 'new' texture every time you call it? – axxel Feb 25 '13 at 20:32
  • @axxel Yep, that's exactly what I mean. I am confident that fillBuffer comes up with a new texture because in some other part of the code I write this texture to a file and it is indeed different. It's a sad day when your file I/O is faster than an OpenGL texture upload. – Mikhail Feb 25 '13 at 20:35
  • That sounds weird. I don't see how your texture update rate would be anything but those 20fps. Can you try something like `glDrawArrays(GL_TRIANGLES, 0, vertices.size()/((++i%2)+1));` and check if half of your quad flickers at 20fps? – axxel Feb 25 '13 at 21:18
  • I think axxel is right; it seems like device->fillBuffer is not creating a 'new' texture every time it is called. – shouston Feb 25 '13 at 22:26
  • @shouston I tried setting a flicker with memset and it doesn't go at 20 fps, more like 3 fps. It's a very reasonable suggestion, but I don't think it's correct. – Mikhail Feb 25 '13 at 22:36
  • Have you tried the modified `glDrawArrays` line, just to make sure each run of your `paintGL` function actually results in an updated framebuffer? If that should not be the case: did you interfere with the Qt-internal SwapBuffers call? If it is the case, can you simplify your texture handling using only the plain old `glTexImage2D`? (At the end of the day, there is always the chance of a driver bug :-/ ... If you have some hacked-down version of the source somewhere I'd give it a run on my machine.) – axxel Feb 25 '13 at 23:14

1 Answer


The PBO is much larger than the texture. In the call to glBufferData you allocate:

numGLFrames*widthGL*heightGL*sizeof(GLubyte)

You're allocating a PBO large enough for multiple textures (frames), but only ever reading/writing one frame's worth of data.

If you make the PBO the same size as the texture and use glTexImage2D instead of glTexSubImage2D, is it much faster?
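Something like this, for example (a sketch using your existing names, untested):

    // Size the PBO for exactly one frame of luminance data.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbos[0]);
    glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB,
                 widthGL*heightGL*sizeof(GLubyte),
                 NULL, GL_STREAM_DRAW);

    // Each frame: map, fill, unmap, then respecify the whole texture
    // from the bound PBO (the data pointer is an offset of 0 into the PBO).
    void* memory = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
    device->fillBuffer((unsigned char*)memory, heightGL, widthGL);
    glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
    glBindTexture(GL_TEXTURE_2D, textures[0]);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, widthGL, heightGL, 0,
                 GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);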

shouston
  • Thanks for the suggestion, it goes a bit faster, although it's not quite real-time. Any ideas? – Mikhail Feb 25 '13 at 19:15
  • Do you have all of the frames in memory ready to be drawn, or are you capturing them from a camera? If capturing, how many times per second does device->fillBuffer get called? Are you measuring actual rendered fps? – shouston Feb 25 '13 at 19:52
  • the command gets called at ~20 fps and the `device->fillBuffer` command takes less than 2ms. I am measuring actual rendered FPS, which is different from the rate at which the render loop runs. – Mikhail Feb 25 '13 at 20:09
  • What is your actual fps if you remove all of the PBO & texture calls in paintGL, calling only device->fillBuffer and glDrawArrays? – shouston Feb 25 '13 at 20:15
  • The FPS measured by counting calls to the paint function comes out to about 18 to 22 frames per second. This is not the rate at which the texture streams in; there is a visible delay. Just to be sure, I tried `memset(memory,150*(i++%2),numGLFrames*widthGL*heightGL*sizeof(GLubyte));` which blinks, but not fast enough to be 20 fps. – Mikhail Feb 25 '13 at 20:37
  • The only way I could see the rendered FPS being lower than the number of times per second paintGL is called is if paintGL is being called multiple times between swaps of the front and back buffers. Texture loads aren't queued up where they can stall and fall behind rendering; if the texture is not ready, swapping the buffers will block until it is and the frame has been rendered. Have you tried something like FRAPS to display the rendered FPS? – shouston Feb 25 '13 at 22:08