4

When I read vertex data to a Float32Array from a transform buffer in Chrome using getBufferSubData, I get the warning "performance warning: READ-usage buffer was read back without waiting on a fence. This caused a graphics pipeline stall.". My understanding is that the GPU is trying to write vertex data back to the CPU as soon as getBufferSubData is called, which may be before the shaders have finished. I figured that if I can prevent this I may be be able to speed up my application, and I thought the best way to do this would be with a callback. To clarify, the data returned is correct; I'm looking to speed up my application and better understand what's going on.

I have tried to implement a callback using fenceSync, similar to this answer. This should check whether the GPU has finished executing the current commands (including the transform feedback), before executing getBufferSubData. Here is my code.

(function () {
    'use strict';

    const createRandomF32Array = (arrSize) => {
        return Float32Array.from({length: arrSize}, () => Math.floor(Math.random() * 1000));
    };

    const createGlContext = () => {
        const canvas = document.createElement("canvas");
        const gl = canvas.getContext("webgl2");
        canvas.id = 'webgl_canvas';
        document.body.appendChild(canvas);
        if (gl === null) {
            alert("Unable to initialize WebGL. Your browser or machine may not support it.");
            return;
          }
        return gl;
    };

    // creates a single set of linked shaders containing a vertex and a fragment shader
    class shaderProgram {
        constructor(gl, rawVertex, rawFragment, transformFeedbackAttribs=false) {
            this.gl = gl;
            const compiledVertex = this.compileShader(gl.VERTEX_SHADER, rawVertex);
            const compiledFragment = this.compileShader(gl.FRAGMENT_SHADER, rawFragment);
            this.program = this.createProgram(compiledVertex, compiledFragment, transformFeedbackAttribs);
            this.attributeLocations = {};
            this.uniformLocations = {};
        }
        // run on init
        compileShader(shaderType, shaderSource) {
            const gl = this.gl;
            var shader = gl.createShader(shaderType);
            gl.shaderSource(shader, shaderSource);
            gl.compileShader(shader);
            var success = gl.getShaderParameter(shader, gl.COMPILE_STATUS);
            if (success) {
              return shader;
            }
            console.log(gl.getShaderInfoLog(shader));
            gl.deleteShader(shader);
          }
        // run on init
        createProgram = (rawVertex, rawFragment, transformFeedbackAttribs) => {
            const gl = this.gl;
            var program = gl.createProgram();
            gl.attachShader(program, rawVertex);
            gl.attachShader(program, rawFragment);

            if (!(transformFeedbackAttribs === false)) {
                gl.transformFeedbackVaryings(program, [transformFeedbackAttribs], gl.INTERLEAVED_ATTRIBS);
            }
            gl.linkProgram(program);
            var success = gl.getProgramParameter(program, gl.LINK_STATUS);
            if (success) {
              return program;
            }
            console.log(gl.getProgramInfoLog(program));
            gl.deleteProgram(program);
        }

        logAttributeLocations = (attributeName) => {
            const gl = this.gl;
            const attributeLocation = gl.getAttribLocation(this.program, attributeName);
            if (!(attributeName in this.attributeLocations)) {
                this.attributeLocations[attributeName] = attributeLocation;
            }
            return attributeLocation;
        }

        logUniformLocations = (uniformName) => {
            const gl = this.gl;
            const uniformLocation = gl.getUniformLocation(this.program, uniformName);
            if (!(uniformName in this.uniformLocations)) {
                this.uniformLocations[uniformName] = uniformLocation;
            }
            return uniformLocation;
        }

        activate = () => {
            const gl = this.gl;
            gl.useProgram(this.program);
        }

        deactivate = () => {
            const gl = this.gl;
            gl.useProgram(0);
        }

    }

    // the aim of this class is to build a buffer to be sent to the gpu
    class renderObject {
        constructor(gl) {
            this.gl = gl;
            this.vao = this.gl.createVertexArray();
            this.buffers = {};
        }

        addDataToShaderAttribute = (dataset, dataDimension, attributeLocation) => {
            const gl = this.gl;
            var attributeVboNumber = this.addDataToBuffer(dataset);
            gl.bindVertexArray(this.vao);
            gl.enableVertexAttribArray(attributeLocation);
            gl.vertexAttribPointer(attributeLocation, dataDimension, gl.FLOAT, false, 0, 0);
            return attributeVboNumber;
        }

        prepareDataForShaderUniform = (dataset) => {
            const gl = this.gl;
            var uniformVboNumber = this.addDataToBuffer(dataset);
            return uniformVboNumber;
        }

        addDataToBuffer = (dataset) => {
            const gl = this.gl;
            var vertexBuffer = gl.createBuffer();
            gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);
            gl.bufferData(gl.ARRAY_BUFFER, dataset, gl.STATIC_DRAW);
            var bufferNumber = Object.keys(this.buffers).length;
            this.buffers[bufferNumber] = vertexBuffer;
            return bufferNumber;
        }

        draw = (drawType, offset, dataLength) => {
            const gl = this.gl;
            gl.drawArrays(drawType, offset, dataLength);
        }

        calculateAndRetreive = (drawType, offset, dataLength) => {
            const gl = this.gl;
            var transformBuffer = gl.createBuffer();
            var emptyDataArray = new Float32Array(dataLength);
            gl.enable(gl.RASTERIZER_DISCARD);

            gl.bindBuffer(gl.TRANSFORM_FEEDBACK_BUFFER, transformBuffer);
            gl.bufferData(gl.TRANSFORM_FEEDBACK_BUFFER, emptyDataArray, gl.STATIC_READ);
            var bufferNumber = Object.keys(this.buffers).length;
            this.buffers[bufferNumber] = transformBuffer;
        
            gl.bindBufferBase(gl.TRANSFORM_FEEDBACK_BUFFER, 0, transformBuffer);
            gl.beginTransformFeedback(gl.POINTS);
            gl.drawArrays(gl.POINTS, offset, dataLength);
            gl.endTransformFeedback();
            var arrBuffer = emptyDataArray;
            gl.getBufferSubData(gl.TRANSFORM_FEEDBACK_BUFFER, 0, arrBuffer);
            this.callbackOnSync(this.returnBufferData, emptyDataArray);
        }

        callbackOnSync = (callback, param) => {
            const gl = this.gl;

            var fence = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0);
            gl.flush();
            setTimeout(checkSync);

            function checkSync() {
                console.log(fence);
                const status = gl.clientWaitSync(fence, 0, 0);
                console.log(status);
                if (status == gl.CONDITION_SATISFIED) {
                    gl.deleteSync(fence);
                    return callback(param);
                } else {
                    return(setTimeout(checkSync));
                }
            }
        }

        returnBufferData = (arrBuffer) => {
            const gl = this.gl;

            gl.getBufferSubData(gl.TRANSFORM_FEEDBACK_BUFFER, 0, arrBuffer);
            console.log(arrBuffer);
            return arrBuffer;
        }

    }

    var testVertex = "#version 300 es\r\n\r\nin float a_position;\r\nout float o_position;\r\n\r\nvoid main() {\r\n    o_position = float(a_position + 5.0);\r\n}";

    var testFragment = "#version 300 es\r\nprecision mediump float;\r\n\r\nout vec4 o_FragColor;\r\n\r\nvoid main() {\r\n  o_FragColor = vec4(0.0);\r\n}";

    const gl = createGlContext();
    var positions = createRandomF32Array(1000);

    var t0 = performance.now();

    var testShader = new shaderProgram(gl, testVertex, testFragment, "o_position");
    var aPositionAttribute = testShader.logAttributeLocations("a_position");
    var uResolutionUniform = testShader.logUniformLocations("u_resolution");

    var pointsBuffer = new renderObject(gl);
    var dataBuffer = pointsBuffer.addDataToShaderAttribute(positions, 1, aPositionAttribute);

    gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
    gl.clearColor(0, 0, 0, 0);
    gl.clear(gl.COLOR_BUFFER_BIT);

    testShader.activate();
    var output = pointsBuffer.calculateAndRetreive(gl.TRIANGLES, 0, positions.length, testShader);

    var t1 = performance.now();
    console.log("GPU function took " + (t1 - t0) + " milliseconds.");

    console.log(output);

}());
<!DOCTYPE html>
<html lang="en">
    <meta charset="utf-8">
    <head>
        <title>Rollup Example</title>
    </head>

    <body>
    </body>

    <script src="../build/bundle.min.js"></script>
</html>

This gives the warning "GL_INVALID_OPERATION: Buffer is bound for transform feedback." and every value in the returned array is 0. The line causing the issue seems to be:

var fence = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0)

, which seems to be interfering with the Transform Feedback. The checkSync function seems to work fine. My questions are 1) Where am I going wrong with this? 2) Is this a technique that could work for my use case with some tweaking, or do I need to try something different entirely?

bm13563
  • 688
  • 5
  • 18
  • Use a [debug helper](https://www.khronos.org/webgl/wiki/Debugging#Programmatically_Debugging_WebGL_applications) to call `gl.getError` after every WebGL command. I suspect your error is not related to the `gl.fenceSync`. – gman Jun 07 '20 at 23:35
  • Also you haven't posted enough code to find the issue. Your code works for me. Please post a **minimal** repo **in the question itself**. Suggest you use a [snippet](https://stackoverflow.blog/2014/09/16/introducing-runnable-javascript-css-and-html-code-snippets/). I was able to make a repo showing your code works by adding about 40 lines of boilerplate most of which were standard code for compiling shaders and linking programs – gman Jun 08 '20 at 05:52
  • point taken. i've replaced my code sample with a snippet that reproduces the error – bm13563 Jun 08 '20 at 08:27
  • The webgl debugging resources you provided have helped highlight a separate issue that I've now fixed, but unfortunately haven't identified the nominal problem. I also tried adding getError manually after all of my webGL commands to check. Like you said, it must be something to do with my implementation given that you've managed to get the callback working, but I'm really struggling to see what I've missed given that the array returns correctly (albeit with the fence warning) without the callback. – bm13563 Jun 08 '20 at 23:28
  • The code in your question needs to divide position.length by 2 when calling `calculateAndRetreive ` – gman Jun 08 '20 at 23:30
  • I'm sure I'm probably being thick, but if im passing 1000 1 dimensional floats to the shaders then isn't the number of points rendered 1000? – bm13563 Jun 08 '20 at 23:39
  • Oh, sorry, you fixed that bug 2 hours ago. When I looked at it last night you were passing 2 as the size to vertexAttribPointer – gman Jun 08 '20 at 23:45

1 Answers1

3

So I think this might be a bug in Chrome. Your code works on Mac Chrome but fails on Windows Chrome.

There is one bug where the code waits for CONDITION_SATISFIED but it is also possible for the status to be ALREADY_SIGNALED

A few notes:

  1. The code at the time I wrote this answer is calling getBufferSubData twice.

    The correct thing to do is call it after the fence passes, not before. The warning is related to calling it before AFAICT.

  2. The timing code makes no sense.

    At the bottom the code does

    var t0 = performance.now();
    ...
    var output = pointsBuffer.calculateAndRetreive(...);
    var t1 = performance.now();
    console.log("GPU function took " + (t1 - t0) + " milliseconds.");
    console.log(output);
    

    pointsBuffer.calculateAndRetreive will always return immediately and output will always be undefined

  3. This is subjective but passing in a callback and a param to be used with it later looks like a C programmer using JavaScript. JavaScript has closures so there is arguably never a reason to pass in a parameter to be passed to a callback. The callback itself can always "close" over whatever variables it needs. Like I said though it's a style issue so feel free to continue to do it the way you're doing it. I'm just pointing out it stuck out to me.

  4. The code passes a drawType to calculateAndRetreive but it's never used.

  5. As an example for the future, here is a minimal repo.

'use strict';

/* global document, setTimeout */

const canvas = document.createElement("canvas");
const gl = canvas.getContext("webgl2");

function compileShader(gl, shaderType, shaderSource) {
  const shader = gl.createShader(shaderType);
  gl.shaderSource(shader, shaderSource);
  gl.compileShader(shader);
  const success = gl.getShaderParameter(shader, gl.COMPILE_STATUS);
  if (success) {
    return shader;
  }
  throw new Error(gl.getShaderInfoLog(shader));
}

function createProgram(gl, rawVertex, rawFragment, transformFeedbackAttribs) {
  const program = gl.createProgram();
  gl.attachShader(program, compileShader(gl, gl.VERTEX_SHADER, rawVertex));
  gl.attachShader(program, compileShader(gl, gl.FRAGMENT_SHADER, rawFragment));
  if (transformFeedbackAttribs) {
    gl.transformFeedbackVaryings(program, [transformFeedbackAttribs], gl.INTERLEAVED_ATTRIBS);
  }
  gl.linkProgram(program);
  const success = gl.getProgramParameter(program, gl.LINK_STATUS);
  if (success) {
    return program;
  }
  throw new Error(gl.getProgramInfoLog(program));
}

const vertexShader = `#version 300 es
in float inputValue;
out float outputValue;
void main() {
  outputValue = inputValue * 2.0;
}`;

const fragmentShader = `#version 300 es
precision mediump float;
out vec4 dummy;
void main() {
  dummy = vec4(0.0);
}`;

const program = createProgram(gl, vertexShader, fragmentShader, ['outputValue']);
gl.useProgram(program);

const input = new Float32Array([11, 22, 33, 44]);
const vao = gl.createVertexArray();
gl.bindVertexArray(vao);
const vertexBuffer = gl.createBuffer();
gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);
gl.bufferData(gl.ARRAY_BUFFER, input, gl.STATIC_DRAW);
const inputLoc = gl.getAttribLocation(program, 'inputValue');
gl.enableVertexAttribArray(inputLoc);
gl.vertexAttribPointer(inputLoc, 1, gl.FLOAT, false, 0, 0);


const transformBuffer = gl.createBuffer();
gl.enable(gl.RASTERIZER_DISCARD);

gl.bindBuffer(gl.TRANSFORM_FEEDBACK_BUFFER, transformBuffer);
gl.bufferData(gl.TRANSFORM_FEEDBACK_BUFFER, input.length * 4, gl.STATIC_READ);

gl.bindBufferBase(gl.TRANSFORM_FEEDBACK_BUFFER, 0, transformBuffer);
gl.beginTransformFeedback(gl.POINTS);
gl.drawArrays(gl.POINTS, 0, input.length);
gl.endTransformFeedback();

const fence = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0);
gl.flush();
log('waiting...');

setTimeout(waitForResult);
function waitForResult() {
  const status = gl.clientWaitSync(fence, 0, 0);
  if (status === gl.CONDITION_SATISFIED || status === gl.ALREADY_SIGNALED) {
    gl.deleteSync(fence);
    const output = new Float32Array(input.length);
    gl.getBufferSubData(gl.TRANSFORM_FEEDBACK_BUFFER, 0, output);
    log(output);
  } else {
    setTimeout(waitForResult);
  }
}

function log(...args) {
  const elem = document.createElement('pre');
  elem.textContent = args.join(' ');
  document.body.appendChild(elem);
}

Update

If you want the code to work I suggest you use a transformfeedback object. A transformfeedback object is just like a vertex array object except for outputs instead of inputs. A vertex array object contains all the attribute settings (the settings set with gl.vertexAttribPointer, and gl.enableVertexAttribArray, etc.). A transformfeedback object contains all the varying output settings (the settings set with gl.bindBufferBase and gl.bindBufferRange)

The current issue comes from ambiguous language in the spec about using buffers when they are bound for transform feedback.

You can unbind them, in your case call gl.bindBufferBase with null on index 0. Or you can store them in a transformfeedback object and then unbind that object. The reason using a transformfeedback object is recommended is because it holds more state. If you had 4 bounds bound you can unbind them all by just unbinding the transformfeedback object they are bound to (1 call) where as binding null with gl.bindBufferBase/gl.bindBufferRange it would be 4 calls.

'use strict';

/* global document, setTimeout */

const canvas = document.createElement("canvas");
const gl = canvas.getContext("webgl2");

function compileShader(gl, shaderType, shaderSource) {
  const shader = gl.createShader(shaderType);
  gl.shaderSource(shader, shaderSource);
  gl.compileShader(shader);
  const success = gl.getShaderParameter(shader, gl.COMPILE_STATUS);
  if (success) {
    return shader;
  }
  throw new Error(gl.getShaderInfoLog(shader));
}

function createProgram(gl, rawVertex, rawFragment, transformFeedbackAttribs) {
  const program = gl.createProgram();
  gl.attachShader(program, compileShader(gl, gl.VERTEX_SHADER, rawVertex));
  gl.attachShader(program, compileShader(gl, gl.FRAGMENT_SHADER, rawFragment));
  if (transformFeedbackAttribs) {
    gl.transformFeedbackVaryings(program, [transformFeedbackAttribs], gl.INTERLEAVED_ATTRIBS);
  }
  gl.linkProgram(program);
  const success = gl.getProgramParameter(program, gl.LINK_STATUS);
  if (success) {
    return program;
  }
  throw new Error(gl.getProgramInfoLog(program));
}

const vertexShader = `#version 300 es
in float inputValue;
out float outputValue;
void main() {
  outputValue = inputValue * 2.0;
}`;

const fragmentShader = `#version 300 es
precision mediump float;
out vec4 dummy;
void main() {
  dummy = vec4(0.0);
}`;

const program = createProgram(gl, vertexShader, fragmentShader, ['outputValue']);
gl.useProgram(program);

const input = new Float32Array([11, 22, 33, 44]);
const vao = gl.createVertexArray();
gl.bindVertexArray(vao);
const vertexBuffer = gl.createBuffer();
gl.bindBuffer(gl.ARRAY_BUFFER, vertexBuffer);
gl.bufferData(gl.ARRAY_BUFFER, input, gl.STATIC_DRAW);
const inputLoc = gl.getAttribLocation(program, 'inputValue');
gl.enableVertexAttribArray(inputLoc);
gl.vertexAttribPointer(inputLoc, 1, gl.FLOAT, false, 0, 0);


const transformBuffer = gl.createBuffer();
gl.enable(gl.RASTERIZER_DISCARD);

const tf = gl.createTransformFeedback();
gl.bindTransformFeedback(gl.TRANSFORM_FEEDBACK, tf);

gl.bindBuffer(gl.TRANSFORM_FEEDBACK_BUFFER, transformBuffer);
gl.bufferData(gl.TRANSFORM_FEEDBACK_BUFFER, input.length * 4, gl.STATIC_READ);
gl.bindBuffer(gl.TRANSFORM_FEEDBACK_BUFFER, null);

gl.bindBufferBase(gl.TRANSFORM_FEEDBACK_BUFFER, 0, transformBuffer);
gl.beginTransformFeedback(gl.POINTS);
gl.drawArrays(gl.POINTS, 0, input.length);
gl.endTransformFeedback();
gl.bindTransformFeedback(gl.TRANSFORM_FEEDBACK, null);

const fence = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0);
gl.flush();
log('waiting...');

setTimeout(waitForResult);
function waitForResult() {
  const status = gl.clientWaitSync(fence, 0, 0);
  if (status === gl.CONDITION_SATISFIED || status === gl.ALREADY_SIGNALED) {
    gl.deleteSync(fence);
    const output = new Float32Array(input.length);
    gl.bindBuffer(gl.ARRAY_BUFFER, transformBuffer);
    gl.getBufferSubData(gl.ARRAY_BUFFER, 0, output);
    log(output);
  } else {
    setTimeout(waitForResult);
  }
}

function log(...args) {
  const elem = document.createElement('pre');
  elem.textContent = args.join(' ');
  document.body.appendChild(elem);
}

Note that just like there is a default vertex array object, the one that's bound originally and re-bound with calling gl.bindVertexArray(null), so to is there a default transformfeedback object.

you might find this helpful in seeing the various objects and their state

gman
  • 100,619
  • 31
  • 269
  • 393
  • Thanks for the pointers and for investigating; I've taken plenty away from this despite it potentially being a bug. Also, I don't know if it's relevant, but behaviour is similar on Firefox – bm13563 Jun 09 '20 at 05:55
  • For me on firefox it worked once I added a check for `ALREADY_SIGNALED` and removed the `getBufferSubData` call that was happening before waiting for the fence. Is that not what you're seeing? – gman Jun 09 '20 at 05:58
  • You're right, it works on Firefox with the changes you suggested - had just woken up and was going off my testing last night. Thanks – bm13563 Jun 09 '20 at 09:04
  • Both of those suggestions work great on Chrome now, thanks – bm13563 Jun 10 '20 at 09:12