I want to write a compute shader to analyze and transform video data in YUY2 format. Since shaders cannot access YUY2 textures directly, I create views with different formats on them: the input texture is read through a DXGI_FORMAT_R8G8_B8G8_UNORM shader resource view, and the output texture is written through a DXGI_FORMAT_R32_UINT unordered access view, which allows writing.
The problem I have is that writes to the output texture don't do anything. I've simplified my compute shader down to just writing a constant value to the output texture, but no matter what value I write, the output texture is just solid green.
Here's my setup code:
// Fetch the device's immediate context and query it for the
// ID3D11DeviceContext4 interface used by the rest of this code.
CComPtr<ID3D11DeviceContext4> context;
{
    CComPtr<ID3D11DeviceContext> immediateContext;
    m_Device->GetImmediateContext(&immediateContext);
    // The HRESULT of the interface query is handed to ThrowHR.
    ThrowHR(immediateContext.QueryInterface(&context));
}
// Input frame: an immutable YUY2 texture initialised from the CPU-side frame
// data, plus the shader resource view the compute shader reads it through.
CComPtr<ID3D11Texture2D> frameTexture;
CComPtr<ID3D11ShaderResourceView> frameView;
{
    // Value-initialisation zeroes every field, so CPUAccessFlags, MiscFlags and
    // SampleDesc.Quality remain 0 without being assigned explicitly.
    D3D11_TEXTURE2D_DESC texDesc = {};
    texDesc.Width = frame.Width;
    texDesc.Height = frame.Height;
    texDesc.MipLevels = 1;
    texDesc.ArraySize = 1;
    texDesc.Format = DXGI_FORMAT_YUY2;
    texDesc.SampleDesc.Count = 1;
    texDesc.Usage = D3D11_USAGE_IMMUTABLE;
    texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;

    // Initial contents come straight from the frame; the row pitch is computed
    // as two bytes per pixel.
    D3D11_SUBRESOURCE_DATA initialPixels = {};
    initialPixels.pSysMem = frame.Data;
    initialPixels.SysMemPitch = static_cast<UINT>(frame.Width * 2);
    initialPixels.SysMemSlicePitch = static_cast<UINT>(frame.Size);
    ThrowHR(m_Device->CreateTexture2D(&texDesc, &initialPixels, &frameTexture));

    // Shaders cannot use a YUY2-format view, so the SRV uses the
    // R8G8_B8G8_UNORM analog over the single mip level.
    D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
    srvDesc.Format = DXGI_FORMAT_R8G8_B8G8_UNORM;
    srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
    srvDesc.Texture2D.MostDetailedMip = 0;
    srvDesc.Texture2D.MipLevels = 1;
    ThrowHR(m_Device->CreateShaderResourceView(frameTexture, &srvDesc, &frameView));
}
// Output frame: a GPU-writable YUY2 texture of the same dimensions as the
// input, plus the unordered access view the compute shader writes through.
CComPtr<ID3D11Texture2D> outputTexture;
CComPtr<ID3D11UnorderedAccessView> outputView;
{
    // Value-initialisation zeroes every field, so CPUAccessFlags, MiscFlags and
    // SampleDesc.Quality remain 0 without being assigned explicitly.
    D3D11_TEXTURE2D_DESC texDesc = {};
    texDesc.Width = frame.Width;
    texDesc.Height = frame.Height;
    texDesc.MipLevels = 1;
    texDesc.ArraySize = 1;
    texDesc.Format = DXGI_FORMAT_YUY2;
    texDesc.SampleDesc.Count = 1;
    texDesc.Usage = D3D11_USAGE_DEFAULT;
    texDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
    // No initial data is supplied for the output texture.
    ThrowHR(m_Device->CreateTexture2D(&texDesc, nullptr, &outputTexture));

    // R32_UINT is used as the view format because it supports both reads and
    // writes through a UAV; the view targets mip slice 0.
    D3D11_UNORDERED_ACCESS_VIEW_DESC viewDesc = {};
    viewDesc.Format = DXGI_FORMAT_R32_UINT;
    viewDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
    viewDesc.Texture2D.MipSlice = 0;
    ThrowHR(m_Device->CreateUnorderedAccessView(outputTexture, &viewDesc, &outputView));
}
// Bind the compute shader and its resources.
context->CSSetShader(m_ComputeShader, nullptr, 0);
ID3D11Buffer *const cbufs[] = { m_ParamsCBuffer };
context->CSSetConstantBuffers(0, ARRAYSIZE(cbufs), cbufs);
// Binding starts at slot 0, so m_StatsView lands in u0 and outputView in u1,
// which is the register the shader's Output texture is declared on.
ID3D11UnorderedAccessView *const uavs[] = { m_StatsView, outputView };
context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr);
ID3D11ShaderResourceView *const srvs[] = { frameView };
context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs);

// Execute the shader: one thread group per pair of horizontally adjacent
// pixels (the shader is declared with numthreads(1, 1, 1)).
context->Dispatch(frame.Width / 2, frame.Height, 1);

// Unbind everything again. Dedicated zero-initialised arrays are passed here:
// the arrays above hold const pointers, and overwriting const objects through
// a cast (e.g. with ZeroMemory) is undefined behaviour, so the compiler would
// be free to keep passing the original, non-null pointers.
ID3D11Buffer *const nullCbufs[ARRAYSIZE(cbufs)] = {};
context->CSSetConstantBuffers(0, ARRAYSIZE(nullCbufs), nullCbufs);
ID3D11UnorderedAccessView *const nullUavs[ARRAYSIZE(uavs)] = {};
context->CSSetUnorderedAccessViews(0, ARRAYSIZE(nullUavs), nullUavs, nullptr);
ID3D11ShaderResourceView *const nullSrvs[ARRAYSIZE(srvs)] = {};
context->CSSetShaderResources(0, ARRAYSIZE(nullSrvs), nullSrvs);
context->CSSetShader(nullptr, nullptr, 0);
And here's the shader:
// Input frame, bound to SRV slot t0: the host code creates this view with
// format R8G8_B8G8_UNORM over a YUY2 texture.
// NOTE(review): the simplified main() in this file does not read Input.
Texture2D<unorm float4> Input : register(t0);
// Output frame, bound to UAV slot u1 (the host binds another UAV in u0): the
// host code creates this view with format R32_UINT over a YUY2 texture, so
// every element is written as one packed 32-bit value.
RWTexture2D<uint> Output : register(u1);
// Packs a float4 into a uint laid out as X8Y8Z8W8_UNORM:
// x occupies bits 0-7, y bits 8-15, z bits 16-23 and w bits 24-31.
// Each component is saturated to [0, 1], scaled to 0..255 and rounded to the
// nearest integer, following Direct3D's FLOAT -> UNORM conversion rule.
uint packFloat4(float4 unpacked)
{
    // Adding 0.5 before the truncating cast rounds to nearest. Truncation
    // alone maps 0.5 to 127 instead of 128, and can lose one LSB whenever
    // (k / 255) * 255 evaluates to slightly less than k in floating point.
    uint4 quantized = (uint4)(saturate(unpacked) * 255.0 + 0.5);
    // The four bytes occupy disjoint bit ranges, so OR combines them.
    return quantized.x | (quantized.y << 8) | (quantized.z << 16) | (quantized.w << 24);
}
// Entry point. Each thread group holds a single thread and handles one output
// element, i.e. two adjacent pixels because of the YUY2 packing. In this
// simplified form it only stores a constant test value.
[numthreads(1, 1, 1)]
void main(uint3 gid : SV_GroupID)
{
    // Constant test colour, packed into one 32-bit value and written to the
    // element addressed by this group's x/y coordinates.
    const unorm float4 testColor = float4(0.5, 0.7, 0.3, 1.0);
    Output[gid.xy] = packFloat4(testColor);
}