Given the following shader code, which would be more efficient for rendering the other (back) side of each triangle: generating it on the CPU, so that the GPU has to transform twice as many vertices by the instance/world matrix, or generating it on the GPU, by emitting the vertices again in a different order with the normals flipped?
Relevant pieces of the shader code:
// Input to the vertex shader VS: mesh vertex data plus an instance matrix and color.
// NOTE(review): `instance` and `color` are presumably fed from a per-instance
// vertex stream (the semantic names suggest it); the input layout is not shown here.
struct VS_IN
{
// Vertex position; VS multiplies it by `instance` and then by viewProj.
float4 pos : POSITION;
// Vertex normal; VS transforms it by the upper-left 3x3 of `instance`.
float3 norm : NORMAL;
// Matrix applied to both position and normal in VS.
matrix instance : INSTANCEMATRIX;
// Color passed through VS unchanged.
float4 color : INSTANCECOLOR;
};
// Vertex record produced by VS and both consumed and emitted by FlipFaceGS.
// NOTE(review): presumably also the input of the pixel shader PS, which is not shown here.
struct PS_IN
{
// Position after the instance and view-projection transforms.
float4 pos : SV_POSITION;
// Normalized, instance-transformed normal (negated by FlipFaceGS for the second triangle).
float3 norm : NORMAL;
// Color copied from the vertex shader input.
float4 color: COLOR;
};
// Constant buffer bound to slot b0.
cbuffer viewProj : register (b0)
{
// Matrix that VS applies to the instance-transformed position.
// NOTE(review): presumably the combined view * projection matrix — confirm against the host code.
matrix viewProj;
}
// Vertex shader: applies the instance matrix and then viewProj to the position,
// transforms and re-normalizes the normal, and passes the color through.
PS_IN VS(VS_IN input)
{
    // Position: instance transform first, view-projection afterwards.
    float4 instancedPos = mul(input.pos, input.instance);

    // Normal: only the upper-left 3x3 of the instance matrix is applied.
    // NOTE(review): this is a correct normal transform only if the instance
    // matrix contains no non-uniform scale — confirm with the data being fed in.
    float3 instancedNorm = mul(input.norm, (float3x3)input.instance);

    PS_IN result = (PS_IN)0;
    result.pos = mul(instancedPos, viewProj);
    result.norm = normalize(instancedNorm);
    result.color = input.color;
    return result;
}
// Geometry shader: for every incoming triangle, emits the triangle unchanged and
// then a second triangle with the vertex order 0, 2, 1 and negated normals.
// Six vertices are written per input triangle, matching maxvertexcount.
[maxvertexcount(6)]
void FlipFaceGS(triangle PS_IN input[3], inout TriangleStream<PS_IN> OutputStream)
{
    // First triangle: the input vertices in their original order.
    [unroll]
    for (int v = 0; v < 3; ++v)
    {
        OutputStream.Append(input[v]);
    }
    OutputStream.RestartStrip();

    // Build copies of the three vertices with the normal pointing the other way.
    PS_IN flipped[3];
    [unroll]
    for (int k = 0; k < 3; ++k)
    {
        flipped[k] = input[k];
        flipped[k].norm = -input[k].norm;
    }

    // Second triangle: last two vertices swapped so the order is reversed.
    OutputStream.Append(flipped[0]);
    OutputStream.Append(flipped[2]);
    OutputStream.Append(flipped[1]);
    OutputStream.RestartStrip();
}
// Effect technique with a single pass that wires up the three shader stages.
technique10 Render
{
pass P0
{
// Vertex stage: VS compiled for shader model 4.0.
SetVertexShader(CompileShader(vs_4_0, VS()));
// Geometry stage: FlipFaceGS, which emits each triangle plus a reversed copy.
SetGeometryShader(CompileShader(gs_4_0, FlipFaceGS()));
// Pixel stage: PS is not defined in this excerpt.
SetPixelShader(CompileShader(ps_4_0, PS()));
}
}