I am trying to implement 4x4 matrix multiplication with SIMD. Everything works when the matrices are filled with integer values, but there are issues when they are filled with floating-point values: the results are not quite correct. Here is my matrix representation:
// 4x4 float matrix exposed through several overlapping views. Because this is
// a union, every member below starts at the same address and aliases the same
// storage.
// NOTE(review): the anonymous structs, and reading a member other than the one
// most recently written, rely on compiler extensions rather than standard C++.
union mat4
{
// View 1: sixteen individually named scalars, declared four per line.
// By declaration order Eij overlays E[i-1][j-1] (and V[4*(i-1) + (j-1)]).
struct
{
float E11, E12, E13, E14;
float E21, E22, E23, E24;
float E31, E32, E33, E34;
float E41, E42, E43, E44;
};
// View 2: four named vec4 members.
// NOTE(review): vec4 is declared elsewhere; presumably it is four packed
// floats so that Line0..Line3 line up with the four groups above — confirm.
struct
{
vec4 Line0;
vec4 Line1;
vec4 Line2;
vec4 Line3;
};
// View 3: the same four vec4 values, indexable as an array.
vec4 Lines[4];
// View 4: two-dimensional float indexing.
float E[4][4];
// View 5: flat float indexing; V[4*r + c] is the same float as E[r][c].
float V[16];
// View 6: one 128-bit SSE vector per group of four consecutive floats.
// Having an __m128 member also gives the union the alignment of __m128.
__m128 I[4];
};
And my multiplication implementation:
// Multiplies two 4x4 matrices using SSE + FMA intrinsics.
//
// For each group of four consecutive floats in lhs (index `row`), the result
// vector is the linear combination
//     result.I[row] = sum over k of lhs.V[4*row + k] * rhs.I[k]
// i.e. result.E[row][c] = sum over k of lhs.E[row][k] * rhs.E[k][c].
//
// The sum is accumulated with fused multiply-adds starting from zero, in the
// order k = 0, 1, 2, 3. Each FMA rounds only once, so the low bits of the
// result can differ from a scalar reference that rounds after the multiply
// and again after the add.
//
// Requires a target with FMA support (_mm_fmadd_ps).
inline mat4
operator*(const mat4& lhs, const mat4& rhs)
{
    mat4 product = {};

    for (int row = 0; row < 4; ++row)
    {
        // The four scalar coefficients that weight rhs.I[0..3] for this row.
        const float* coeffs = &lhs.V[row * 4];

        // Starts from the zero-initialised destination vector.
        __m128 acc = product.I[row];

        for (int k = 0; k < 4; ++k)
        {
            // Broadcast one coefficient to all four lanes, then accumulate
            // rhs.I[k] * coefficient into the running sum.
            const __m128 splat = _mm_set1_ps(coeffs[k]);
            acc = _mm_fmadd_ps(rhs.I[k], splat, acc);
        }

        product.I[row] = acc;
    }

    return product;
}
I don’t think the issue is with data alignment, and as far as I can tell the matrices are being filled correctly. But the results are way off. I’d be thankful for any help here.