I'm currently writing a program for YUV420SP => RGB/BGR color space conversion, follow the floatint-point formula calculation, without any SIMD or multi-threading optimization.
The function's input data is unsigned char type, the finally result's type is also unsigned char type. But for the intermediate variables, the formula itself requires float type(the expressions in the right of the =
), but for the float => unsigned char conversion, there are two choices, one is using float r, g, b
the other is int r, g, b
:
unsigned char y = 223; // mock for getting y value
unsigned char u = 200; // mock for getting u value
unsigned char v = 200; // mock for getting v value
unsigned char* rgb0 = (unsigned char*)malloc(MAXN); // for finally result saving
// the YUV=>RGB color conversion
float r, g, b; // [!! choice1 !!] if using this line, code run slower
int r, g, b; // [!! choice2 !!] if using this line, code run much faster
y = std::max(16, (int)y_ptr0[0]);
r = 1.164 * (y - 16) + 1.596 * (v - 128);
g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128);
b = 1.164 * (y - 16) + 2.018 * (u - 128);
rgb0[2-b_idx] = saturate_ucast(r);
rgb0[1] = saturate_ucast(g);
rgb0[b_idx] = saturate_ucast(b);
rgb0 += 3;
What makes me confusing is, for the actual test (convert a width=7680x4320 image), the float r,g,b
is about much slower that using int r, g, b
, on both Linux x86 and Android ARMv8 platform
The full code for the color conversion is:
#include <limits.h>
inline uchar saturate_uchar(int v)
{
return (uchar)((unsigned int)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
}
inline uchar saturate_uchar(float v)
{
int iv = round(v);
return saturate_uchar(iv);
}
template<int u_idx, int b_idx>
void yuv420sp2rgb_naive(
const uchar* y_plane, int height, int width, int y_linebytes,
const uchar* uv_plane, int uv_linebytes,
uchar* rgb, int rgb_linebytes,
const Option& opt
)
{
/// param checking
assert (y_plane!=NULL && uv_plane!=NULL && rgb!=NULL);
/// neon-specific param checking
assert (width>=2 && height>=2);
int w = width;
int h = height;
for (int i=0; i <= h-2; i+=2)
{
const unsigned char* y_ptr0 = y_plane + i * y_linebytes;
const unsigned char* y_ptr1 = y_ptr0 + y_linebytes;
unsigned char* rgb0 = rgb + i * rgb_linebytes;
unsigned char* rgb1 = rgb0+ rgb_linebytes;
const unsigned char* uv_ptr = uv_plane + (i/2) * uv_linebytes;
for (size_t j=0; j <= width-2; j += 2)
{
int y;
float r, g, b; // choice1
//int r, g, b; // choice2
// R = 1.164(Y - 16) + 1.596(V - 128)
// G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
// B = 1.164(Y - 16) + 2.018(U - 128)
int u = uv_ptr[u_idx];
int v = uv_ptr[1 - u_idx];
// y00
y = std::max(16, (int)y_ptr0[0]);
r = 1.164 * (y - 16) + 1.596 * (v - 128);
g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128);
b = 1.164 * (y - 16) + 2.018 * (u - 128);
rgb0[2-b_idx] = saturate_uchar(r);
rgb0[1] = saturate_uchar(g);
rgb0[b_idx] = saturate_uchar(b);
rgb0 += 3;
// y01
y = std::max(16, (int)y_ptr0[1]);
r = 1.164 * (y - 16) + 1.596 * (v - 128);
g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128);
b = 1.164 * (y - 16) + 2.018 * (u - 128);
rgb0[2-b_idx] = saturate_uchar(r);
rgb0[1] = saturate_uchar(g);
rgb0[b_idx] = saturate_uchar(b);
rgb0 += 3;
// y10
y = std::max(16, (int)y_ptr1[0]);
r = 1.164 * (y - 16) + 1.596 * (v - 128);
g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128);
b = 1.164 * (y - 16) + 2.018 * (u - 128);
rgb1[2-b_idx] = saturate_uchar(r);
rgb1[1] = saturate_uchar(g);
rgb1[b_idx] = saturate_uchar(b);
rgb1 += 3;
// y11
y = std::max(16, (int)y_ptr1[1]);
r = 1.164 * (y - 16) + 1.596 * (v - 128);
g = 1.164 * (y - 16) - 0.813 * (v - 128) - 0.391 * (u - 128);
b = 1.164 * (y - 16) + 2.018 * (u - 128);
rgb1[2-b_idx] = saturate_uchar(r);
rgb1[1] = saturate_uchar(g);
rgb1[b_idx] = saturate_uchar(b);
rgb1 += 3;
y_ptr0 += 2;
y_ptr1 += 2;
uv_ptr += 2;
}
}
}
platform | choice | time cost |
---|---|---|
linux x64 | float r, g, b | 140 ms |
linux x64 | int r, g, b | 107 ms |
armv8 | float r, g, b | 152 ms |
armv8 | int r, g, b | 111 ms |
Question: why changing variable r,g,b
's type from float
to int
boost speed so much?