I thought memory access would be faster than the multiplication and division (although compiler-optimized) done with alpha blending. But it wasn't as fast as expected.
The 16 megabytes used for the table is not an issue in this case. But it is a problem if table lookup could even be slower than doing all the CPU calculations.
Can anyone explain to me why and what is happening? Will the table lookup beat out with a slower CPU?
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <time.h>
#define COLOR_MAX UCHAR_MAX
typedef unsigned char color;
color (*blending_table)[COLOR_MAX + 1][COLOR_MAX + 1];
static color blend(unsigned int destination, unsigned int source, unsigned int a) {
return (source * a + destination * (COLOR_MAX - a)) / COLOR_MAX;
}
void initialize_blending_table(void) {
int destination, source, a;
blending_table = malloc((COLOR_MAX + 1) * sizeof *blending_table);
for (destination = 0; destination <= COLOR_MAX; ++destination) {
for (source = 0; source <= COLOR_MAX; ++source) {
for (a = 0; a <= COLOR_MAX; ++a) {
blending_table[destination][source][a] = blend(destination, source, a);
}
}
}
}
struct timer {
double start;
double end;
};
void timer_start(struct timer *self) {
self->start = clock();
}
void timer_end(struct timer *self) {
self->end = clock();
}
double timer_measure_in_seconds(struct timer *self) {
return (self->end - self->start) / CLOCKS_PER_SEC;
}
#define n 300
int main(void) {
struct timer timer;
volatile int i, j, k, l, m;
timer_start(&timer);
initialize_blending_table();
timer_end(&timer);
printf("init %f\n", timer_measure_in_seconds(&timer));
timer_start(&timer);
for (i = 0; i <= n; ++i) {
for (j = 0; j <= COLOR_MAX; ++j) {
for (k = 0; k <= COLOR_MAX; ++k) {
for (l = 0; l <= COLOR_MAX; ++l) {
m = blending_table[j][k][l];
}
}
}
}
timer_end(&timer);
printf("table %f\n", timer_measure_in_seconds(&timer));
timer_start(&timer);
for (i = 0; i <= n; ++i) {
for (j = 0; j <= COLOR_MAX; ++j) {
for (k = 0; k <= COLOR_MAX; ++k) {
for (l = 0; l <= COLOR_MAX; ++l) {
m = blend(j, k, l);
}
}
}
}
timer_end(&timer);
printf("function %f\n", timer_measure_in_seconds(&timer));
return EXIT_SUCCESS;
}
result
$ gcc test.c -O3
$ ./a.out
init 0.034328
table 14.176643
function 14.183924