If you need to use fixed-point arithmetic, you must implement the addition and multiplication operations yourself. In that case, you need to decide how many bits to allocate to the fractional part and how many to the integer part; the shift amounts used in the operations then follow from that choice.
In the following code snippet, I've implemented fixed-point arithmetic by allocating 22 bits to the fractional part and 9 bits to the integer part (the remaining bit of the 32-bit word is the sign bit).
In the multiplication, I first widen each operand to 64 bits so that the intermediate product cannot overflow. After the multiplication, the product is shifted right by the number of fractional bits so that the result has the same fractional format as the inputs.
In the addition, I've added saturation to the output in order to avoid overflow: if the sum overflows, the output is clamped to the largest-magnitude value that can be represented, for either sign.
#include <stdio.h>
#include <math.h>
#include <stdint.h>
/* Number of low-order bits of a fixed_type value that hold the fractional part. */
#define fractional_bits 22
/* Total width, in bits, of fixed_type (sign bit included). */
#define fixed_type_bits 32
/* Storage type for a fixed-point value. */
typedef int32_t fixed_type;
/* Wider integer type used for intermediate results (the full product in fixed_mult). */
typedef int64_t expand_type;
fixed_type float_to_fixed(float inp)
{
return (fixed_type)(inp * (1 << fractional_bits));
}
/*
 * Convert a fixed-point value back to float by dividing out the
 * 2^fractional_bits scale factor.
 */
float fixed_to_float(fixed_type inp)
{
    const float raw = (float)inp;
    const float scale = (float)(1 << fractional_bits);

    return raw / scale;
}
/*
 * Multiply two fixed-point values.
 *
 * Both operands are widened to expand_type so the full product cannot
 * overflow, then the product is shifted right by fractional_bits to return
 * to the operands' fractional format. A result that does not fit in the
 * 32-bit fixed_type is saturated to INT32_MAX / INT32_MIN instead of being
 * silently wrapped by the narrowing cast.
 */
fixed_type fixed_mult(fixed_type inp_1, fixed_type inp_2)
{
    const expand_type product = (expand_type)inp_1 * (expand_type)inp_2;
    /*
     * Right-shifting a negative value is implementation-defined in C;
     * compilers that use an arithmetic shift round toward negative infinity.
     */
    const expand_type scaled = product >> fractional_bits;

    if (scaled > INT32_MAX) {
        return INT32_MAX;
    }
    if (scaled < INT32_MIN) {
        return INT32_MIN;
    }
    return (fixed_type)scaled;
}
/*
 * Add two fixed-point values with saturation.
 *
 * Returns inp_1 + inp_2 when the sum fits in the 32-bit fixed_type.
 * On positive overflow the result is clamped to INT32_MAX, on negative
 * overflow to INT32_MIN.
 *
 * The range check is done BEFORE the addition: signed integer overflow is
 * undefined behaviour in C, so the sum must never be computed when it would
 * not fit.
 */
fixed_type fixed_add(fixed_type inp_1, fixed_type inp_2)
{
    /* Overflow is only possible when both operands have the same sign. */
    if (inp_2 > 0 && inp_1 > INT32_MAX - inp_2) {
        return INT32_MAX;
    }
    if (inp_2 < 0 && inp_1 < INT32_MIN - inp_2) {
        return INT32_MIN;
    }
    return inp_1 + inp_2;
}