I'm trying to use the IIR filter from application note AVR223, which is written in the IAR dialect of AVR assembly. The main reason being that in C you can only do full 32*32=32 bits multiplications. So I'm especially interested in the following macros:
MUL_MOV_24 MACRO
// Multiply (signed) high bytes of both coefficient and sample work registers.
// Then copy the resulting low byte into the accumulator's high byte
// (sign bit will be correct).
muls COEFH, DATAH
mov AC2, R0
// Multiply (unsigned) low bytes of the coefficient and sample work registers.
// Copy the resulting high & low byte to the accumulator's low & middle bytes.
mul COEFL, DATAL
mov AC0, R0
mov AC1, R1
// Multiply (signed-unsigned) high coefficient byte and low sample byte.
// Add resulting low byte to accumulator's middle byte. (May generate carry!)
// Add, with carry, resulting high byte to accumulator's high byte.
mulsu COEFH, DATAL
add AC1, R0
adc AC2, R1
// Multiply (signed-unsigned) high sample byte and low coefficient byte.
// Add resulting low byte to accumulator's middle byte. (May generate carry!)
// Add, with carry, resulting high byte to accumulator's high byte.
mulsu DATAH, COEFL
add AC1, R0
adc AC2, R1
ENDM
// SMAC_24 does the same thing as MUL_MOV_24 except it adds to the accumulator
// from the start, instead of overwriting.
SMAC_24 MACRO
muls COEFH, DATAH
add AC2, R0
mul COEFL, DATAL
add AC0, R0
adc AC1, R1 // This may generate a carry..
adc AC2, ZERO // ..which must be added to accumulator's high byte!
mulsu COEFH, DATAL
add AC1, R0
adc AC2, R1
mulsu DATAH, COEFL
add AC1, R0
adc AC2, R1
ENDM
Which I tried to convert to the following functions:
int32_t mul_mov_24(int16_t coef, int16_t data) {
int32_t ac = 0;
asm (
"muls %B[COEF], %B[DATA] \n\t"
"mov %C[AC], r0 \n\t"
"mul %A[COEF], %A[DATA] \n\t"
"mov %A[AC], r0 \n\t"
"mov %B[AC], r1 \n\t"
"mulsu %B[COEF], %A[DATA] \n\t"
"add %B[AC], r0 \n\t"
"adc %C[AC], r1 \n\t"
"mulsu %B[DATA], %A[COEF] \n\t"
"add %B[AC], r0 \n\t"
"adc %C[AC], r1 \n\t"
: [AC] "=r" (ac)
: [COEF] "a" (coef),
[DATA] "a" (data)
: "r0", "r1");
return ac;
}
void smac_24(int32_t *ac, int16_t coef, int16_t data) {
asm (
"clr r2 \n\t"
"muls %B[COEF], %B[DATA] \n\t"
"add %C[AC], r0 \n\t"
"mul %A[COEF], %A[DATA] \n\t"
"add %A[AC], r0 \n\t"
"add %B[AC], r1 \n\t"
"adc %C[AC], r2 \n\t"
"mulsu %B[COEF], %A[DATA] \n\t"
"add %B[AC], r0 \n\t"
"adc %C[AC], r1 \n\t"
"mulsu %B[DATA], %A[COEF] \n\t"
"add %B[AC], r0 \n\t"
"adc %C[AC], r1 \n\t"
: "=r" (*ac)
: [COEF] "a" (coef),
[DATA] "a" (data),
[AC] "0" (*ac)
: "r0", "r1", "r2");
}
However, I must be doing something silly, as depending on how I call them and in which order I get completely bullshit results, or even a reset. I have the feeling I'm doing something wrong with input, output and clobbering.
Links to the full code and application note: https://github.com/pepijndevos/accessibletuner/blob/master/tuner/iir.h http://www.microchip.com/wwwappnotes/appnotes.aspx?appnote=en592139