0
void div_tl_128(unsigned char* data_mat, int b, int Matrix_Size) 
{
    int k = 0;
    int count = Matrix_Size >> Bytes_Shift;
    if(count == 0)
            return;
    __asm__ __volatile__(

                "lsl r1, #4                         \n"
                "vld1.8 {d0-d1}, [r5]                       \n"
                "vld1.8 {d2-d3}, [r3,+r1]                   \n"
                "vld1.8 {d4-d5}, [r4,+r1]                   \n"

                "l2:                                        \n"
                "vld1.8 {d6-d7}, [r0]                       \n"
                "vld1.8 {d8-d9}, [r0]                       \n"
                "vshr.u8 q3, #4                             \n"
                "vand.u8 q4, q0                             \n"
                "vtbl.u8 d6, {q1}, d6                       \n"
                "vtbl.u8 d7, {q1}, d7                       \n"
                "vtbl.u8 d8, {q2}, d8                       \n"
                "vtbl.u8 d9, {q2}, d9                       \n"
                "veor.u8 q3, q3, q4                         \n"
                "vst1.8 {d6-d7}, [r0]                       \n"
                "adds r0, r0, #16                           \n"
                "subs r2, r2, #1                            \n"
                "bne l2                                     \n"
                :
                : "r"(data_mat), "r"(b), "r"(count), "r"(div_result_high2), "r"(div_result_low2), "r"(mask)
                : 
        );
}

div_result_high2 and low2 table are declared in other header file. mask is an array of 128-bit and each 8-bit element is 0x0f

This code should do a table look up division, but it doesn't work. Where am I going wrong?

Deduplicator
  • 44,692
  • 7
  • 66
  • 118
  • 2
    To start with how do you know on which register `div_result_high2` ends up? You should do some more study. http://www.ethernut.de/en/documents/arm-inline-asm.html – auselen Apr 30 '14 at 13:45
  • Ie, you need `vld1.8 {d0-d1}, [%0]`, etc. and add d0-d9 to the clobber list. The `%0` tells GCC to provide a register with the `data_mat` pointer. Shuffling things in registers always looses time and this is the main benefit of *inline assembler*. You might also consider `1:` and `bne 1b` instead of the *l2* label. – artless noise Apr 30 '14 at 14:46
  • You are also corrupting cc without telling the compiler. – Jake 'Alquimista' LEE May 08 '14 at 00:15

0 Answers0