1

I'm writing inline x86 assembly code to copy the contents of an array defined in the C language to the x87 FPU stack to perform further operations. A value that I store on the top of the FPU stack is different when I retrieve it from the FPU stack.

I tried looking at the disassembly the compiler generated, referred to the Turbo C++ 3.0 manual, and referred to the 8086 to Pentium Assembly Programming textbook, but couldn't find a solution to this problem.

My code is:

#include<stdio.h>

/* Tries to copy array[2] through the x87 FPU into sum and print it. */
void main()
{
    float array[10] = { 1.13,1.98,1.67,1.19},sum;
asm{
    lea ax,[array]          /* AX = address of array (an address, not an element value) */
    push ax                 /* Goes onto the CPU call stack, not the x87 stack;
                               nothing in this block pops it again */
    fld dword ptr[bp-8]     /* Reads 4 bytes at a hard-coded offset from BP and ignores AX.
                               NOTE(review): whether this location is array[2] depends on
                               the compiler's stack frame layout */
    fstp sum                /* Store ST(0) into sum and pop the x87 stack */
}
printf("%f", sum);
}

It compiles with no errors, but when run I get -786.997 instead of the expected result of 1.67.

Michael Petch
  • 46,082
  • 8
  • 107
  • 198
Madhu
  • 55
  • 1
  • 5

1 Answer

2

In this code:

    lea ax,[array]
    push ax
    fld dword ptr[bp-8]
    fstp sum

You are loading the address of array into AX, not the value. You then push the address of array in AX onto the stack. Your FLD instruction then attempts to read data from a fixed offset relative to BP. As @Jester points out, you shouldn't rely on data on the stack being a specific offset from BP as it depends on Turbo-C's code generator and where things are placed on the stack.

If you want to read the third element of an array, you can load the address of the array and then access individual elements at offsets from that address. Load the address of array into BX, SI, or DI, as these registers can be used to form a memory address in 16-bit addressing modes (AX cannot).

Your code could have looked like:

#include<stdio.h>

/* Loads array[2] through a base register instead of a hard-coded BP offset,
   stores it in sum, and prints it. */
void main()
{
    float array[] = { 1.13,1.98,1.67,1.19 }, sum;
    asm{
        lea bx,[array]      /* BX = address of array[0]; used as the base register below */
        fld dword ptr[bx+8] /* Push array[2] onto the FPU stack: each float is 4 bytes,
                               so byte offset 8 is the third element */
        fstp [sum]          /* Store top of stack ST(0) to sum and pop the FPU stack */
    }
    printf("%f", sum);      /* Print the value that was copied via the FPU */
}

Code that would sum a floating point array from highest to lowest array elements could look like:

#include<stdio.h>

/* Sums the floats in array from the last element down to the first using the
   x87 FPU, then prints the total. */
void main()
{
    float array[] = { 1.13,1.98,1.67,1.19 }, sum;
    const int array_size_b = sizeof (array);
                            /* Size of array in bytes */
    asm {
        lea bx,[array]      /* Load address of array into BX */
        mov si, [array_size_b]
                            /* SI = byte offset to element just past end of array */
        fldz                /* Push an initial SUM value (0.0) on the FPU stack */
    }
    sumloop:                /* C label used as the jump target of the loop below */
    asm {
        fadd dword ptr[bx+si-4]
                            /* Add the float ending at offset SI to SUM in ST(0) */
        sub si, 4           /* Step SI back one float; sets ZF when SI reaches 0 */
        jnz sumloop         /* SI != 0 means elements remain, so loop again */

        fstp [sum]          /* Pop SUM from the FPU stack and store it in variable sum */
    }
    printf("%f", sum);
}

Processing the elements in reverse order simplifies the logic for checking if we have processed the entire array. It could have been done from first element to last with:

#include<stdio.h>

/* Sums the floats in array from the first element to the last using the
   x87 FPU, then prints the total. */
void main()
{
    float array[] = { 1.13,1.98,1.67,1.19 }, sum;
    const int array_size_b = sizeof (array);
                             /* Size of array in bytes */
    asm {
        lea bx,[array]       /* Load address of array into BX */
        xor si, si           /* SI = byte offset into array = 0 = first element */
        mov cx, [array_size_b]
                             /* CX = byte offset of element just past end of array */
        fldz                 /* Push an initial SUM value (0.0) on the FPU stack */
    }
    sumloop:                 /* C label used as the jump target of the loop below */
    asm {
        fadd dword ptr[bx+si]/* Add the current float to SUM on top of FPU stack */
        add si, 4            /* Advance offset to next float in array */
        cmp si, cx           /* Has the offset reached the end of array? */
        jb sumloop           /* Unsigned "below": SI and CX are byte offsets and are
                                never negative, so a signed jump (jl) is the wrong test */

        fstp [sum]           /* Pop SUM from the FPU stack and store it in variable sum */
    }
    printf("%f", sum);
}

Observations

There are effectively two kinds of stack on processors with an x87 FPU (Floating Point Unit): the call stack that SS:SP points at, and the x87 FPU register stack. They are independent: pushing something onto the call stack does not place it on the FPU register stack, and FPU instructions that pop the top element pop only from the FPU register stack. If you push something on the call stack with push ax, you unbalance the call stack and should re-balance it before your inline assembly finishes. You could use pop ax or add sp, 2 to do that.

Michael Petch
  • 46,082
  • 8
  • 107
  • 198