The compiler keeps track of all type information during translation, and it will generate the proper machine code to deal with data of different types or sizes.
Let's take the following code:
#include <stdio.h>
int main( void )
{
long long x, y, z;
x = 5;
y = 6;
z = x + y;
printf( "x = %ld, y = %ld, z = %ld\n", x, y, z );
return 0;
}
After running that through gcc -S, the assignment, addition, and print statements are translated to:
movq $5, -24(%rbp)
movq $6, -16(%rbp)
movq -16(%rbp), %rax
addq -24(%rbp), %rax
movq %rax, -8(%rbp)
movq -8(%rbp), %rcx
movq -16(%rbp), %rdx
movq -24(%rbp), %rsi
movl $.LC0, %edi
movl $0, %eax
call printf
movl $0, %eax
leave
ret
movq
is the mnemonic for moving values into 64-bit words ("quadwords"). %rax
is a general-purpose 64-bit register that's being used as an accumulator. Don't worry too much about the rest of it for now.
Now let's see what happens when we change those long
s to short
s:
#include <stdio.h>
int main( void )
{
short x, y, z;
x = 5;
y = 6;
z = x + y;
printf( "x = %hd, y = %hd, z = %hd\n", x, y, z );
return 0;
}
Again, we run it through gcc -S to generate the machine code, et voila:
movw $5, -6(%rbp)
movw $6, -4(%rbp)
movzwl -6(%rbp), %edx
movzwl -4(%rbp), %eax
leal (%rdx,%rax), %eax
movw %ax, -2(%rbp)
movswl -2(%rbp),%ecx
movswl -4(%rbp),%edx
movswl -6(%rbp),%esi
movl $.LC0, %edi
movl $0, %eax
call printf
movl $0, %eax
leave
ret
Different mnemonics - instead of movq
we get movw
and movswl
, we're using %eax
, which is the lower 32 bits of %rax
, etc.
Once more, this time with floating-point types:
#include <stdio.h>
int main( void )
{
double x, y, z;
x = 5;
y = 6;
z = x + y;
printf( "x = %f, y = %f, z = %f\n", x, y, z );
return 0;
}
gcc -S again:
movabsq $4617315517961601024, %rax
movq %rax, -24(%rbp)
movabsq $4618441417868443648, %rax
movq %rax, -16(%rbp)
movsd -24(%rbp), %xmm0
addsd -16(%rbp), %xmm0
movsd %xmm0, -8(%rbp)
movq -8(%rbp), %rax
movq -16(%rbp), %rdx
movq -24(%rbp), %rcx
movq %rax, -40(%rbp)
movsd -40(%rbp), %xmm2
movq %rdx, -40(%rbp)
movsd -40(%rbp), %xmm1
movq %rcx, -40(%rbp)
movsd -40(%rbp), %xmm0
movl $.LC2, %edi
movl $3, %eax
call printf
movl $0, %eax
leave
ret
New mnemonics (movsd
), new registers (%xmm0
).
So basically, after translation, there's no need to tag the data with type information; that type information is "baked in" to the machine code itself.