I have just started learning assembly and wrote some custom loops for swapping two variables, using C++'s inline `__asm {}` blocks with the Digital Mars compiler in C-Free 5.0.
I enabled the -o (optimization) option
and got these results:
time of for-loop(cycles) 844
time of while-loop(cycles) 735
time of custom-loop-1(cycles) 562
time of custom-loop-2(cycles) 469
I couldn't find an "asm output" option in the Digital Mars compiler to compare the generated code, and there are no other optimisation options in the build options. Should I change my compiler? If yes, to which one? Can you look at the code below and tell me why the custom loops are faster?
Here is the standard for loop:
// Time 200,000,000 swaps of a and b through temp, driven by a for loop.
t1=clock();
for(int i=0;i<200000000;i++)
{
    temp=a;   // remember the old a
    a=b;      // a takes b's value
    b=temp;   // b takes the old a
}
t2=clock();
// clock() reports clock ticks (CLOCKS_PER_SEC per second), not CPU cycles.
// The difference is cast to long so the argument matches the %ld conversion;
// passing a time_t/clock_t to %i is a format/argument mismatch.
printf("\n time of for-loop(increasing) %ld \n",(long)(t2-t1));
Here is the standard while loop:
// Time the same three-statement swap, driven by a while loop that counts j
// up to 200,000,000 (j is declared outside this snippet).
t1=clock();
while(j<200000000)
{
    temp=a;   // remember the old a
    a=b;      // a takes b's value
    b=temp;   // b takes the old a
    j++;
}
t2=clock();
// clock() reports clock ticks (CLOCKS_PER_SEC per second), not CPU cycles.
// The difference is cast to long so the argument matches the %ld conversion;
// passing a time_t/clock_t to %i is a format/argument mismatch.
printf("\n time of while-loop(cycles) %ld \n",(long)(t2-t1));
Here is my custom loop 1:
// Custom loop 1: swap a and b through two registers, reading and writing
// both variables in memory on every iteration.
t1=clock();
j=200000000;                        // iteration count
__asm
{
        // Register roles: ecx = loop counter, edx = iteration limit,
        //                 eax/ebx = scratch copies of a and b.
        pushf                       // save flags and every register we modify
        push    eax
        push    ebx
        push    ecx
        push    edx

        xor     ecx, ecx            // counter = 0 (flags are restored by popf)
        mov     edx, j              // limit = j

do_it_again:
        mov     eax, a              // eax = a
        mov     ebx, b              // ebx = b
        mov     b, eax              // b = old a
        mov     a, ebx              // a = old b
        inc     ecx                 // counter++
        cmp     ecx, edx
        jb      do_it_again         // unsigned compare: repeat while counter < limit

        pop     edx                 // restore in reverse order of the pushes
        pop     ecx
        pop     ebx
        pop     eax
        popf
}
t2=clock();
// clock() reports clock ticks (CLOCKS_PER_SEC per second), not CPU cycles.
// The difference is cast to long so the argument matches the %ld conversion;
// passing a time_t/clock_t to %i is a format/argument mismatch.
printf("\n time of custom-loop-1(cycles) %ld \n",(long)(t2-t1));
Here is my second custom loop:
// Custom loop 2: load a and b into registers once, swap them arithmetically
// inside the loop without touching memory, then store the result back.
t1=clock();
j=200000000;                        // iteration count
__asm
{
        // Register roles: ecx = loop counter, edx = iteration limit,
        //                 eax = working copy of a, ebx = working copy of b.
        pushf                       // save flags and every register we modify
        push    eax
        push    ebx
        push    ecx
        push    edx

        xor     ecx, ecx            // counter = 0 (flags are restored by popf)
        mov     edx, j              // limit = j
        mov     eax, a              // eax = a
        mov     ebx, b              // ebx = b

do_it_again2:
        // Swap eax and ebx without a third register (wraps modulo 2^32).
        sub     eax, ebx            // eax = a - b
        add     ebx, eax            // ebx = b + (a - b) = a
        sub     eax, ebx            // eax = (a - b) - a = -b
        neg     eax                 // eax = b; two's-complement negation
                                    // (xor with 80000000h only flips the sign
                                    //  bit, which is not negation)
        inc     ecx                 // counter++
        cmp     ecx, edx
        jb      do_it_again2        // unsigned compare: repeat while counter < limit

        mov     a, eax              // write the swapped values back; without
        mov     b, ebx              // this the pops below discard all the work

        pop     edx                 // restore in reverse order of the pushes
        pop     ecx
        pop     ebx
        pop     eax
        popf
}
t2=clock();
// clock() reports clock ticks (CLOCKS_PER_SEC per second), not CPU cycles.
// The difference is cast to long so the argument matches the %ld conversion;
// passing a time_t/clock_t to %i is a format/argument mismatch.
printf("\n time of custom-loop-2(cycles) %ld \n",(long)(t2-t1));
Full code:
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
// Benchmark four ways of swapping two ints 200,000,000 times and print the
// elapsed clock() ticks for each: a for loop, a while loop, an inline-asm
// loop that swaps through memory, and an inline-asm loop that swaps in
// registers only. Returns 0.
//
// The inline assembly is 32-bit x86 in Intel syntax inside __asm { } blocks.
// clock() reports clock ticks (CLOCKS_PER_SEC per second), not CPU cycles;
// the "(cycles)" wording in the output is kept unchanged.
int main()
{
    int j=0;
    int a=0,b=0,temp=0;
    // clock() returns clock_t, so keep the timestamps in that type.
    // (The previous srand(time(0)) call was dropped: rand() is never used.)
    clock_t t1=0;
    clock_t t2=0;

    // --- 1. for loop: swap through temp ---
    t1=clock();
    for(int i=0;i<200000000;i++)
    {
        temp=a;   // remember the old a
        a=b;      // a takes b's value
        b=temp;   // b takes the old a
    }
    t2=clock();
    // Cast to long so the argument matches the %ld conversion.
    printf("\n time of for-loop(cycles) %ld \n",(long)(t2-t1));

    // --- 2. while loop: same swap, j counts from 0 ---
    t1=clock();
    while(j<200000000)
    {
        temp=a;
        a=b;
        b=temp;
        j++;
    }
    t2=clock();
    printf("\n time of while-loop(cycles) %ld \n",(long)(t2-t1));

    // --- 3. inline asm: swap through registers, memory touched every pass ---
    t1=clock();
    j=200000000;                        // iteration count
    __asm
    {
        // Register roles: ecx = loop counter, edx = iteration limit,
        //                 eax/ebx = scratch copies of a and b.
        pushf                       // save flags and every register we modify
        push    eax
        push    ebx
        push    ecx
        push    edx

        xor     ecx, ecx            // counter = 0 (flags are restored by popf)
        mov     edx, j              // limit = j

do_it_again:
        mov     eax, a              // eax = a
        mov     ebx, b              // ebx = b
        mov     b, eax              // b = old a
        mov     a, ebx              // a = old b
        inc     ecx                 // counter++
        cmp     ecx, edx
        jb      do_it_again         // unsigned compare: repeat while counter < limit

        pop     edx                 // restore in reverse order of the pushes
        pop     ecx
        pop     ebx
        pop     eax
        popf
    }
    t2=clock();
    printf("\n time of custom-loop-1(cycles) %ld \n",(long)(t2-t1));

    // --- 4. inline asm: swap arithmetically in registers only ---
    t1=clock();
    j=200000000;                        // iteration count
    __asm
    {
        // Register roles: ecx = loop counter, edx = iteration limit,
        //                 eax = working copy of a, ebx = working copy of b.
        pushf
        push    eax
        push    ebx
        push    ecx
        push    edx

        xor     ecx, ecx            // counter = 0
        mov     edx, j              // limit = j
        mov     eax, a              // eax = a
        mov     ebx, b              // ebx = b

do_it_again2:
        // Swap eax and ebx without a third register (wraps modulo 2^32).
        sub     eax, ebx            // eax = a - b
        add     ebx, eax            // ebx = b + (a - b) = a
        sub     eax, ebx            // eax = (a - b) - a = -b
        neg     eax                 // eax = b; two's-complement negation
                                    // (xor with 80000000h only flips the sign
                                    //  bit, which is not negation)
        inc     ecx                 // counter++
        cmp     ecx, edx
        jb      do_it_again2        // unsigned compare: repeat while counter < limit

        mov     a, eax              // write the swapped values back; without
        mov     b, ebx              // this the pops below discard all the work

        pop     edx                 // restore in reverse order of the pushes
        pop     ecx
        pop     ebx
        pop     eax
        popf
    }
    t2=clock();
    printf("\n time of custom-loop-2(cycles) %ld \n",(long)(t2-t1));

    return 0;
}
I am just learning C++ and assembly and wondered how things work under the hood. Thank you.
Setup: Windows XP, Pentium 4 (2 GHz), Digital Mars compiler in C-Free.