I am using Visual Studio 2008 C++ for Windows Mobile 6 ARMV4I and I'm trying to learn to read the ARM assembly code generated by VS to minimize unneessary buffer copies within an application. So, I've created a test application that looks like this:
#include <vector>
typedef std::vector< BYTE > Buf;
class Foo
{
public:
Foo( Buf b ) { b_.swap( b ); };
private:
Buf b_;
};
Buf Create()
{
Buf b( 1024 );
b[ 0 ] = 0x0001;
return b;
}
int _tmain( int argc, _TCHAR* argv[] )
{
Foo f( Create() );
return 0;
}
I'd like to understand if the buffer returned by Create
is copied when given to the Foo
constructor or if the compiler is able to optimize that copy away. In the Release build with optimizations turned on, this generates assembly like this:
class Foo
{
public:
Foo( Buf b ) { b_.swap( b ); };
0001112C stmdb sp!, {r4 - r7, lr}
00011130 mov r7, r0
00011134 mov r3, #0
00011138 str r3, this
0001113C str r3, [r7, #4]
00011140 str r3, [r7, #8]
00011144 ldr r3, this
00011148 ldr r2, this
0001114C mov r5, r7
00011150 mov r4, r1
00011154 str r3, this, #4
00011158 str r2, this, #4
0001115C mov r6, r1
00011160 ldr r2, this
00011164 ldr r3, this
00011168 mov lr, r7
0001116C str r3, this
00011170 str r2, this
00011174 ldr r2, [lr, #8]!
00011178 ldr r3, [r6, #8]!
0001117C str r3, this
00011180 str r2, this
00011184 ldr r3, this
00011188 movs r0, r3
0001118C beq |Foo::Foo + 0x84 ( 111b0h )|
00011190 ldr r3, [r1, #8]
00011194 sub r1, r3, r0
00011198 cmp r1, #0x80
0001119C bls |Foo::Foo + 0x80 ( 111ach )|
000111A0 bl 000112D4
000111A4 mov r0, r7
000111A8 ldmia sp!, {r4 - r7, pc}
000111AC bl |stlp_std::__node_alloc::_M_deallocate ( 11d2ch )|
000111B0 mov r0, r7
000111B4 ldmia sp!, {r4 - r7, pc}
--- ...\stlport\stl\_vector.h -----------------------------
// snip!
--- ...\asm_test.cpp
private:
Buf b_;
};
Buf Create()
{
00011240 stmdb sp!, {r4, lr}
00011244 mov r4, r0
Buf b( 1024 );
00011248 mov r1, #1, 22
0001124C bl |
b[ 0 ] = 0x0001;
00011250 ldr r3, [r4]
00011254 mov r2, #1
return b;
}
int _tmain( int argc, _TCHAR* argv[] )
{
00011264 str lr, [sp, #-4]!
00011268 sub sp, sp, #0x18
Foo f( Create() );
0001126C add r0, sp, #0xC
00011270 bl |Create ( 11240h )|
00011274 mov r1, r0
00011278 add r0, sp, #0
0001127C bl |Foo::Foo ( 1112ch )|
return 0;
00011280 ldr r0, argc
00011284 cmp r0, #0
00011288 beq |wmain + 0x44 ( 112a8h )|
0001128C ldr r3, [sp, #8]
00011290 sub r1, r3, r0
00011294 cmp r1, #0x80
00011298 bls |wmain + 0x40 ( 112a4h )|
0001129C bl 000112D4
000112A0 b |wmain + 0x44 ( 112a8h )|
000112A4 bl |stlp_std::__node_alloc::_M_deallocate ( 11d2ch )|
000112A8 mov r0, #0
}
What patterns can I look for in the assembly code to understand where the Buf
structure is being copied?