I read that when binding something to a std::function, dynamic memory allocation may happen. Does rebinding this way cause any memory-related problem?
That is true: std::function can be inefficient. When unsure, inspect the generated assembly.
Results with gcc 10.2 -O3: godbolt link (NB: clang results are similar)
# main for the std::function-based program (compiler output, Intel syntax).
# Stack frame after `sub rsp, 88`, as used by the code below:
#   [rsp+0  .. rsp+31]  temporary that receives the previous callable state
#   [rsp+32 .. rsp+64]  object addressed through rbp:
#                       +0 16-byte payload, +16 manager pointer,
#                       +24 pointer that is called at .L68, +32 one-byte flag
main:
push r12
# edi = 24 is the size argument of the `operator new` call below.
mov edi, 24
push rbp
sub rsp, 88
# Clear the object's manager slot, its flag byte, and the temporary's manager slot.
mov QWORD PTR [rsp+48], 0
mov BYTE PTR [rsp+64], 0
mov QWORD PTR [rsp+16], 0
call operator new(unsigned long)
# Park the returned heap pointer at [rsp] and reload it as the low half of xmm0.
mov QWORD PTR [rsp], rax
movdqa xmm0, XMMWORD PTR [rsp]
lea rbp, [rsp+32]
mov ecx, OFFSET FLAT:std::_Function_handler<void (), std::_Bind<void (Foo::*(Foo*))()> >::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
# Read the previous state at [rsp+32..63] (xmm1, rdx, and rax below) before it is overwritten.
movdqa xmm1, XMMWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+56]
# Fill the 24-byte heap block: address of Foo::f1, a zero qword, and rbp.
# Presumably this is the std::_Bind object (member-function pointer + Foo*),
# inferred from the handler's symbol name.
mov QWORD PTR [rax+16], rbp
mov QWORD PTR [rax], OFFSET FLAT:Foo::f1()
mov QWORD PTR [rax+8], 0
mov rax, QWORD PTR [rsp+48]
movaps XMMWORD PTR [rsp+32], xmm0
# xmm0 = { _M_manager address, qword loaded from .LC0 }. .LC0 is not shown in this
# listing; presumably it holds the matching invoker, since [rsp+56] is what .L68 calls.
movq xmm0, rcx
movhps xmm0, QWORD PTR .LC0[rip]
# Previous state goes to the temporary at [rsp+0..31]; the new pointer pair goes to [rsp+48].
mov QWORD PTR [rsp+16], rax
mov QWORD PTR [rsp+24], rdx
movaps XMMWORD PTR [rsp], xmm1
movaps XMMWORD PTR [rsp+48], xmm0
# If the previous manager pointer is non-null, call it on the temporary with edx = 3
# (presumably the "destroy" manager operation; TODO confirm against libstdc++).
test rax, rax
je .L48
mov edx, 3
mov rsi, rsp
mov rdi, rsp
call rax
jmp .L48
# Invocation path: a null manager pointer branches to .L67, which throws.
.L68:
test rax, rax
je .L67
# Indirect call through the pointer stored at [rsp+56], with rdi = rbp.
mov rdi, rbp
call [QWORD PTR [rsp+56]]
# Loop head: keep invoking while the flag byte at [rsp+64] is still 0.
.L48:
cmp BYTE PTR [rsp+64], 0
mov rax, QWORD PTR [rsp+48]
je .L68
# Flag is set: call the manager (edx = 3) on the object at rbp if one is installed.
test rax, rax
je .L59
mov edx, 3
mov rsi, rbp
mov rdi, rbp
call rax
# Epilogue: release the frame and return 0.
.L59:
add rsp, 88
xor eax, eax
pop rbp
pop r12
ret
.L67:
call std::__throw_bad_function_call()
# NOTE(review): the two stubs below look like exception landing pads: each saves rax
# and jumps to a cleanup label (.L44 / .L52) that ends in _Unwind_Resume. Presumably
# they are not reached by falling through from the call above.
mov rbp, rax
jmp .L44
mov r12, rax
jmp .L52
# Cold part of main: two cleanup paths, each ending in a call to _Unwind_Resume.
main.cold:
# .L44: the value passed to _Unwind_Resume is in rbp (the stub that jumps here sets rbp = rax).
.L44:
# Call the temporary's manager (pointer at [rsp+16]) with edx = 3 if it is non-null.
mov rax, QWORD PTR [rsp+16]
test rax, rax
je .L45
mov edx, 3
mov rsi, rsp
mov rdi, rsp
call rax
.L45:
# Same for the object at [rsp+32], whose manager pointer is at [rsp+48].
# rbp no longer holds that address on this path, so it is recomputed with lea.
mov rax, QWORD PTR [rsp+48]
test rax, rax
je .L47
lea rsi, [rsp+32]
mov edx, 3
mov rdi, rsi
call rax
.L47:
mov rdi, rbp
call _Unwind_Resume
# .L52: cleanup of the object at rbp only; the value passed to _Unwind_Resume is in r12.
.L52:
mov rax, QWORD PTR [rsp+48]
test rax, rax
je .L53
mov edx, 3
mov rsi, rbp
mov rdi, rbp
call rax
.L53:
mov rdi, r12
call _Unwind_Resume
So already we see dynamic allocation, exception handling, and Foo::f1 is not inlined...
# Foo::f1 (std::function version): installs a new callable that targets Foo::f2,
# then calls do_actual_work() and stores its byte result at [rbx+32].
# rbx = rdi on entry (presumably `this`). Object offsets used here:
#   +0 16-byte payload, +16 manager pointer, +24 second pointer slot, +32 flag byte.
Foo::f1():
push rbp
push rbx
mov rbx, rdi
# edi = 24 is the size argument of the `operator new` call below.
mov edi, 24
sub rsp, 40
# Clear the manager slot of the stack temporary at [rsp+0..31].
mov QWORD PTR [rsp+16], 0
call operator new(unsigned long)
# Park the returned heap pointer at [rsp] and reload it as the low half of xmm0.
mov QWORD PTR [rsp], rax
movdqa xmm0, XMMWORD PTR [rsp]
mov ecx, OFFSET FLAT:std::_Function_handler<void (), std::_Bind<void (Foo::*(Foo*))()> >::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
# Read the object's previous state (xmm1, rdx, and rax below) before overwriting it.
movdqu xmm1, XMMWORD PTR [rbx]
mov rdx, QWORD PTR [rbx+24]
# Fill the 24-byte heap block: address of Foo::f2, a zero qword, and rbx.
mov QWORD PTR [rax+16], rbx
mov QWORD PTR [rax], OFFSET FLAT:Foo::f2()
mov QWORD PTR [rax+8], 0
mov rax, QWORD PTR [rbx+16]
movups XMMWORD PTR [rbx], xmm0
# xmm0 = { _M_manager address, qword loaded from .LC0 }; .LC0 is not shown in this listing.
movq xmm0, rcx
movhps xmm0, QWORD PTR .LC0[rip]
# Previous state goes to the stack temporary; the new pointer pair goes to [rbx+16].
mov QWORD PTR [rsp+16], rax
mov QWORD PTR [rsp+24], rdx
movaps XMMWORD PTR [rsp], xmm1
movups XMMWORD PTR [rbx+16], xmm0
# If the previous manager pointer is non-null, call it on the temporary with edx = 3
# (presumably the "destroy" manager operation; TODO confirm against libstdc++).
test rax, rax
je .L33
mov edx, 3
mov rsi, rsp
mov rdi, rsp
call rax
.L33:
# The low byte of do_actual_work()'s return value becomes the flag at [rbx+32].
call do_actual_work()
mov BYTE PTR [rbx+32], al
add rsp, 40
pop rbx
pop rbp
ret
# Code placed after `ret`: it calls the temporary's manager (edx = 3) if non-null and then
# _Unwind_Resume with the saved rax. NOTE(review): looks like this function's exception
# cleanup path; no visible branch in this listing targets it.
mov rbp, rax
mov rax, QWORD PTR [rsp+16]
test rax, rax
je .L35
mov edx, 3
mov rsi, rsp
mov rdi, rsp
call rax
.L35:
mov rdi, rbp
call _Unwind_Resume
Yikes, again a memory allocation, conditional jumps, dynamic dispatch and exception handling...
# Foo::f2 (std::function version): installs a new callable that targets Foo::f3,
# then calls do_actual_work() and stores its byte result at [rbx+32].
# rbx = rdi on entry (presumably `this`). Object offsets used here:
#   +0 16-byte payload, +16 manager pointer, +24 second pointer slot, +32 flag byte.
Foo::f2():
push rbp
push rbx
mov rbx, rdi
# edi = 24 is the size argument of the `operator new` call below.
mov edi, 24
sub rsp, 40
# Clear the manager slot of the stack temporary at [rsp+0..31].
mov QWORD PTR [rsp+16], 0
call operator new(unsigned long)
# Park the returned heap pointer at [rsp] and reload it as the low half of xmm0.
mov QWORD PTR [rsp], rax
movdqa xmm0, XMMWORD PTR [rsp]
mov ecx, OFFSET FLAT:std::_Function_handler<void (), std::_Bind<void (Foo::*(Foo*))()> >::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation)
# Read the object's previous state (xmm1, rdx, and rax below) before overwriting it.
movdqu xmm1, XMMWORD PTR [rbx]
mov rdx, QWORD PTR [rbx+24]
# Fill the 24-byte heap block: address of Foo::f3, a zero qword, and rbx.
mov QWORD PTR [rax+16], rbx
mov QWORD PTR [rax], OFFSET FLAT:Foo::f3()
mov QWORD PTR [rax+8], 0
mov rax, QWORD PTR [rbx+16]
movups XMMWORD PTR [rbx], xmm0
# xmm0 = { _M_manager address, qword loaded from .LC0 }; .LC0 is not shown in this listing.
movq xmm0, rcx
movhps xmm0, QWORD PTR .LC0[rip]
# Previous state goes to the stack temporary; the new pointer pair goes to [rbx+16].
mov QWORD PTR [rsp+16], rax
mov QWORD PTR [rsp+24], rdx
movaps XMMWORD PTR [rsp], xmm1
movups XMMWORD PTR [rbx+16], xmm0
# If the previous manager pointer is non-null, call it on the temporary with edx = 3
# (presumably the "destroy" manager operation; TODO confirm against libstdc++).
test rax, rax
je .L23
mov edx, 3
mov rsi, rsp
mov rdi, rsp
call rax
.L23:
# The low byte of do_actual_work()'s return value becomes the flag at [rbx+32].
call do_actual_work()
mov BYTE PTR [rbx+32], al
add rsp, 40
pop rbx
pop rbp
ret
# Code placed after `ret`: it calls the temporary's manager (edx = 3) if non-null and then
# _Unwind_Resume with the saved rax. NOTE(review): looks like this function's exception
# cleanup path; no visible branch in this listing targets it.
mov rbp, rax
mov rax, QWORD PTR [rsp+16]
test rax, rax
je .L25
mov edx, 3
mov rsi, rsp
mov rdi, rsp
call rax
.L25:
mov rdi, rbp
call _Unwind_Resume
Again, the exact same story.
# Foo::f3 (std::function version): calls do_actual_work(), does not read its result,
# and sets the flag byte at [rbx+32] to 1. No allocation or rebinding happens here.
Foo::f3():
# rbx is saved and restored around its use as a copy of the incoming rdi.
push rbx
mov rbx, rdi
call do_actual_work()
mov BYTE PTR [rbx+32], 1
pop rbx
ret
OK, much better; not much to see here.
Now let's try it without std::function, calling the member functions directly:
...
// Declaration only: no definition appears in this snippet, and the generated
// assembly below contains real `call do_actual_work()` instructions.
bool do_actual_work();
// Variant without std::function: each step is an ordinary member function
// and execute() calls them directly.
class Foo
{
public:
    Foo() { }

    // Each step forwards to do_actual_work() and returns its result.
    bool f1() { return do_actual_work(); }
    bool f2() { return do_actual_work(); }
    bool f3() { return do_actual_work(); }

    // Runs f1, then f2, then f3 in that order. Stops as soon as f1 or f2
    // returns true; f3's result is ignored.
    void execute()
    {
        if (f1() || f2())
            return;
        f3();
    }
};
int main()
{
Foo f;
f.execute();
}
Generated assembly: godbolt link
# main for the version without std::function: at most three direct calls to
# do_actual_work(), with no allocation and no indirect calls. Always returns 0.
main:
# 8-byte stack adjustment, undone before `ret` (presumably for call-site stack alignment).
sub rsp, 8
call do_actual_work()
# Non-zero result from the first call: fall through to .L3 and return.
test al, al
je .L7
.L3:
xor eax, eax
add rsp, 8
ret
.L7:
# Second call: a non-zero result returns via .L3.
call do_actual_work()
test al, al
jne .L3
# Third call: its result is not examined.
call do_actual_work()
jmp .L3
That's the entire program!
Moral of the story: do not use std::function unless it's unavoidable. It is unavoidable, for example, when you're writing a shared library and need to accept a user-provided callback in a generic way.