Will having a "function tunnel" take a hit to performance?
Most likely not, especially since you are dealing with templates here: the definitions are all visible to the compiler and therefore easy for it to inline. Take a look at this link: https://godbolt.org/g/cWR7N3
What I've done is compile these two code snippets. First, calling the functions through the Node class.
#include <vector>
// These three functions are declared but deliberately left undefined here.
// Because the compiler never sees their bodies, it cannot inline the calls
// to them or optimize those calls away.
void insert_impl(void const*);
void erase_impl(void const*);
void erase_impl_vec(void const*);
/// Minimal stand-in for a pointer container. Each member does nothing but
/// forward the address of its argument to one of the declared-only `*_impl`
/// functions.
template <class T>
class PointerSet
{
public:
    /// Passes the address of `v` to `insert_impl`.
    void insert(T& v)
    {
        insert_impl(&v);
    }

    /// Passes the address of `v` to `erase_impl`.
    void erase(T& v)
    {
        erase_impl(&v);
    }

    /// Passes the address of the vector object `v` itself (not of its
    /// elements) to `erase_impl_vec`.
    void erase(const std::vector<T>& v)
    {
        erase_impl_vec(&v);
    }
};
/// Wrapper around a `PointerSet<T>` whose members are one-line forwarders to
/// the set (the "function tunnel" being examined).
template <class T>
class Node
{
public:
    /// Forwards `p` to `PointerSet<T>::insert`.
    void insertLink(T& p)
    {
        Links.insert(p);
    }

    /// Forwards `p` to the single-element `PointerSet<T>::erase`.
    void eraseLink(T& p)
    {
        Links.erase(p);
    }

    /// Forwards `p` to the vector overload of `PointerSet<T>::erase`.
    void eraseLink(const std::vector<T>& p)
    {
        Links.erase(p);
    }

private:
    PointerSet<T> Links;
};
int main()
{
Node<int> n;
int x;
n.insertLink(x);
n.eraseLink(x);
std::vector<int> v;
n.eraseLink(v);
}
And then calling them directly on the PointerSet class.
#include <vector>
// These three functions are declared but deliberately left undefined here.
// Because the compiler never sees their bodies, it cannot inline the calls
// to them or optimize those calls away.
void insert_impl(void const*);
void erase_impl(void const*);
void erase_impl_vec(void const*);
/// Minimal stand-in for a pointer container. Each member does nothing but
/// forward the address of its argument to one of the declared-only `*_impl`
/// functions.
template <class T>
class PointerSet
{
public:
    /// Passes the address of `v` to `insert_impl`.
    void insert(T& v)
    {
        insert_impl(&v);
    }

    /// Passes the address of `v` to `erase_impl`.
    void erase(T& v)
    {
        erase_impl(&v);
    }

    /// Passes the address of the vector object `v` itself (not of its
    /// elements) to `erase_impl_vec`.
    void erase(const std::vector<T>& v)
    {
        erase_impl_vec(&v);
    }
};
int main()
{
PointerSet<int> n;
int x;
n.insert(x);
n.erase(x);
std::vector<int> v;
n.erase(v);
}
As you can see in the link (https://godbolt.org/g/cWR7N3), the compiler generates identical assembly for both versions:
main: # @main
push rbx # preserve rbx; it is reused below to hold the address of x
sub rsp, 48 # reserve 48 bytes of stack
lea rbx, [rsp + 12] # rbx = address passed as &x to both calls below
mov rdi, rbx # argument = &x
call insert_impl(void const*)
mov rdi, rbx # argument = &x again
call erase_impl(void const*)
xorps xmm0, xmm0 # xmm0 = 0
movaps xmmword ptr [rsp + 16], xmm0 # zero the 16 bytes at rsp+16 ...
mov qword ptr [rsp + 32], 0 # ... and the next 8 bytes (presumably the empty std::vector v)
lea rdi, [rsp + 16] # argument = address passed as &v
call erase_impl_vec(void const*)
mov rdi, qword ptr [rsp + 16] # reload the first word of v (presumably its buffer pointer)
test rdi, rdi
je .LBB0_3 # skip the delete when that word is null
call operator delete(void*)
.LBB0_3:
xor eax, eax # return value 0
add rsp, 48
pop rbx
ret
mov rbx, rax # unwind path: keep the value in rax, handed to _Unwind_Resume below
mov rdi, qword ptr [rsp + 16] # same null-checked delete of v's first word as on the normal path
test rdi, rdi
je .LBB0_6
call operator delete(void*)
.LBB0_6:
mov rdi, rbx # argument = value saved from rax
call _Unwind_Resume
# Exception-handling call-site table (presumably belonging to main above).
# It holds three call-site records; only call site 2 names a landing pad
# (.Ltmp2), the other two have none.
GCC_except_table0:
.byte 255 # @LPStart Encoding = omit
.byte 3 # @TType Encoding = udata4
.byte 41 # @TType base offset
.byte 3 # Call site Encoding = udata4
.byte 39 # Call site table length
.long .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.long .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
.long 0 # has no landing pad
.byte 0 # On action: cleanup
.long .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
.long .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
.long .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2
.byte 0 # On action: cleanup
.long .Ltmp1-.Lfunc_begin0 # >> Call Site 3 <<
.long .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0
.long 0 # has no landing pad
.byte 0 # On action: cleanup