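I think you're misunderstanding how templates are implemented. A template by itself generates no code; the compiler instantiates it on a need-to-use basis, producing a concrete class or function only for each set of template arguments that is actually used.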
Consider the following code...
// Returns the larger of a and b; when a > b is false (including when the two are equal), b is returned.
// Type must support operator> and be copyable: both arguments are taken by value and the result is returned by value.
template <typename Type>
Type mymax(Type a, Type b) {
return a > b ? a : b;
}
// Entry point that never references mymax, so nothing here requires the template to be instantiated.
int main(int argc, char** argv)
{
}
Compiling this, I get the following assembly.
# GCC output (x86-64, AT&T syntax) for the program whose main does not use mymax.
# main is the only function defined in this listing; no code for mymax appears.
.file "example.cpp"
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
# Prologue: save the caller's frame pointer and establish this frame.
# The .cfi_* directives only describe the frame for unwinders/debuggers.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Spill the two incoming arguments (argc in %edi, argv in %rsi) to the stack.
movl %edi, -4(%rbp)
movq %rsi, -16(%rbp)
# Return value of main: 0.
movl $0, %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size main, .-main
# Compiler identification string.
.ident "GCC: (Ubuntu/Linaro 4.8.1-10ubuntu9) 4.8.1"
.section .note.GNU-stack,"",@progbits
You'll notice it only contains the main function. Now I update my code to use the template function.
// Entry point that calls mymax once with Type explicitly set to double.
int main(int argc, char** argv)
{
// The int literals 3 and 4 are converted to double; the returned value is discarded.
mymax<double>(3,4);
}
Compiling that, I get a much longer assembly output, which now includes an instantiation of the template function that handles doubles. The compiler saw the template function being used with the type "double", so it generated a function to handle that case.
# GCC output (x86-64, AT&T syntax): main for the program that calls mymax<double>(3,4) once.
.file "example.cpp"
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
# Prologue: establish the frame and reserve 32 bytes of stack for locals.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
# Spill argc (%edi) and argv (%rsi).
movl %edi, -4(%rbp)
movq %rsi, -16(%rbp)
# 4616189618054758400 = 0x4010000000000000, the IEEE-754 bit pattern of the double 4.0.
# 4613937818241073152 = 0x4008000000000000, the IEEE-754 bit pattern of the double 3.0.
movabsq $4616189618054758400, %rdx
movabsq $4613937818241073152, %rax
# Move each constant through the stack slot at -24(%rbp) into an SSE register:
# %xmm1 = 4.0 (second argument, b), %xmm0 = 3.0 (first argument, a).
movq %rdx, -24(%rbp)
movsd -24(%rbp), %xmm1
movq %rax, -24(%rbp)
movsd -24(%rbp), %xmm0
# _Z5mymaxIdET_S0_S0_ is the mangled name of double mymax<double>(double, double).
call _Z5mymaxIdET_S0_S0_
# The result in %xmm0 is never read; main returns 0.
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size main, .-main
# Compiler-generated instantiation: double mymax<double>(double a, double b).
# In:  %xmm0 = a, %xmm1 = b.   Out: %xmm0 = (a > b ? a : b).
# The function gets its own COMDAT section and a weak symbol, so identical
# instantiations emitted by other translation units can be folded into one by the linker.
.section .text._Z5mymaxIdET_S0_S0_,"axG",@progbits,_Z5mymaxIdET_S0_S0_,comdat
.weak _Z5mymaxIdET_S0_S0_
.type _Z5mymaxIdET_S0_S0_, @function
_Z5mymaxIdET_S0_S0_:
.LFB2:
.cfi_startproc
# Prologue. %rsp is not adjusted: this leaf function keeps its locals below %rsp
# (the red zone permitted by the System V AMD64 ABI).
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Store the parameters: a at -8(%rbp), b at -16(%rbp).
movsd %xmm0, -8(%rbp)
movsd %xmm1, -16(%rbp)
# Compare a (reloaded into %xmm0) against b.
movsd -8(%rbp), %xmm0
ucomisd -16(%rbp), %xmm0
# Branch to .L9 when a > b does not hold (a <= b, or the compare is unordered because of a NaN).
jbe .L9
# a > b: select a.
movq -8(%rbp), %rax
jmp .L6
.L9:
# Otherwise: select b.
movq -16(%rbp), %rax
.L6:
# Move the selected bit pattern through -24(%rbp) into %xmm0, the return register for a double.
movq %rax, -24(%rbp)
movsd -24(%rbp), %xmm0
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size _Z5mymaxIdET_S0_S0_, .-_Z5mymaxIdET_S0_S0_
# Compiler identification string.
.ident "GCC: (Ubuntu/Linaro 4.8.1-10ubuntu9) 4.8.1"
.section .note.GNU-stack,"",@progbits
Now let's say I change the code to use that function twice.
// Entry point that calls mymax twice, both times with Type explicitly set to double.
int main(int argc, char** argv)
{
// Both calls use the same template argument; the int literals are converted to double and the results are discarded.
mymax<double>(3,4);
mymax<double>(4,5);
}
Again, let's look at the assembly it creates. It's nearly identical to the previous output, because most of that code is just the compiler's instantiation of mymax with "Type" replaced by double; the only difference is a second call in main. No matter how many times I use that function, it will only be defined once.
# GCC output (x86-64, AT&T syntax): main for the program that calls mymax<double> twice.
# Both call sites target the same symbol, _Z5mymaxIdET_S0_S0_.
.file "example.cpp"
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
# Prologue: establish the frame and reserve 32 bytes of stack for locals.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
# Spill argc (%edi) and argv (%rsi).
movl %edi, -4(%rbp)
movq %rsi, -16(%rbp)
# First call, mymax<double>(3,4):
# 4616189618054758400 = 0x4010000000000000 (double 4.0) -> %xmm1 (b);
# 4613937818241073152 = 0x4008000000000000 (double 3.0) -> %xmm0 (a).
movabsq $4616189618054758400, %rdx
movabsq $4613937818241073152, %rax
movq %rdx, -24(%rbp)
movsd -24(%rbp), %xmm1
movq %rax, -24(%rbp)
movsd -24(%rbp), %xmm0
call _Z5mymaxIdET_S0_S0_
# Second call, mymax<double>(4,5):
# 4617315517961601024 = 0x4014000000000000 (double 5.0) -> %xmm1 (b);
# 4616189618054758400 = 0x4010000000000000 (double 4.0) -> %xmm0 (a).
movabsq $4617315517961601024, %rdx
movabsq $4616189618054758400, %rax
movq %rdx, -24(%rbp)
movsd -24(%rbp), %xmm1
movq %rax, -24(%rbp)
movsd -24(%rbp), %xmm0
call _Z5mymaxIdET_S0_S0_
# Neither result is read; main returns 0.
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size main, .-main
# Compiler-generated instantiation: double mymax<double>(double a, double b).
# It is defined once in this listing even though main calls it twice.
# In:  %xmm0 = a, %xmm1 = b.   Out: %xmm0 = (a > b ? a : b).
# The function gets its own COMDAT section and a weak symbol, so identical
# instantiations emitted by other translation units can be folded into one by the linker.
.section .text._Z5mymaxIdET_S0_S0_,"axG",@progbits,_Z5mymaxIdET_S0_S0_,comdat
.weak _Z5mymaxIdET_S0_S0_
.type _Z5mymaxIdET_S0_S0_, @function
_Z5mymaxIdET_S0_S0_:
.LFB2:
.cfi_startproc
# Prologue. %rsp is not adjusted: this leaf function keeps its locals below %rsp
# (the red zone permitted by the System V AMD64 ABI).
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Store the parameters: a at -8(%rbp), b at -16(%rbp).
movsd %xmm0, -8(%rbp)
movsd %xmm1, -16(%rbp)
# Compare a (reloaded into %xmm0) against b.
movsd -8(%rbp), %xmm0
ucomisd -16(%rbp), %xmm0
# Branch to .L9 when a > b does not hold (a <= b, or the compare is unordered because of a NaN).
jbe .L9
# a > b: select a.
movq -8(%rbp), %rax
jmp .L6
.L9:
# Otherwise: select b.
movq -16(%rbp), %rax
.L6:
# Move the selected bit pattern through -24(%rbp) into %xmm0, the return register for a double.
movq %rax, -24(%rbp)
movsd -24(%rbp), %xmm0
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size _Z5mymaxIdET_S0_S0_, .-_Z5mymaxIdET_S0_S0_
# Compiler identification string.
.ident "GCC: (Ubuntu/Linaro 4.8.1-10ubuntu9) 4.8.1"
.section .note.GNU-stack,"",@progbits
So basically, templates don't affect the executable size any more than writing the functions by hand would. They're just a convenience. The compiler creates one function per distinct type the template is used with, so whether I use it 1 or 1000 times, there will be only one instance of it. (The instantiation is emitted as a weak symbol in its own COMDAT section, so even if several source files instantiate it, the linker keeps a single copy.) Now if I update my code to also handle a new type such as float, I'll get another function in my executable, but again only one, no matter how many times I use that function.