#include <iostream>
#include <armadillo>
using namespace std;
int main()
{
arma::Mat<float> a;
cout << sizeof(a) << "\n";
return 0;
}
The code above prints a different size when it is compiled with NVCC (CUDA) than when it is compiled with GCC.
$ g++ -o main test.cu.cpp -O3 -larmadillo
$ ./main
112
$ nvcc -o main test.cu.cpp -O3 -larmadillo
$ ./main
104
I want the NVCC build to behave the same as the GCC build.
Where does this difference come from? My project requires compiling different parts separately, and because the legacy part must be built with GCC, converting everything to NVCC is not possible.
Edit: here are the verbose compile logs for NVCC and GCC; I'm not sure what to look for.
[huyduc@ny5-dtlgpu06 test]$ nvcc -o main test.cu.cpp -O3 -larmadillo --verbose
#$ _NVVM_BRANCH_=nvvm
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda-11.4/bin
#$ _THERE_=/usr/local/cuda-11.4/bin
#$ _TARGET_SIZE_=
#$ _TARGET_DIR_=
#$ _TARGET_DIR_=targets/x86_64-linux
#$ TOP=/usr/local/cuda-11.4/bin/..
#$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-11.4/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/usr/local/cuda-11.4/bin/../lib:/local/export/scratch/pulse_packages/lib/:/local/export/scratch/pulse_packages/lib64/:/local/export/scratch/pulse_packages/usr/lib:/local/export/scratch/pulse_packages/usr/local/lib/:/local/export/scratch/pulse_packages/mkl/lib:/local/export/scratch/pulse_packages/mkl/mkl/lib/intel64:/local/export/scratch/pulse_packages/mods/libtorch/lib::/usr/local/cuda-11.4/lib64:/usr/lib/x86_64-linux-gnu
#$ PATH=/usr/local/cuda-11.4/bin/../nvvm/bin:/usr/local/cuda-11.4/bin:/local/export/scratch/pulse_packages/bin:/local/export/scratch/pulse_packages/usr/bin:/local/export/scratch/pulse_packages/mods/libtorch/bin:/usr/lib64/ccache:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/ibutils/bin:/home/huyduc/.local/bin:/home/huyduc/bin:/usr/local/cuda-11.4/bin
#$ INCLUDES="-I/usr/local/cuda-11.4/bin/../targets/x86_64-linux/include"
#$ LIBRARIES= "-L/usr/local/cuda-11.4/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-11.4/bin/../targets/x86_64-linux/lib"
#$ CUDAFE_FLAGS=
#$ PTXAS_FLAGS=
#$ gcc -c -x c++ -D__NVCC__ -O3 "-I/usr/local/cuda-11.4/bin/../targets/x86_64-linux/include" -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=4 -D__CUDACC_VER_BUILD__=100 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=4 -m64 "test.cu.cpp" -o "/tmp/tmpxft_000140a2_00000000-5_test.cu.o"
#$ nvlink --arch=sm_52 --register-link-binaries="/tmp/tmpxft_000140a2_00000000-3_main_dlink.reg.c" -m64 -larmadillo "-L/usr/local/cuda-11.4/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-11.4/bin/../targets/x86_64-linux/lib" -cpu-arch=X86_64 "/tmp/tmpxft_000140a2_00000000-5_test.cu.o" -lcudadevrt -o "/tmp/tmpxft_000140a2_00000000-6_main_dlink.sm_52.cubin"
#$ fatbinary -64 -link "--image3=kind=elf,sm=52,file=/tmp/tmpxft_000140a2_00000000-6_main_dlink.sm_52.cubin" --embedded-fatbin="/tmp/tmpxft_000140a2_00000000-4_main_dlink.fatbin.c"
#$ rm /tmp/tmpxft_000140a2_00000000-4_main_dlink.fatbin
#$ gcc -c -x c++ -DFATBINFILE="\"/tmp/tmpxft_000140a2_00000000-4_main_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"/tmp/tmpxft_000140a2_00000000-3_main_dlink.reg.c\"" -I. -D__NV_EXTRA_INITIALIZATION= -D__NV_EXTRA_FINALIZATION= -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -O3 "-I/usr/local/cuda-11.4/bin/../targets/x86_64-linux/include" -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=4 -D__CUDACC_VER_BUILD__=100 -D__CUDA_API_VER_MAJOR__=11 -D__CUDA_API_VER_MINOR__=4 -m64 "/usr/local/cuda-11.4/bin/crt/link.stub" -o "/tmp/tmpxft_000140a2_00000000-7_main_dlink.o"
#$ g++ -O3 -m64 -Wl,--start-group "/tmp/tmpxft_000140a2_00000000-7_main_dlink.o" "/tmp/tmpxft_000140a2_00000000-5_test.cu.o" -larmadillo "-L/usr/local/cuda-11.4/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-11.4/bin/../targets/x86_64-linux/lib" -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group -o "main"
[huyduc@ny5-dtlgpu06 test]$ g++ -o gccc test.cu.cpp -O3 -larmadillo --verbose
Using built-in specs.
COLLECT_GCC=g++
COLLECT_LTO_WRAPPER=/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: ../gcc-10.2.0/configure --prefix=/local/export/scratch/pulse_packages --libexecdir=/local/export/scratch/pulse_packages/lib --enable-shared --enable-threads=posix --enable-__cxa_atexit --disable-multilib --enable-bootstrap --enable-clocale=gnu --enable-languages=c,c++,fortran --with-zstd=no
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 10.2.0 (GCC)
COLLECT_GCC_OPTIONS='-o' 'gccc' '-O3' '-v' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/cc1plus -quiet -v -D_GNU_SOURCE test.cu.cpp -quiet -dumpbase test.cu.cpp -mtune=generic -march=x86-64 -auxbase test.cu -O3 -version -o /tmp/cctgH2WC.s
GNU C++14 (GCC) version 10.2.0 (x86_64-pc-linux-gnu)
compiled by GNU C version 10.2.0, GMP version 6.1.0, MPFR version 3.1.4, MPC version 1.0.3, isl version isl-0.18-GMP
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
ignoring nonexistent directory "/local/export/scratch/pulse_packages/usr/include/"
ignoring nonexistent directory "/local/export/scratch/pulse_packages/usr/local/include"
ignoring nonexistent directory "/usr/include/x86_64-linux-gnu"
ignoring duplicate directory "/local/export/scratch/pulse_packages/mods/libtorch/include"
ignoring duplicate directory "/local/export/scratch/pulse_packages/include"
ignoring nonexistent directory "/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../x86_64-pc-linux-gnu/include"
#include "..." search starts here:
#include <...> search starts here:
/local/export/scratch/pulse_packages/include/
/local/export/scratch/pulse_packages/mods/libtorch/include
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../include/c++/10.2.0
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../include/c++/10.2.0/x86_64-pc-linux-gnu
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../include/c++/10.2.0/backward
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/include
/usr/local/include
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/include-fixed
/usr/include
End of search list.
GNU C++14 (GCC) version 10.2.0 (x86_64-pc-linux-gnu)
compiled by GNU C version 10.2.0, GMP version 6.1.0, MPFR version 3.1.4, MPC version 1.0.3, isl version isl-0.18-GMP
GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
Compiler executable checksum: fcab5cdad8fab5c0a9dfd14a10ab3fb4
COLLECT_GCC_OPTIONS='-o' 'gccc' '-O3' '-v' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../x86_64-pc-linux-gnu/bin/as -v --64 -o /tmp/ccPwcVUC.o /tmp/cctgH2WC.s
GNU assembler version 2.36.1 (x86_64-pc-linux-gnu) using BFD version (GNU Binutils) 2.36.1
COMPILER_PATH=/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../x86_64-pc-linux-gnu/bin/
LIBRARY_PATH=/local/export/scratch/pulse_packages/lib/../lib64/:/local/export/scratch/pulse_packages/lib64/../lib64/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib64/:/lib/../lib64/:/usr/lib/../lib64/:/local/export/scratch/pulse_packages/lib/:/local/export/scratch/pulse_packages/lib64/:/local/export/scratch/pulse_packages/mkl/lib/:/local/export/scratch/pulse_packages/mkl/mkl/lib/intel64/:/local/export/scratch/pulse_packages/mods/libtorch/lib/:./:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../x86_64-pc-linux-gnu/lib/:/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../:/lib/:/usr/lib/
COLLECT_GCC_OPTIONS='-o' 'gccc' '-O3' '-v' '-shared-libgcc' '-mtune=generic' '-march=x86-64'
/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/collect2 -plugin /local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/liblto_plugin.so -plugin-opt=/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/lto-wrapper -plugin-opt=-fresolution=/tmp/ccd147SC.res -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc -plugin-opt=-pass-through=-lc -plugin-opt=-pass-through=-lgcc_s -plugin-opt=-pass-through=-lgcc --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o gccc /lib/../lib64/crt1.o /lib/../lib64/crti.o /local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/crtbegin.o -L/local/export/scratch/pulse_packages/lib/../lib64 -L/local/export/scratch/pulse_packages/lib64/../lib64 -L/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0 -L/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -L/local/export/scratch/pulse_packages/lib -L/local/export/scratch/pulse_packages/lib64 -L/local/export/scratch/pulse_packages/mkl/lib -L/local/export/scratch/pulse_packages/mkl/mkl/lib/intel64 -L/local/export/scratch/pulse_packages/mods/libtorch/lib -L. -L/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../../../x86_64-pc-linux-gnu/lib -L/local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/../../.. /tmp/ccPwcVUC.o -larmadillo -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /local/export/scratch/pulse_packages/lib/gcc/x86_64-pc-linux-gnu/10.2.0/crtend.o /lib/../lib64/crtn.o
COLLECT_GCC_OPTIONS='-o' 'gccc' '-O3' '-v' '-shared-libgcc' '-mtune=generic' '-march=x86-64'