I have some difficulties setting 2 GPUs for peer to peer communication. I am using Cuda 4.0 and programming with fortran. PGI compiler
I wrote a program which confirm I have 4 GPUs available on my node.
I decided to use two of them but having the following error: 0: DEALLOCATE: invalid device pointer.
subroutine directTransfer()
use cudafor
implicit none
integer, parameter :: N = 4*1024*1024
real, pinned, allocatable :: a(:), b(:)
real, device, allocatable :: a_d(:), b_d(:)
!these hold free and total memory before and after
!allocation, used to verify allocation happening on proper devices
integer (int_ptr_kind()),allocatable ::
& freeBefore(:), totalBefore(:),
& freeAfter(:), totalAfter(:)
integer :: istat, nDevices, i, accessPeer, timingDev
type(cudaDeviceProp)::prop
type(cudaEvent)::startEvent,stopEvent
real :: time
!allocate host arrays
allocate(a(N), b(N))
allocate(freeBefore(0:nDevices -1),
& totalBefore(0:nDevices -1))
allocate(freeAfter(0:nDevices -1),
& totalAfter(0:nDevices -1))
write(*,*) 'Start!'
!get devices ionfo (including total and free memory)
!before allocation
istat = cudaGetDeviceCount(nDevices)
if(nDevices < 2) then
write(*,*) 'Need at least two CUDA capable devices'
stop
end if
write(*,"('Number of CUDA-capable devices: ',
& i0, /)"),nDevices
do i = 0, nDevices - 1
istat = cudaGetDeviceProperties(prop, i)
istat = cudaSetDevice(i)
istat = cudaMemGetInfo(freeBefore(i), totalBefore(i))
end do
!!!Here is the trouble zone!!!!
istat = cudaSetDevice(0)
allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
deallocate(freeBefore, totalBefore,freeAfter,totalAfter)
deallocate(a,b,a_d,b_d)
end subroutine directTransfer
With the following I have no error:
istat = cudaSetDevice(0)
allocate(a_d(N))
!istat = cudaSetDevice(1)
!allocate(b_d(N))
With this, also no error:
!istat = cudaSetDevice(0)
!allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
But this return error
istat = cudaSetDevice(0)
allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
So it seems I cannot set 2GPUs to start my program. Could you help me understand why it is not possible to set 2GPUs and a hint to solve this?