I have made a parallel version of a heat transfer simulation program with Open MP in Fortran 95, and it seems to work (results are equal). However, the time elapsed ("wall clock" time) is the same as the sequential version. My default number of threads is 4, but even if I change it, the computing time is around the same... I am new with Open MP and parallelization too, so I am struggling to understand where are my errors. Maybe you could help me to find them?
Edit:
Here is my code
program heat
use omp_lib
implicit none
! Variables
real,PARAMETER :: alpha = 1e-02 ! heat diffusive coefficient
integer,PARAMETER :: s = 10 ! Kelvin/s
integer,PARAMETER :: L = 1 ! cubic side length
real,PARAMETER :: dt = 1e-03 ! time step
real,PARAMETER :: dx = 1e-02 ! space step
real,PARAMETER :: dy = 1e-02 ! space step
real,PARAMETER :: dz = 1e-02 ! space step
integer,PARAMETER :: N = int(L/dx) ! Finite volumes
integer,PARAMETER :: t_obs = 120 ! secondes
integer,PARAMETER :: ite = int(t_obs/dt) ! iterations
real :: c = (alpha*dt)/(dx**2) ! scheme stability criteria
integer :: total_length = (N+2)
real,ALLOCATABLE,DIMENSION(:,:,:) :: T
! Boundaries conditions
real :: T_R = 298 ! right 298
real :: T_L = 298 ! left 298
real :: T_F = 323 ! front 323
real :: T_B = 323 ! back 323
real :: T_U = 373 ! up 373
real :: T_D = 373 ! down 373
integer :: i,j,k,iteration
print*, "c = ",c
ALLOCATE(T(total_length,total_length,total_length))
! Initial condition
do i = 1,N+2
do j = 1, N+2
do k = 1, N+2
if (j == 1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T_F
else if (i == N+2 .and. j /= N+2 .and. k /= N+2 .and. k /= 1) then
T(i,k,j) = T_R
else if (i == 1 .and. j /= 1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T_L
else if (i /= 1 .and. j == N+2 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T_B
else if (k == N+2) then
T(i,k,j) = T_U
else if (k == 1) then
T(i,k,j) = T_D
else
T(i,k,j) = 283
end if
end do
end do
end do
! Datas in file:
open(10,file='para_heat_3D_unsteady2.dat',FORM = 'UNFORMATTED')
! Iterative loop
!$omp parallel default(shared) private(i,j,k)
do iteration = 0,15
!$omp single
print*, iteration
!$omp end single
if (iteration == 0) then
!$omp single
i = 1
do while (i < N+3)
do j = 1, N+2
WRITE(10) T(i,:,j)
! WRITE(10,*) T(i,:,j) ! formatted
end do
i = i + 1
end do
!$omp end single
else
! Boundary Conditions
T(:,:,1) = T_F
T(N+2,:,:) = T_R
T(1,:,:) = T_L
T(:,:,N+2) = T_B
T(:,N+2,:) = T_U
T(:,1,:) = T_D
!$omp do
do i = 2, N+1
do j = 2, N+1
do k = 2, N+1
if (i == N+1 .and. j == N+1 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-9*c) + T_R * (2*c) + T(i-1,k,j)*c + T_B *(2*c) + c*T(i,k,j-1) + &
2*c*T_U + c*T(i,k-1,j) + s*dt
else if (i == 2 .and. j == N+1 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-9*c) + T_L * (2*c) + c*T(i+1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (i == N+1 .and. j == N+1 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + 2*c*T_R + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else if (i == 2 .and. j == N+1 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + 2*c*T_L + c*T(i+1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else if (i /= 1 .and. i /= N+2 .and. j == 2 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + c*T(i-1,k,j) + c*T(i,k,j+1) + 2*c*T_F + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (i == 2 .and. j == 2 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + 2*c*T_L + c*T(i,k,j+1) + 2*c*T_F + &
c*(T(i,k+1,j) + T(i,k-1,j)) + s*dt
else if (i /= 1 .and. i /= N+2 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i-1,k,j) + c*T(i+1,k,j) + c*T(i,k,j+1) + 2*c*T_F + &
c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == N+1 .and. j == 2 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + c*T(i,k,j+1) + 2*c*T_F + &
c*(T(i,k+1,j) + T(i,k-1,j)) + s*dt
else if (i == N+1 .and. j /= 1 .and. j /= N+2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + c*(T(i,k,j+1) + T(i,k,j-1)) + &
c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == N+1 .and. j == N+1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) + c*T(i,k+1,j) &
+ c*T(i,k-1,j) + s*dt
else if (i == N+1 .and. j /= 1 .and. j /= N+2 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_R + c*T(i-1,k,j) + c*T(i,k,j+1) + c*T(i,k,j-1) + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (j == N+1 .and. k == N+1 .and. i /= 1 .and. i /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) + 2*c*T_U &
+ c*T(i,k-1,j) + s*dt
else if (i == 2 .and. k == N+1 .and. j /= 1 .and. j /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_L + c*T(i+1,k,j) + 2*c*T_U + c*T(i,k,j-1)+ c*T(i,k,j+1) &
+ c*T(i,k-1,j) + s*dt
else if (i == 2 .and. j == N+1 .and. k /= 1 .and. k /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_L + c*T(i+1,k,j) + 2*c*T_B + c*T(i,k,j-1) + c*T(i,k+1,j) &
+ c*T(i,k-1,j) + s*dt
else if (k == 2 .and. j == N+1 .and. i /= 1 .and. i /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + c*T(i+1,k,j) + c*T(i-1,k,j) + 2*c*T_B + c*T(i,k,j-1) &
+ c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == 2 .and. k == 2 .and. j /= 1 .and. j /= N+2) then
T(i,k,j) = T(i,k,j) * (1-8*c) + 2*c*T_L + c*T(i+1,k,j) + c*T(i,k,j+1) + c*T(i,k,j-1) + &
c*T(i,k+1,j) + 2*c*T_D + s*dt
else if (i == N+1 .and. j == 2 .and. k == N+1) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i+1,k,j) + 2*c*T_L + c*T(i,k,j+1) + 2*c*T_F + c*T(i,k-1,j) &
+ 2*c*T_U + s*dt
else if (i == N+1 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i-1,k,j) + 2*c*T_R + c*T(i,k,j+1) + 2*c*T_F + c*T(i,k-1,j) &
+ 2*c*T_U + s*dt
else if (i == 2 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i+1,k,j) + 2*c*T_L + c*T(i,k,j+1) + 2*c*T_F + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else if (i == N+1 .and. j == 2 .and. k == 2) then
T(i,k,j) = T(i,k,j) * (1-9*c) + c*T(i-1,k,j) + 2*c*T_R + c*T(i,k,j+1) + 2*c*T_F + 2*c*T_D + &
c*T(i,k+1,j) + s*dt
else
T(i,k,j) = T(i,k,j) * (1-6*c) + c*T(i-1,k,j) + c*T(i+1,k,j) + c*T(i,k,j+1) + c*T(i,k,j-1) + &
c*T(i,k-1,j) + c*T(i,k+1,j) + s*dt
end if
end do
end do
end do
!$omp end do
! Print in a file T values
!$omp single
i = 1
do while (i < N+3)
do j = 1, N+2
WRITE(10) T(i,:,j)
! WRITE(10,*) T(i,:,j) ! formatted
end do
i = i + 1
end do
!$omp end single
end if
end do
!$omp end parallel
DEALLOCATE(T)
end program heat
So for these 15 iterations, with the formatted way the time elapsed is now around 16 seconds, whatever the number of threads, and is a little bit less than sequential (around 21 sec). With the unformatted way it is highly faster (around 1 sec) but I receive symbols like "�.." (maybe binary ?) and I don't know how to post-treat it and plot something with it... I compile it with: gfortran -fopenmp -g -fcheck=all -Wall para_heat_3D_unsteady.f95 I have no environment variables set.