I've been trying to use f2py to interface an optimized fortran code for vector and matrix multiplication with python. To obtain a performance comparison useful for my purposes I perform the same product inside a cycle 100000 times. With a full fortran code the product takes 2.4 sec (ifort), while with f2py it takes approx 11 sec. Just for reference, with numpy it takes approx 20 sec. I ask both the fortran and the python part to write the time difference before and after the cycle and with f2py they both write 11 sec, so the code is not losing time in passing arrays. I triyed to understand if it is the way in which numpy array are stored, but I can't understand the problem. Do you have any idea? Thanks in advance
fortran Main
program Main
implicit none
save
integer :: seed, i, j, k
integer, parameter :: states =15
integer, parameter :: tessere = 400
real, dimension(tessere,states,states) :: matrix
real, dimension(states) :: vector
real :: start, finish
real :: prod(tessere)
do i=1,tessere
do j=1,states
do k=1,states
matrix(i,j,k) = i+j+k
end do
enddo
end do
do i=1,states
vector(i) = i
enddo
call doubleSum(vector,vector,matrix,states,tessere,prod)
end program
fortran subroutine:
subroutine doubleSum(ket, bra, M , states, tessere,prod)
integer :: its, j, k,t
integer :: states
integer :: tessere
real, dimension(tessere,states,states) :: M
real, dimension(states) :: ket
real, dimension(states) :: bra
real, dimension(tessere) :: prod
real,dimension(tessere,states) :: ctmp
call cpu_time(start)
do t=1,100000
ctmp=0.d0
do k=1,states
do j=1,states
do its=1,tessere
ctmp(its,k)=ctmp(its,k)+ M(its,k,j)*ket(j)
enddo
enddo
enddo
do its=1,tessere
prod(its)=dot_product(bra,ctmp(its,:))
enddo
enddo
call cpu_time(finish)
print '("Time = ",f6.3," seconds.")',finish-start
end subroutine
python script
import numpy as np
import time
import cicloS
M= np.random.rand(400,15,15)
ket=np.random.rand(15)
#M=np.asfortranarray(M)
#ket=np.asfortranarray(ket)
import time
start=time.time()
prod=cicloS.doublesum(ket,ket,M)
end=time.time()
print(end-start)
.pyf file generated with f2py and edited
! -*- f90 -*-
! Note: the context of this file is case sensitive.
python module cicloS
interface
subroutine doublesum(ket,bra,m,states,tessere,prod)
real dimension(states),intent(in) :: ket
real dimension(states),depend(states),intent(in) :: bra
real dimension(tessere,states,states),depend(states,states),intent(in) :: m
integer, optional,check(len(ket)>=states),depend(ket) :: states=len(ket)
integer, optional,check(shape(m,0)==tessere),depend(m) :: tessere=shape(m,0)
real dimension(tessere),intent(out) :: prod
end subroutine doublesum
end interface
end python module cicloS