
I am writing a module in Fortran 90 to run PARPACK on a given matrix. I have an existing ARPACK code which works as expected, but when I converted it to PARPACK it started running into memory errors during cleanup. I am fairly new to coding and to Fortran, so please excuse any blunders I have made.

The code:

!PARPACK module
module parpack

implicit none

contains

subroutine parp
    
!   use mpi
    include '/usr/lib/x86_64-linux-gnu/openmpi/include/mpif.h'

    integer     comm, myid, nprocs, rc, nloc, status(MPI_STATUS_SIZE)

    integer, parameter ::   pres=8
    
    integer     nev, ncv, maxn, maxnev, maxncv
    parameter       (maxn=10**7, maxnev=maxn-1, maxncv=maxn)
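!   (maxn, maxnev and maxncv are left over from the static-array ARPACK
!   example and are not used below; everything is allocated from the
!   run-time n instead.)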

!   Arrays for PDNAUPD

    integer             iparam(11), ipntr(14)
    logical, allocatable ::     select(:)
    real(kind=pres), allocatable :: workd(:), workl(:), worktmp1(:), worktmp2(:)

!   Scalars for PDNAUPD

    character               bmat*1, which*2
    integer             ido, n, info, ierr, ldv
    integer             i, j, ishfts, maxitr, mode1, nconv
    integer(kind=pres)          lworkl
    real(kind=pres)         tol

!   Arrays for PDNEUPD

    real(kind=pres), allocatable :: d(:,:), resid(:), v(:,:), workev(:), z(:,:)
    
!   Scalars for PDNEUPD

    logical rvec, first
    real        sigmar, sigmai
!============================================== 

    real(kind=pres), allocatable :: mat(:,:)

    open (11, file = 'matrix.dat', status = 'old')

    read (11,*) n



!=============================================
    
!   Dimension of the problem
    
    nev = n/10  
    ncv = nev+2
    ldv = n 
    bmat = 'I'  
    which = 'LM'    
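!   bmat = 'I' selects a standard (rather than generalized) eigenproblem,
!   and which = 'LM' requests the eigenvalues of largest magnitude.
!   The nonsymmetric drivers require ncv - nev >= 2, hence ncv = nev + 2.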

!   Additional environment variables

    ido = 0         
    tol = 0.0E+0        
    info = 0            
    lworkl = 3*ncv**2+6*ncv 
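!   lworkl = 3*ncv**2 + 6*ncv is the documented minimum workspace for the
!   nonsymmetric drivers, and tol = 0 means "use machine precision".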



!   Algorithm Mode specifications:

    ishfts = 1  
    maxitr = 300
    mode1 = 1   
    
    iparam(1) = ishfts
    iparam(3) = maxitr
    iparam(7) = mode1

!   Distribution to nodes
    
!=============================================

!   Matrix allocation
    allocate (mat(n,n)) 

!   PDNAUPD 
    allocate (workd(5*n))
    allocate (workl(lworkl))
    allocate (resid(n))
    allocate (worktmp1(n))
    allocate (worktmp2(n))
    
!   PDNEUPD
    allocate (d(n,3))
    allocate (v(ldv,ncv))
    allocate (workev(3*n))
    allocate (z(ldv,ncv))
    allocate (select(ncv))
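!   (Note: all of these are sized with the global n. As far as I
!   understand, PARPACK only needs the local lengths on each rank,
!   so this over-allocates, but the sizes are at least large enough.)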
    

!===========================================
!   Read Matrix from the provided file

    mat = 0

    read(11,*) mat

    mat = transpose(mat)

!===========================================
!   MPI Calling 
    call MPI_INIT(ierr)
    comm = MPI_COMM_WORLD
    call MPI_COMM_RANK(comm, myid, ierr)
    call MPI_COMM_SIZE(comm, nprocs, ierr)
    
    nloc = n/nprocs

!   if ( mod(n, nprocs) .gt. myid ) nloc = nloc + n
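!   Simple block distribution: each rank owns nloc = n/nprocs consecutive
!   entries of the vectors. When n is not divisible by nprocs the
!   remainder is currently dropped; the commented line above would need
!   to add 1 (not n) to spread the leftover rows over the low ranks.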
    
!===============================================    
20  continue
    
    call pdnaupd(comm, ido, bmat, nloc, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, workd, workl, lworkl, info) !Top level solver

    call MPI_BARRIER(comm,ierr)

    print *, ido, info, iparam(5)   !for testing
!===============================================
    if (ido .eq. -1 .or. ido .eq. 1) then
    
        worktmp1 = 0
        if (myid .ne. 0) then   !It is slave
            call MPI_SEND(workd(ipntr(1)), nloc, MPI_DOUBLE_PRECISION, 0, 0, comm, ierr)
            
        else            !It is host
            worktmp1(1:nloc) = workd(ipntr(1):ipntr(1)+nloc-1)
            i = nprocs
            if (i .gt. 1) then
                do i=1,nprocs-1
                    call MPI_RECV(worktmp1(i*nloc+1), nloc, MPI_DOUBLE_PRECISION, i, 0, comm, status, ierr)
                end do
            endif
        endif
        
        call MPI_BARRIER(comm,ierr)
        
        if (myid .eq. 0) then   !It is host

!           Matrix multiplication
            worktmp2 = 0
            call matmultiply(n, mat, worktmp1, worktmp2)
            workd(ipntr(2):ipntr(2)+nloc-1) = worktmp2(1:nloc)

            i = nprocs
            
            if (i .gt. 1) then
                do i=1,nprocs-1
                    call MPI_SEND(worktmp2(i*nloc+1), nloc, MPI_DOUBLE_PRECISION, i, 100*i, comm, ierr)
                end do
            endif
        else            !It is slave
            call MPI_RECV(workd(ipntr(2)), nloc, MPI_DOUBLE_PRECISION, 0, 100*myid, comm, status, ierr)
        endif
        go to 20
!       call matmultiply(n, mat, workd(ipntr(1):ipntr(1)+n-1), workd(ipntr(2):ipntr(2)+n-1))

!       go to 20
    endif
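!   (For what it's worth, the gather/multiply/scatter above could also be
!   expressed with one collective; a sketch, assuming the simple block
!   layout with n divisible by nprocs and mat replicated on every rank:
!
!       call MPI_ALLGATHER(workd(ipntr(1)), nloc, MPI_DOUBLE_PRECISION, &
!                          worktmp1, nloc, MPI_DOUBLE_PRECISION, comm, ierr)
!       call matmultiply(n, mat, worktmp1, worktmp2)
!       workd(ipntr(2):ipntr(2)+nloc-1) = worktmp2(myid*nloc+1:(myid+1)*nloc)
!
!   where every rank does the full product and keeps only its own slice.)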
!   print *, info   !for testing
!===============================================================
!   Post-processing for eigenvalues


    rvec = .true.
    
    if (myid .eq. 0) then
        call pdneupd ( comm, rvec, 'A', select, d, d(1,2), z, ldv, sigmar, sigmai, &
        workev, bmat, n, which, nev, tol, resid, ncv, v, ldv, iparam, ipntr, &
        workd, workl, lworkl, info)
    endif

!   print *, info   !for testing

    close(11)
    call MPI_FINALIZE(ierr)
return
end subroutine
!==============================================================================================

!   Additional Function definitions
subroutine matmultiply(n, mat, v, w)

    integer     n, i, j
    integer, parameter ::   pres=8
    real(kind = pres)   mat(n,n), temp(n)
    real(kind = pres)   v(n), w(n)
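!   Computes w = transpose(mat)*v. The inner loop runs down a column of
!   mat, which is the cache-friendly order for Fortran's column-major
!   storage; temp accumulates the result so w is written only at the end.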
    
    temp = 0
    do j = 1,n
        do i = 1,n
            temp(j) = temp(j) + mat(i,j)*v(i)
        end do
    end do
    w = temp
return
end subroutine

end module

I apologize for the ton of redundant lines and comments; I have yet to clean it up for finalization.

When I run the code on a single thread with ./a.out, I get the following output:

Invalid MIT-MAGIC-COOKIE-1 key           1           0  1629760560
           1           0  1629760560
           1           0  1629760560
           1           0  1629760560
.
.
. <a long chain of the same line, as the code exhausts all iterations;
   the first number is ido, which starts at 1 instead of -1 for some reason,
   the second is info, and the third is iparam(5), which is a random-looking
   number until the final iteration>
.
          99           1           1
munmap_chunk(): invalid pointer

Program received signal SIGABRT: Process abort signal.

Backtrace for this error:
#0  0x7f5a863d0d01 in ???
#1  0x7f5a863cfed5 in ???
#2  0x7f5a8620420f in ???
#3  0x7f5a8620418b in ???
#4  0x7f5a861e3858 in ???
#5  0x7f5a8624e3ed in ???
#6  0x7f5a8625647b in ???
#7  0x7f5a862566cb in ???
#8  0x560f05ac1819 in ???
#9  0x560f05abd7bc in checker
    at /home/srivatsank/Desktop/fortran/lap_vs_arp/ptest/ptest.f90:45
#10  0x560f05abd8d9 in main
    at /home/srivatsank/Desktop/fortran/lap_vs_arp/ptest/ptest.f90:3
Aborted (core dumped)

Line 45 in ptest.f90 is the call parp statement and line 3 is use parpack (the name of the module). The main code is as follows:

program checker

    use parpack
    use arpack
!   use lapack
    implicit none
    !Program to test ARPACK and PARPACK

!   1. Variable definition

    integer         a,n,i
    real, allocatable ::        mat(:,:)
    real                t0, t1
    
    a=2
!   Loop
!   do 20 a = 1,3
    
!   Open File   
        open(unit=10, file = 'matrix.dat', status = 'replace')
        
!   2. Generate a random test matrix
        n = 10**a
        allocate (mat(n,n))
        call RANDOM_NUMBER(mat)
        
!   3. Save the matrix to matrix.dat
        write (10,*) n
        
        do 30 i=1,n
            write(10,*) mat(i,:)
30      end do
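!   matrix.dat format: first record is n, followed by n records of
!   n reals, one matrix row per record.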


        deallocate(mat)
        close(10)

!   4. Test time taken by each of the routines

!       call cpu_time(t0)
!       call arp
!       call cpu_time(t1)
!       print *, 'n:', n, 'ARPACK time taken:', t1-t0
        call cpu_time(t0)
        call parp
        call cpu_time(t1)
        print *, 'n:', n, 'PARPACK time taken:', t1-t0


!20 end do


end program checker

The memory error occurs at the very end of the subroutine, when the main program tries to return from it; I have verified this by adding print statements as the last lines of the subroutine. And on running mpirun -np 4 a.out, the code enters the pdneupd stage and then hangs indefinitely. Could anyone help?
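For comparison, the reverse-communication loop in my working serial ARPACK version boils down to roughly this (simplified from memory, so not the exact code; dnaupd in place of pdnaupd and the full length n everywhere):

10  continue
    call dnaupd(ido, bmat, n, which, nev, tol, resid, ncv, v, ldv, &
                iparam, ipntr, workd, workl, lworkl, info)
    if (ido .eq. -1 .or. ido .eq. 1) then
!       Apply the operator to workd(ipntr(1):...) and put the result in workd(ipntr(2):...)
        call matmultiply(n, mat, workd(ipntr(1)), workd(ipntr(2)))
        go to 10
    endif

There, ido does start at -1 on the first return, unlike what I see from pdnaupd above.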

  • Can you show us how you compile? You should be using debug flags. – jack Dec 28 '20 at 08:42
  • Can you cut down on the length of your module, e.g. remove comments, align code, etc.? It would be a lot easier to debug. – jack Dec 28 '20 at 08:43
  • The backtrace says the error occurs at line 45 of the routine checker. You haven't provided us with that, so we can't help you. Please provide us with a short but COMPLETE program that shows the problem. – Ian Bush Dec 28 '20 at 12:03
  • Hey @jack, I have removed the comments. I compiled it with ```mpif90 ptest.f90 arp_sub.o parp_sub.o -lparpack -larpack -o ptest -g```, where parp_sub.o contains the module. – Srivatsank Pullabhotla Dec 28 '20 at 17:50
  • @IanBush I just called the subroutine. I've updated the question to include the entire code. As mentioned earlier, the code only runs into errors at the moment of exiting the subroutine; I have verified this by printing lines before the return statement. I also believe the issue is an error in PDNAUPD that I'm unable to pin down, as ido after the first iteration is 1 instead of -1 (which I noticed was the case with DNAUPD when I ran plain ARPACK), and it converges only one value despite changing the number of iterations. – Srivatsank Pullabhotla Dec 28 '20 at 17:54
  • Hey guys, had a chance to look at it? – Srivatsank Pullabhotla Dec 30 '20 at 05:59
