3

I am trying to implement qsort algorithm in Fortran.

The implemented qsort is intended to operate over an array of a derived type which contains also another derived type.

The derived types are defined in a separate module as:

MODULE DATA_MODEL
        
        ! -------------------
        ! CONSTANTS
        ! -------------------
        integer,parameter               :: max_records = 100000000

        type :: timestamp
                integer                 :: year
                integer                 :: month
                integer                 :: day
                integer                 :: hour
                integer                 :: minute
                integer                 :: second
        end type
        type :: tape
                type(timestamp)         :: ts
                integer                 :: value1
                integer                 :: value2
        end type

END MODULE

This is what I have tried to implement the quicksort algorithm.

! DESCRIPTION:
!       THIS MODULE IMPLEMENTS QSORT ALGORITH USING LOMUTO PARTITION SCHEME
! PSEUDOCODE:
!       ALGORITHM QUICKSORT(A, LO, HI) IS
!           IF LO < HI THEN
!               P := PARTITION(A, LO, HI)
!               QUICKSORT(A, LO, P - 1)
!               QUICKSORT(A, P + 1, HI)
! 
!       ALGORITHM PARTITION(A, LO, HI) IS
!           PIVOT := A[HI]
!           I := LO
!           FOR J := LO TO HI DO
!               IF A[J] < PIVOT THEN
!                   SWAP A[I] WITH A[J]
!                   I := I + 1
!           SWAP A[I] WITH A[HI]
!           RETURN I
! 
! SORTING THE ENTIRE ARRAY IS ACCOMPLOMISHED BY QUICKSORT(A, 0, LENGTH(A) - 1).

module qsort

        use data_model

contains

        subroutine quicksort(a, lo, hi)

                implicit none

                ! SUBROUTINE PARAMETERS
                type(tape),allocatable,intent(in out)   :: a
                integer,intent(in)                      :: lo, hi

                ! ALGORITHM INTERNAL VARIABLES
                integer                                 :: p

                if (lo < hi) then
                        call partition(a, lo, hi, p)
                        call quicksort(a, lo, p - 1)
                        call quicksort(a, p + 1, hi)
                end if

        end subroutine

        subroutine  partition(a, lo, hi, p)
                
                implicit none

                ! SUBROUTINE PARAMETERS
                type(tape),allocatable,intent(inout)    :: a
                integer,intent(in)                      :: lo
                integer,intent(in)                      :: hi
                integer,intent(out)                     :: p

                ! ALGORITHM INTERNAL VARIABLES
                type(tape)                      :: pivot
                type(tape)                      :: swap
                integer                         :: i,j

                pivot = a(hi)
                i = lo
                do j = lo, hi
                        if (compare(a(j), pivot)) then
                               swap = a(i)
                               a(i) = a(j)
                               a(j) = swap
                               i = i + 1
                        endif
                end do
                swap = a(i)
                a(i) = a(hi)
                a(hi) = swap

                p = i

        end subroutine

        function compare(a,b)

                implicit none

                ! FUNCTION PARAMETERS
                type(tape)              :: a
                type(tape)              :: b
                logical                 :: compare

                if (a%ts%year < b%ts%year) then
                        compare = .true.
                else if (a%ts%year > a%ts%year) then
                        compare = .false.
                else if (a%ts%month < b%ts%month) then
                        compare = .true.
                else if (a%ts%month > b%ts%month) then
                        compare = .false.
                else if (a%ts%day < b%ts%day) then
                        compare = .true.
                else if (a%ts%day > b%ts%day) then
                        compare = .false.
                else if (a%ts%hour < b%ts%hour) then
                        compare = .true.
                else if (a%ts%hour > b%ts%hour) then
                        compare = .false.
                else if (a%ts%minute < b%ts%minute) then
                        compare = .true.
                else if (a%ts%minute > b%ts%minute) then
                        compare = .false.
                else if (a%ts%second < b%ts%second) then
                        compare = .true.
                else if (a%ts%second > b%ts%second) then
                        compare = .false.
                else
                        compare = .false.
                end if

        end function

end module

This is the errors I get while trying to compile it:

$ flang -c data_model.f95 
$ flang -c qsort.f95 
F90-S-0072-Assignment operation illegal to external procedure a (qsort.f95: 79)
F90-S-0076-Subscripts specified for non-array variable a (qsort.f95: 80)
F90-S-0076-Subscripts specified for non-array variable a (qsort.f95: 84)
F90-S-0076-Subscripts specified for non-array variable a (qsort.f95: 85)
F90-S-0076-Subscripts specified for non-array variable a (qsort.f95: 85)
F90-S-0076-Subscripts specified for non-array variable a (qsort.f95: 86)
  0 inform,   0 warnings,   6 severes, 0 fatal for partition
$ 

Edit 1: I have modified the source code with the subroutine based code, which makes more sense as we want to modify the arguments.

Edit 2: modifying the definition of a to type(tape),intent(in out) :: a(:) in both quicksort and partition subroutines make the module to compile without errors – see comments.

Ian Bush
  • 6,996
  • 1
  • 21
  • 27
M.E.
  • 4,955
  • 4
  • 49
  • 128
  • 1
    Your `quicksort` procedure should be a `subroutine` instead of a `function` here, because you are interested in the side effects, not a return value. And in that case, you would call it like this: `call quicksort(a, lo, hi)` – Rodrigo Rodrigues May 22 '21 at 10:45
  • I am modifying the code to use subroutines, both for `partition` and `quicksort`. My understanding is that `compare` can be a function. As a side question, are parameters to functions passed by value or reference in Fortran? -including arrays- – M.E. May 22 '21 at 10:49
  • 1
    Parameters are passed by reference always. In more recent versions of the language (why are you in 1995 anyway??), there is a parameter modifier `value` that... guess what, also pass by reference, but a reference o a copy of the variable, so similar in effect as passing by value – Rodrigo Rodrigues May 22 '21 at 10:52
  • 2
    "Parameters are passed by reference always". Not true, and the standard makes no such guarantee. But any change in a dummy argument will be reflected by a corresponding change in the actual argument, and that is more often that not implemented by passing by reference. – Ian Bush May 22 '21 at 10:58
  • @M.E. Could you include the text of the error in the question, please? – Ian Bush May 22 '21 at 10:59
  • So if I understood it correctly, if I want to modify a specific parameter (such as in this case) I shall use a `subroutine` with the right `intent` clause, and if I use a function, the function shall no modify any of the parameters. – M.E. May 22 '21 at 11:00
  • @IanBush I will edit the question including the subroutine version and the error list (which makes more sense now that it is clear that I have to use subroutines) – M.E. May 22 '21 at 11:04
  • It looks to me as though a should be an array, but you haven't declared it as such anywhere – Ian Bush May 22 '21 at 11:11
  • 1
    Please show how you call `quicksort`, but it's clear that you need to declare the argument `a` as an array. (And decide why it needs to be allocatable.) – francescalus May 22 '21 at 11:12
  • I am not calling it yet, just trying to compile the module. I am expecting to handle an allocatable array of derived type `tape`. The qsort sorts by the derived type `timestamp` embedded in the derived type `tape`. – M.E. May 22 '21 at 11:13
  • @IanBush I was expecting that this `type(tape),allocatable,intent(in out) :: a` was actually declaring that `a` is an allocatable array. But seems that the syntax is not correct. – M.E. May 22 '21 at 11:15
  • 1
    You can pass an allocatable array to a procedure without declaring the argument as `allocatable` in the procedure, it will be handled as a normal array. It only makes sense to declare the argument as `allocatable` if you plan to allocate/deallocate inside the procedure. – Rodrigo Rodrigues May 22 '21 at 11:16
  • 1
    Fortran (2003+) allows allocatable scalars. Arrays always need to be explicitly declared as arrays. You probably want an assumed shape array (`type(tape), intent(in) :: a(:)`.) – francescalus May 22 '21 at 11:16
  • 1
    I see, I think I have to use `a(:)` or `dimension(:)`? – M.E. May 22 '21 at 11:16
  • Ok I have modified the definition to `type(tape),intent(in out) :: a(:)` in both `quicksort` and `partition` and now it compiles without errors. Thanks for the help and patience. I know this was a basic question but it has been super useful. – M.E. May 22 '21 at 11:19
  • 1
    (Your `! DESCRIPTION` reminds me of times when printing was faster when restricted to upper case. 2021, ink jet printers are spewing out more than ten inches per second - full colour.) – greybeard May 22 '21 at 11:24
  • Fortran goes with a touch of vintage. IMHO it improves readability as it helps you to focus on the description flow. You face the risk of being called CAPTAIN CAPS LOCK though. – M.E. May 22 '21 at 11:41
  • I will learn about type-bound operators as that seems easier (and more reusable). – M.E. May 22 '21 at 12:25
  • The `quicksort` subroutine needs the `recursive` attribute as it calls itself. – jack May 22 '21 at 13:39
  • Further side notes: (1) you need `implicit none` only once at the beginning of your module. (2) Quicksort is a very general algorithm and it makes sense to implement it for various data types. This can be achieved by _template programming_ (via preprocessor statements). See for example this [answer](https://stackoverflow.com/a/66078604/10774817). – jack May 22 '21 at 13:43
  • related to @jack comment, is it needed to use `recursive` for quicksort subroutine? I have not included it and it works. – M.E. May 22 '21 at 20:56

2 Answers2

4

I saw that you got unblocked with your problem with the help of the comments, but let me give you some suggestions to make your implementation more modular, easy to use and modern.

Disclaimer: Some of my suggestions might need a more recent Fortran version than 95.

You can improve your timestamp type definition by providing overloads for the relational operators.

type :: timestamp
    integer :: year, month, day, hour = 0, minute = 0, second = 0
contains
    procedure, private :: eq, ne, gt, ge, lt, le
    generic :: operator(==) => eq
    generic :: operator(/=) => ne
    generic :: operator(>) => gt
    generic :: operator(>=) => ge
    generic :: operator(<) => lt
    generic :: operator(<=) => le
end type

(A subtle change there is that I have default values for hour, minute and second. So you can instantiate like this: timestamp(2021,5,22))

To get this working, you just need to provide implementations for the functions eq, ne, gt, ge, lt, le available in the module you define your type. Note that, when writing a generic type bound procedure, you must declare your bound parameter as class(timestamp) instead of type(timestamp).

elemental function lt(left, right) result(result)
    class(timestamp), intent(in) :: left, right
    logical :: result
    result = compare(left, right) < 0
end function

elemental function compare(this, other) result(result)
    class(timestamp), intent(in) :: this, other
    integer :: result
    if (this%year /= other%year) then
        result = sign(1, this%year - other%year)
    else if (this%month /= other%month) then
        result = sign(1, this%month - other%month)
    else if (this%day /= other%day) then
        result = sign(1, this%day - other%day)
    else if (this%hour /= other%hour) then
        result = sign(1, this%hour - other%hour)
    else if (this%minute /= other%minute) then
        result = sign(1, this%minute - other%minute)
    else if (this%second /= other%second) then
        result = sign(1, this%second - other%second)
    else 
        result = 0
    end if
end function

Another good practice you can implement is to control access of your module elements by using public and private.

module data_model
    implicit none
    public :: timestamp, tape
    private
    type :: timestamp
        ! (...)
    end type
    type :: tape
        type(timestamp) :: ts
        integer :: value1, value2
    end type
contains
    ! (...) implementations of eq, ne, gt, ge, lt, le
end

Then, when you use this module from another program unit, only the public names will be available. You can also use only specific name with the use only clause:

module qsort
    use data_model, only: tape
    implicit none
    public :: quicksort
    private
contains
    ! (...) your quicksort implementation
end

Finally, let me suggest some tweaks on your quicksort implementation.

First, I suggest that you don't need to pass around the boundaries lo and hi everywhere together with your array. One of the most distinctive features of Fortran is how easy it is to operate on array segments. You can call the quicksort procedure on a contiguous portion of your array, and the procedure can work on it in a boundaries-agnostic way, if you use assumed-shape arrays, like this: type(tape) :: a(:). Inside the procedure, the array segment is rebounded to start on index 1, no matter what are the bounds on the call site.

Besides that, as I mentioned in the comments, you don't need to declare the array argument as allocatable in this case. Even if the original array you are passing is originally allocatable, you can pass an allocatable array to a procedure without declaring the argument as allocatable in the procedure, it will be handled as a normal array. It only makes sense to declare the argument as allocatable if you plan to allocate/deallocate inside the procedure.

pure recursive subroutine quicksort(a)
    type(tape), intent(inout) :: a(:)
    integer :: p
    if (size(a) == 0) return
    call partition(a, p)
    call quicksort(a(:p-1))
    call quicksort(a(p+1:))
end

I declared this procedure as pure in this case, but that would depend on your specific use case. Making it pure helps me to remind declaring intents correctly and have well-though procedures (and there is a performance gain in some cases), but this brings many restrictions (like not being able to print inside the procedure). You can search for pure procedures to learn more.

Both quicksort and partition are implemented as subroutines here. I like to do this way always that the procedure performs important side-effects, like updates on the passed argument. If I need a returned value, I can have an intent(out) argument, like the argument out in partition, that returns the pivot position.

pure subroutine partition(a, out)
    type(tape), intent(inout) :: a(:)
    integer, intent(out) :: out
    integer :: i, j
    i = 1
    do j = 1, size(a)
        if (a(j)%ts < a(size(a))%ts) then
            call swap(a(i), a(j))
            i = i + 1
        end if
    end do
    call swap(a(i), a(size(a)))
    out = i
end

elemental subroutine swap(a, b)
    type(tape), intent(inout) :: a, b
    type(tape) :: temp
    temp = a
    a = b
    b = temp
end

You may note at a(j)%ts < a(size(a))%ts that I am making use of the overloaded operator < to compare two timestamp. This way, the comparison logic belongs to the same module as the type definition.

Finally, you can use the modules and make some tests on your quicksort implementation!

program main
    use data_model, only: tape, timestamp
    use qsort, only: quicksort
    implicit none

    type(tape) :: a(8) = [ &
        tape(timestamp(2020, 01, 08), 0, 0), &
        tape(timestamp(2021, 01, 30), 0, 0), &
        tape(timestamp(2020, 01, 06), 0, 0), &
        tape(timestamp(2019, 12, 14), 0, 0), & 
        tape(timestamp(2020, 01, 08), 0, 0), &
        tape(timestamp(2020, 05, 05), 0, 0), &
        tape(timestamp(2021, 04, 30), 0, 0), &
        tape(timestamp(2020, 10, 22), 0, 0) &
    ]

    call quicksort(a(3:7)) ! will sort in place, only from index 3 to 7
    call quicksort(a) ! will sort whole array
end

Works like a charm!

Rodrigo Rodrigues
  • 7,545
  • 1
  • 24
  • 36
  • Nice answer! I will probably delete mine (its just about the type-bound operator). The naming and types of dummy arguments of `lt` and `compare` are a bit unclear to me. Why have you not chosen `lt(this, other)` using `class(..) this` and `type(..) other`. Same for `compare(left, right)` using `type(..) left, right`. – jack May 22 '21 at 14:11
  • There is no real need to declare `lt, compare, swap` as `elemental`, is there? Or is it just some kind of best-practice? – jack May 22 '21 at 14:24
  • @jack, just a practice of mine. If something can be elemental, I declare it so. But I agree, not relevant to the OP's questions, though. – Rodrigo Rodrigues May 22 '21 at 14:28
  • Regarding the dummy types: In the functions `compare` and `lt`, having both the bound and the other arguments declared as `class(tape)` makes it possible to compare any two types derived from `tape`. If it is desired/correct or not, depends on the business logic of the type. – Rodrigo Rodrigues May 22 '21 at 14:48
  • However, having them declared as `class(tape)` in `quicksort` and `partition` is plain *wrong* (and i just fixed it, thanks for the tip), because `swap` is not implemented in a class-generic way (it would be possible, bout totally off-scope here). – Rodrigo Rodrigues May 22 '21 at 14:51
  • @jack I would recommend not deleting the other answer. It might be less complete but also helps to understand that functionality about type-bound operators – M.E. May 22 '21 at 20:58
  • Is there any performance drawback for using classes instead of plain derived types? – M.E. May 22 '21 at 20:59
  • @M.E. Reagarding the performance: The usual answer is that there is nothing in the Fortran standard which would indicate any performance drawbacks. Thus, it is compiler dependent and you would need to run tests yourself. I'd guess that you will not be able to time any differences at all... – jack May 24 '21 at 15:26
1

This is not an answer directly related to the quicksort algorithm but rather on how to implement type-bound operators.


You can move the compare function inside the data_model module. This decouples the modules further s.t. the quicksort module only contains the quicksort algorithm.

The compare function can be implemented by a type-bound operator operator(<). The following shows a quick implementation (only for year/month/day) and it should help you to edit your own code accordingly.

module timestamp_m
  implicit none

  private
  public timestamp

  type timestamp
    integer :: y, m, d
  contains
    generic            :: operator(<) => timestamp_lt
    procedure, private :: timestamp_lt
  end type

contains

  logical function timestamp_lt(this, rhs) result(tf)
    !! result of:     this < rhs
    class(timestamp), intent(in) :: this
    type(timestamp),  intent(in) :: rhs

    ! compare year
    if      (this%y < rhs%y) then
      tf = .true.
    else if (this%y > rhs%y) then
      tf = .false.
    else

      ! compare month
      if      (this%m < rhs%m) then
        tf = .true.
      else if (this%m > rhs%m) then
        tf = .false.
      else

        ! compare day
        if (this%d < rhs%d) then
          tf = .true.
        else
          tf = .false.
        end if
      end if
    end if
  end function

end module

You will need to adjust one line in your quicksort module:

module qsort
..
  subroutine quicksort(a, lo, hi)
    ..
    ! if (compare(a(j), pivot)) then      ! OLD. replace by:
    if (a(j)%ts < pivot%ts) then
    ..
jack
  • 1,658
  • 1
  • 7
  • 18