2

Here is a simple C program reading a file in parallel with MPI IO:

#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"

#define N 10

/*
 * Reads the N doubles stored in "data.bin" in parallel with MPI-IO.
 * Each rank computes its own contiguous slice [i0, i1) of the N element
 * indices, reads that many doubles from the file, and prints them.
 *
 * NOTE(review): no return type is declared; implicit int is not valid in
 * C99 and later, so this should read `int main(...)`.
 * NOTE(review): the results of malloc and of the MPI_File_* calls are not
 * checked.
 */
main( int argc, char **argv )
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    /* Split the N element indices into `size` contiguous slices;
       this rank owns the half-open range [i0, i1). */
    int i0 = N *  rank / size;
    int i1 = N * (rank+1) / size;
    printf("rank: %d, i0: %d, i1: %d\n", rank, i0, i1);

    /* Buffer for this rank's slice, pre-filled with 123. -- presumably a
       sentinel so elements the read does not overwrite stand out. */
    int i;
    double* data = malloc( (i1-i0)*sizeof(double) );
    for (i = 0 ; i < i1-i0 ; i++)
        data[i] = 123.;

    /* Every rank of MPI_COMM_WORLD opens the same file read-only. */
    MPI_File f;
    MPI_File_open(MPI_COMM_WORLD, "data.bin", MPI_MODE_RDONLY, 
                  MPI_INFO_NULL, &f);

    /* NOTE(review): the second argument of MPI_File_set_view is a
       displacement in BYTES, but i0 is an element index. For i0 != 0 the
       view therefore starts at byte i0 rather than at byte
       i0 * sizeof(double), which is consistent with the wrong values
       reported for rank 1 in the two-process run. */
    MPI_File_set_view(f, i0, MPI_DOUBLE, MPI_DOUBLE, "native",
                      MPI_INFO_NULL);

    /* Read i1-i0 doubles into data through the view set above. */
    MPI_Status status;
    MPI_File_read(f, data, i1-i0, MPI_DOUBLE, &status);

    /* Ask the status object how many MPI_DOUBLE elements were read. */
    int count;
    MPI_Get_count(&status, MPI_DOUBLE, &count);
    printf("rank %d, %d value read\n", rank, count);

    /* Print every element of this rank's buffer. */
    for (i = 0 ; i < i1-i0 ; i++) {
        printf("rank: %d index: %d value: %.2f\n", rank, i, data[i]);
    }

    MPI_File_close(&f);

    MPI_Finalize();

    free(data);

    return 0;
}

With one process:

./read_mpi_io

Values read are correct:

rank: 0, i0: 0, i1: 10
rank 0, 10 value read
rank: 0 index: 0 value: 0.00
rank: 0 index: 1 value: 1.00
rank: 0 index: 2 value: 2.00
rank: 0 index: 3 value: 3.00
rank: 0 index: 4 value: 4.00
rank: 0 index: 5 value: 5.00
rank: 0 index: 6 value: 6.00
rank: 0 index: 7 value: 7.00
rank: 0 index: 8 value: 8.00
rank: 0 index: 9 value: 9.00

But with two processes:

mpirun -n 2 ./read_mpi_io

I get wrong values (zeros):

rank: 0, i0: 0, i1: 5
rank: 1, i0: 5, i1: 10
rank 0, 5 value read
rank: 0 index: 0 value: 0.00
rank 1, 5 value read
rank: 1 index: 0 value: 0.00
rank: 0 index: 1 value: 1.00
rank: 0 index: 2 value: 2.00
rank: 1 index: 1 value: 0.00
rank: 1 index: 2 value: 0.00
rank: 1 index: 3 value: 0.00
rank: 1 index: 4 value: 0.00
rank: 0 index: 3 value: 3.00
rank: 0 index: 4 value: 4.00

What's wrong in my C code?

David Froger
  • 645
  • 6
  • 15

1 Answer

2

Your problem is in the call to MPI_File_set_view(): its second argument, the displacement at which the view starts, is expected in bytes, not in number of elements. So here, you need to multiply your i0 argument by the size of one element you want to read, namely sizeof(double).

Replacing the corresponding line by:

MPI_File_set_view( f, i0 * sizeof( double ), MPI_DOUBLE, MPI_DOUBLE,
                   "native", MPI_INFO_NULL );

This change alone solves the issue and makes the code work as expected.

Gilles
  • 9,269
  • 4
  • 34
  • 53