I am learning the MPI one-sided communication introduced in MPI-2/MPI-3, and came across this online course page about MPI_Accumulate:
MPI_Accumulate allows the caller to combine the data moved to the target process with data already present, such as accumulation of a sum at a target process. The same functionality could be achieved by using MPI_Get to retrieve data (followed by synchronization); performing the sum operation at the caller; then using MPI_Put to send the updated data back to the target process. Accumulate simplifies this messiness ...
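For reference, a single accumulate of the kind described there looks roughly like this, as far as I understand it (my own minimal sketch; win is assumed to be an int window exposed by rank 0, and rank is the caller's rank):

int one = 1;
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);    // passive-target access epoch on rank 0
MPI_Accumulate(&one, 1, MPI_INT,                // origin buffer: the value to combine
               0, rank, 1, MPI_INT,             // target rank 0, displacement = my rank
               MPI_SUM, win);                   // combined into target memory with MPI_SUM
MPI_Win_unlock(0, win);                         // the accumulate is complete after unlock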
But only a limited set of operations (max, min, sum, product, etc.) is allowed with MPI_Accumulate, and user-defined operations are not permitted. I was wondering how to implement the above-mentioned messiness myself, using MPI_Get, synchronization, a local operation, and MPI_Put. Are there any tutorials or working code examples in C/C++?
Thanks
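To be concrete, the sequence I imagine is roughly the following (my own rough sketch, not from the course page; it assumes every process reaches the same fence calls on a window win exposed by rank 0, rank is the caller's rank, increment is its local contribution, and my_op stands in for an arbitrary user-defined operation):

int local;
MPI_Win_fence(0, win);                                    // open an epoch (collective over the window)
MPI_Get(&local, 1, MPI_INT, 0, rank, 1, MPI_INT, win);    // fetch the current value from rank 0
MPI_Win_fence(0, win);                                    // the Get is complete; local is now usable
local = my_op(local, increment);                          // apply the user-defined operation locally
MPI_Put(&local, 1, MPI_INT, 0, rank, 1, MPI_INT, win);    // write the result back to rank 0
MPI_Win_fence(0, win);                                    // the Put is complete at the target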
To test this, I adapted a piece of code from this SO question, in which one-sided communication is used to create an integer counter that is kept synchronized across MPI processes. The problem line using MPI_Accumulate is marked.
The code compiles as is and finishes in about 15 seconds. But when I tried to replace MPI_Accumulate with the equivalent sequence of basic operations shown in the comment block immediately after the problem line, the program hangs indefinitely. Can anyone help explain what went wrong, and what the right way is to replace MPI_Accumulate in this context?
P.S. I compiled the code with
g++ -std=c++11 -I.. mpistest.cpp -lmpi
and executed the binary with
mpiexec -n 4 a.exe
Code:
//adapted from https://stackoverflow.com/questions/4948788/
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <thread>
#include <chrono>
struct mpi_counter_t {
    MPI_Win win;
    int hostrank;  //id of the process that hosts the values exposed to all processes
    int rank;      //process id
    int size;      //number of processes
    int val;
    int *hostvals;
};
struct mpi_counter_t *create_counter(int hostrank) {
    struct mpi_counter_t *count;
    count = (struct mpi_counter_t *)malloc(sizeof(struct mpi_counter_t));
    count->hostrank = hostrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &(count->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(count->size));
    if (count->rank == hostrank) {
        MPI_Alloc_mem(count->size * sizeof(int), MPI_INFO_NULL, &(count->hostvals));
        for (int i=0; i<count->size; i++) count->hostvals[i] = 0;
        MPI_Win_create(count->hostvals, count->size * sizeof(int), sizeof(int),
                       MPI_INFO_NULL, MPI_COMM_WORLD, &(count->win));
    } else {
        count->hostvals = NULL;
        MPI_Win_create(count->hostvals, 0, 1,
                       MPI_INFO_NULL, MPI_COMM_WORLD, &(count->win));
    }
    count->val = 0;
    return count;
}
int increment_counter(struct mpi_counter_t *count, int increment) {
    int *vals = (int *)malloc( count->size * sizeof(int) );
    int val;
    //begin a passive-target access epoch on the host's window
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, count->hostrank, 0, count->win);
    for (int i=0; i<count->size; i++) {
        if (i == count->rank) {
            MPI_Accumulate(&increment, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM, count->win); //Problem line: increment hostvals[i] on host
            /* //Question: How to correctly replace the above MPI_Accumulate call with the following sequence? Currently, the following causes the program to hang.
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_fence(0,count->win);
            vals[i] += increment;
            MPI_Put(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_fence(0,count->win);
            //*/
        } else {
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
        }
    }
    //end the epoch; the RMA calls above are guaranteed complete after this
    MPI_Win_unlock(0, count->win);
    //do op part of MPI_Accumulate's work on count->rank
    count->val += increment;
    vals[count->rank] = count->val;
    //return the sum of vals
    val = 0;
    for (int i=0; i<count->size; i++)
        val += vals[i];
    free(vals);
    return val;
}
void delete_counter(struct mpi_counter_t **count) {
    if ((*count)->rank == (*count)->hostrank) {
        MPI_Free_mem((*count)->hostvals);
    }
    MPI_Win_free(&((*count)->win));
    free((*count));
    *count = NULL;
    return;
}
void print_counter(struct mpi_counter_t *count) {
    if (count->rank == count->hostrank) {
        for (int i=0; i<count->size; i++) {
            printf("%2d ", count->hostvals[i]);
        }
        puts("");
    }
}
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    const int WORKITEMS=50;
    struct mpi_counter_t *c;
    int rank;
    int result = 0;
    c = create_counter(0);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    srand(rank);
    while (result < WORKITEMS) {
        result = increment_counter(c, 1);
        if (result <= WORKITEMS) {
            printf("%d working on item %d...\n", rank, result);
            std::this_thread::sleep_for(std::chrono::seconds(rand()%2));
        } else {
            printf("%d done\n", rank);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);
    print_counter(c);
    delete_counter(&c);
    MPI_Finalize();
    return 0;
}
One additional question: should I use MPI_Win_fence here instead of locks?
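To clarify what I am asking: as far as I understand, a fence version would bracket the RMA calls collectively instead of using lock/unlock, so the body of increment_counter would look roughly like the sketch below. But MPI_Win_fence is collective over the window, so every process would have to enter increment_counter together, which is part of what I am unsure about.

MPI_Win_fence(0, count->win);                  // collective: every rank opens the epoch
for (int i=0; i<count->size; i++)
    MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
MPI_Win_fence(0, count->win);                  // collective: all the Gets are complete here
vals[count->rank] += increment;                // local update of my own slot
MPI_Put(&vals[count->rank], 1, MPI_INT, 0, count->rank, 1, MPI_INT, count->win);
MPI_Win_fence(0, count->win);                  // the Put is visible at the host after this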
--EDIT--
I used lock/unlock in increment_counter as follows. The program runs but behaves strangely: in the final printout, the master node does all the work. Still confused.
int increment_counter(struct mpi_counter_t *count, int increment) {
    int *vals = (int *)malloc( count->size * sizeof(int) );
    int val;
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, count->hostrank, 0, count->win);
    for (int i=0; i<count->size; i++) {
        if (i == count->rank) {
            //MPI_Accumulate(&increment, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM,count->win); //Problem line: increment hostvals[i] on host
            ///* //Question: How to correctly replace the above MPI_Accumulate call with the following sequence? reports that 0 does all the work
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_unlock(0, count->win);
            vals[i] += increment;
            MPI_Put(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, count->hostrank, 0, count->win);
            //*/
        } else {
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
        }
    }
    MPI_Win_unlock(0, count->win);
    //do op part of MPI_Accumulate's work on count->rank
    count->val += increment;
    vals[count->rank] = count->val;
    //return the sum of vals
    val = 0;
    for (int i=0; i<count->size; i++)
        val += vals[i];
    free(vals);
    return val;
}