2

I am trying to allocate an array of structs, with each struct also containing dynamic arrays. They will later be communicated via MPI_Sendrecv:

/* A cell carries three scalar values and two pointers to arrays of doubles. */
struct cell {
    double a;
    double b;
    double c;
    double *aa;
    double *bb;
};

/* File-scope pointers to struct cell used as the two buffers. */
struct cell *Send_l, *Send_r;

I want Send_l and Send_r to have count number of elements, the arrays aa and bb should contain sAS number of elements. This is all done after MPI_Init.

/*
 * Allocate Send_l and Send_r as arrays of `count` cells and give every cell
 * its own aa and bb array of sAS doubles.
 *
 * struct cell stores only the pointers aa and bb, not any array element, so
 * each array needs room for all sAS doubles (allocating sAS - 1 leaves the
 * arrays one element short).
 *
 * If an array of cells cannot be allocated, its pointer is left NULL and no
 * per-cell arrays are allocated for it. The individual aa/bb allocations are
 * not checked here and may be NULL.
 */
void allocateForSendRecv(int count) {
    const int sAS = 5;
    int iter;

    Send_l = malloc(count * sizeof *Send_l);
    if (Send_l != NULL) {
        for (iter = 0; iter < count; iter++) {
            Send_l[iter].aa = malloc(sAS * sizeof *Send_l[iter].aa);
            Send_l[iter].bb = malloc(sAS * sizeof *Send_l[iter].bb);
        }
    }

    Send_r = malloc(count * sizeof *Send_r);
    if (Send_r != NULL) {
        for (iter = 0; iter < count; iter++) {
            Send_r[iter].aa = malloc(sAS * sizeof *Send_r[iter].aa);
            Send_r[iter].bb = malloc(sAS * sizeof *Send_r[iter].bb);
        }
    }
}

With this, I can freely allocate, fill and deallocate, however when I call the following, my results diverge from my reference (using all stack arrays).

MPI_Sendrecv(&(Send_r[0]), count, ..., &(Send_l[0]), count, ...)

I haven't found the exact reason, but posts about similar issues made me assume it's due to my non-contiguous memory allocation. I've tried to solve the problem by using a single malloc call, only to get a segmentation fault when I fill my arrays aa and bb:

    /* NOTE: these statements only reserve one block per buffer; the aa and bb
     * pointers of the cells are not assigned here, so they still have to be
     * pointed at valid storage before they are dereferenced. */
    Send_l = malloc(count * (sizeof(*Send_l)) + count *(sizeof(*Send_l) + 2 * (sAS - 1) * sizeof(double)));

    Send_r = malloc(count * (sizeof(*Send_r)) + count *(sizeof(*Send_r) + 2 * (sAS - 1) * sizeof(double)));

I have reused some code to allocate 2D arrays and applied it to this struct problem, but haven't been able to make it work. Am I right in assuming that, with a functioning single malloc call and therefore contiguous memory allocation, my MPI_Sendrecv would work fine? Alternatively, would using MPI_Type_create_struct solve my non-contiguous memory problem?

Minimal example (without MPI) of segmentation fault. Using allocateSendRecv, everything is fine. But the single alloc in allocateInOneSendRecv gives me issues.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* A cell carries three scalar values and two pointers to arrays of doubles. */
struct cell {
    double a;
    double b;
    double c;
    double *aa;
    double *bb;
};

/* File-scope pointers to struct cell used as the two buffers. */
struct cell *Send_l, *Send_r;

/* Allocates Send_r/Send_l with one malloc per cell array plus one per aa/bb. */
void allocateSendRecv(int count, int sAS);
/* Writes test values into every cell of Send_l and Send_r. */
void fillSendRecv(int count, int sAS);
/* Frees the blocks Send_r and Send_l point to. */
void freeSendRecv(int count);
/* Prints a, b, c and the element at index sAS - 1 of aa and bb for each cell. */
void printSendRecv(int count, int sAS);
/* Allocates Send_l/Send_r with a single malloc each. */
void allocateInOneSendRecv(int count, int sAS);

int main(int argc, char *argv[])
{
    /* Sizes handed to the allocate, fill and print routines. */
    const int count = 2;
    const int sAS = 9;

    (void)argc;
    (void)argv;

    /* allocateInOneSendRecv(count, sAS) is the single-malloc alternative. */
    allocateSendRecv(count, sAS);
    fillSendRecv(count, sAS);
    printSendRecv(count, sAS);
    freeSendRecv(count);

    return 0;
}

/*
 * Allocate Send_r and Send_l as arrays of `count` cells, each cell owning two
 * separately allocated arrays (aa, bb) of `sAS` doubles.
 *
 * The arrays hold sAS elements because fillSendRecv() writes indices
 * 0 .. sAS - 1 and printSendRecv() reads index sAS - 1; allocating only
 * sAS - 1 elements made both of them access memory past the end.
 *
 * If an array of cells cannot be allocated, its pointer is left NULL and no
 * per-cell arrays are allocated for it. The individual aa/bb allocations are
 * not checked here and may be NULL.
 */
void allocateSendRecv(int count, int sAS) {
    int iter;

    printf("Allocating!\n");

    Send_r = malloc(count * sizeof *Send_r);
    if (Send_r != NULL) {
        for (iter = 0; iter < count; iter++) {
            Send_r[iter].aa = malloc(sAS * sizeof *Send_r[iter].aa);
            Send_r[iter].bb = malloc(sAS * sizeof *Send_r[iter].bb);
        }
    }

    Send_l = malloc(count * sizeof *Send_l);
    if (Send_l != NULL) {
        for (iter = 0; iter < count; iter++) {
            Send_l[iter].aa = malloc(sAS * sizeof *Send_l[iter].aa);
            Send_l[iter].bb = malloc(sAS * sizeof *Send_l[iter].bb);
        }
    }
}

void allocateInOneSendRecv(int count, int sAS) {
    printf("Allocating!\n");

    Send_l = malloc(count * (sizeof(*Send_l)) + count *(sizeof(*Send_l) + 2 * (sAS - 1) * sizeof(double)));

    Send_r = malloc(count * (sizeof(*Send_r)) + count *(sizeof(*Send_r) + 2 * (sAS - 1) * sizeof(double)));
}

/*
 * Release the two buffers.
 *
 * Only the blocks that Send_r and Send_l point to are freed; `count` is not
 * used. NOTE(review): the per-cell aa/bb arrays that allocateSendRecv()
 * obtains with separate malloc calls are not released here.
 */
void freeSendRecv(int count) {
    (void)count;

    printf("Deallocating!\n");

    free(Send_r);
    free(Send_l);
}

void fillSendRecv(int count, int sAS) {
    int iter = 0;
    int iter2= 0;
    double dummyDouble = 5.0;

    printf("Filling!\n");

    for (iter = 0; iter < count; iter++) {
        Send_l[iter].a = dummyDouble;
        Send_l[iter].b = dummyDouble;
        Send_l[iter].c = dummyDouble;
        for (iter2 = 0; iter2 < sAS; iter2++) {
            Send_l[iter].aa[iter2] = dummyDouble;
            Send_l[iter].bb[iter2] = dummyDouble;
        }

        dummyDouble++;

        Send_r[iter].a = dummyDouble;
        Send_r[iter].b = dummyDouble;
        Send_r[iter].c = dummyDouble;
        for (iter2 = 0; iter2 < sAS; iter2++) {
            Send_r[iter].aa[iter2] = dummyDouble;
            Send_r[iter].bb[iter2] = dummyDouble;
        }
        dummyDouble++;
    }
}

void printSendRecv(int count, int sAS) {
    int iter = 0;

    printf("Printing!\n");

    for (iter = 0; iter < count; iter++) {
        printf("%f \n", Send_l[iter].a);
        printf("%f \n", Send_l[iter].b);
        printf("%f \n", Send_l[iter].c);
        printf("%f \n", Send_l[iter].aa[sAS - 1]);
        printf("%f \n\n", Send_l[iter].bb[sAS - 1]);

        printf("%f \n", Send_r[iter].a);
        printf("%f \n", Send_r[iter].b);
        printf("%f \n", Send_r[iter].c);
        printf("%f \n", Send_r[iter].aa[sAS - 1]);
        printf("%f \n\n", Send_r[iter].bb[sAS - 1]);
    }
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
BlubMan
  • 23
  • 4
  • You first need to set the `aa` and `bb` fields (e.g. have them point into the allocated memory), and only after you can populate them. But if applicable, keep it easy and declare `struct cell { double a; double b; double a[sAS-1]; double b[sAS-1];}` – Gilles Gouaillardet Oct 18 '22 at 12:49
  • 2
    If you are still stuck, please upload a [mcve] to your question. – Gilles Gouaillardet Oct 18 '22 at 12:50
  • @Gilles Gouaillardet Ive uploaded a small portion of the src that shows my issue. And unfortunately, aa[sAS-1] and bb[sAS-1] is exactly what i am trying to transition away from – BlubMan Oct 18 '22 at 13:11
  • so you will have to implement `void compute_aa_and_bb(struct cell * Cells, int count, int sAS)`, and invoke it right after `allocateInOneSendRecv()` on the sender size, and right after `MPI_Recv()` on the receiver size. – Gilles Gouaillardet Oct 18 '22 at 13:27
  • `allocateInOneSendRecv()` is the one crashing on `x64-Release`on my machine. Using `allocateSendRecv()` runs ok. The structure only have pointer and double so it should not have problems with alignment. – arfneto Oct 18 '22 at 13:27
  • @Gilles Gouaillardet what should this function do? Should it have aa and bb point to the byte position within Send_l and Send_r. And if so, how what that look like? – BlubMan Oct 18 '22 at 13:38
  • Pointers are not arrays. Arrays are not pointers. – William Pursell Oct 20 '22 at 14:58

3 Answers3

2

Your current problem is that you can only pass the start address of Send_l (resp. Send_r). From that point, all memory has to be contiguous and you must know its total size and give it later to MPI_SendRecv.

But after allocation, you must ensure that the aa and bb members are correctly initialized to point inside the allocated block of memory.

A possible code could be:

void allocateSendRecv(int count, int subCount) {
    int iter;

    // total size of each struct
    size_t sz = sizeof(struct cell) + 2 * subCount * sizeof(double);

    // one single contiguous allocation
    Send_r = malloc(count * sz); // nota: never cast malloc in C language!

    // per each cell make aa and bb point into the allocated memory
    for (iter = 0; iter < count; iter++) {
        Send_r[iter].aa = ((double*)(Send_r + count)) + 2 *  subCount * iter;
        Send_r[iter].bb = Send_r[iter].aa + subCount;
    }

    // id. for Send_l
    Send_l = malloc(count * sz);
    for (iter = 0; iter < count; iter++) {
        Send_l[iter].aa = ((double*)(Send_l + count)) + 2 * subCount * iter;
        Send_l[iter].bb = Send_l[iter].aa + subCount;
    }
}

Here I have first the array of cell structures and then 1 aa array and 1 bb array per structure in that order.

That is enough to get rid of the segmentation fault...

Serge Ballesta
  • 143,923
  • 11
  • 122
  • 252
  • Thanks, this is working well. For the MPI part: I split the allocation and pointer assignment into two functions. This way I can call the pointer assign again after sending the whole bloc and properly access it, otherwise these pointers run into the same problem was before. – BlubMan Oct 19 '22 at 10:22
  • Do not you think it is dangerous someone along the line `free` one of this inside pointers? The program will crash the same way. Or not? – arfneto Oct 20 '22 at 13:46
  • C language only allows to free blocs that have been allocated with malloc. Any attempt to free anything else ,be it a part of an allocated bloc, or a static or dynamic variable invokes Undefined Behaviour. – Serge Ballesta Oct 20 '22 at 13:51
  • @SergeBallesta this is the reason I wrote about the way you coded this. And --- as the original program --- not including the size of the `struct` in the `struct` itself makes it hard to reconstruct the data at the other side... Each buffer has its own values, as `count` and `subCount` in the OP code. And it dissapears. – arfneto Oct 20 '22 at 16:15
  • 1
    @arfneto: Maybe you could try to guess the size from the size of what is transmitted via `MPI_Sendrecv` (I do not know mpi...) – Serge Ballesta Oct 20 '22 at 17:36
  • it is all we would have anyway @SergeBallesta. For this reason I wrote about the need of encapsulatiing the sizes inside the struct. It the original OP program there is also an array of `count` pairs of pointers of `subCount` cells so even knowing the total size is not enough. In a _cell_ is just 3+2x `doubles`. This is what we do in RPC, in Windows COM, java, MS MFC ... – arfneto Oct 20 '22 at 21:05
0

The single global struct

/* A cell carries three scalar values and two pointers to arrays of doubles. */
struct cell
{
    double a;
    double b;
    double c;
    double *aa;
    double *bb;
};

/* File-scope pointers to struct cell used as the two buffers. */
struct cell *Send_l, *Send_r;

is a bit fragile:

  • aa and bb are allocated as arrays of double but the subCount -1 size is not there. It is buried into the code.
  • Send_l and Send_r are also pointers to arrays of struct cell but the count size is not there. It is also buried into the code. The single struct is global and it is also weak.

This makes it hard to test, allocate or free data. I will leave a C example using a bit of encapsulation that you can adapt to your case under MPI. I will use your code and functions with a bit of OOP orientation :)

The example includes 2 programs and functions to serialize and deserialize the data. For testing, the data is written to a file by the 1st program and read back by the second one. The same printSendRecv() shows the data before and after the data is written to disk.

A Cell structure

// One cell: three scalars followed by two pointers to arrays of doubles.
typedef struct
{
    double a, b, c;
    double *aa, *bb;
} Cell;

The Send structure

// A pair of cells, named l and r.
typedef struct
{
    Cell l, r;
} Send;

The Set structure


// A Set describes its own contents: how many Send entries it holds and how
// long the aa/bb arrays of every Cell are.
// Both sizes are size_t so they match the size_t arguments of
// allocateSendRecv() and are not truncated on assignment.
typedef struct
{
    size_t count;     // number of entries in `send`
    size_t subCount;  // number of doubles in each aa / bb array
    Send*  send;      // array of `count` entries

} Set;

So a Set has all that is needed to describe its contents.

function prototypes

// Creates a Set; the arguments are count and subCount, in that order.
Set* allocateSendRecv(size_t, size_t);
// Fills every cell of the Set with values from get_next(); -1 if the Set is NULL.
int  fillSendRecv(Set*);
// Destroys a Set and returns NULL so the caller can reset its pointer.
Set* freeSendRecv(Set*);
// Prints the Set, preceded by the optional message; -1 if the Set is NULL.
int  printSendRecv(Set*, const char*);

Using encapsulation and a bit of RAII from C++ you can rewrite allocateSendRecv() and freeSendRecv() as constructor and destructor of the struct as:

Set* allocateSendRecv(size_t count, size_t subCount)
{
    // count is the number of send buffers
    // subcount is the size of the arrays inside each cell
    printf(
        "AllocateSendRecv(count = %llu, subCount = %llu)\n", count,
        subCount);
    Set* nw      = (Set*)malloc(sizeof(Set));
    nw->count    = count;
    nw->subCount = subCount;
    nw->send     = (Send*)malloc(count * sizeof(Send));
    // now that we have Send allocate the Cell arrays
    for (size_t i = 0; i < count; i++)
    {
        nw->send[i].l.aa =
            (double*)malloc(subCount * sizeof(double));
        nw->send[i].l.bb =
            (double*)malloc(subCount * sizeof(double));
        nw->send[i].r.aa =
            (double*)malloc(subCount * sizeof(double));
        nw->send[i].r.bb =
            (double*)malloc(subCount * sizeof(double));
    }
    return nw;
}

// Destroy a Set created by allocateSendRecv(): release the four arrays of
// every entry (l.aa, l.bb, r.aa, r.bb), then the entry array, then the Set.
// Accepts NULL. Always returns NULL so callers can write
// `set = freeSendRecv(set);`.
Set* freeSendRecv(Set* set)
{
    if (set == NULL) return NULL;
    // the casts keep %zu correct whatever integer type the fields have
    printf(
        "\nDeallocating(count = %zu, subCount = %zu)\n",
        (size_t)set->count, (size_t)set->subCount);

    if (set->send != NULL)
    {
        for (size_t i = 0; i < set->count; i++)
        {
            free(set->send[i].l.aa);
            free(set->send[i].l.bb);
            // the r cell owns its own arrays too; skipping them leaks memory
            free(set->send[i].r.aa);
            free(set->send[i].r.bb);
        }
    }
    free(set->send);
    free(set);
    return NULL;
}

Written this way, the tst pointer is invalidated in the call to freeSendRecv(). In this case tst is allocated with count and subCount as 2 and 5 and this goes inside the Set. fillSendRecv() uses incremental fill values to make it easy to pinpoint some eventual displacement. printSendRecv() accepts a string for an optional message. Values are printed before and after the creation of the Set.

Example: serialize and deserialize a buffer

serialize()

In order to write to disk or to transmit the data, the aa and bb arrays must first be expanded. The example uses v2-out x y 4 file to create and show a struct using these values and then write it to a file

// v2-out <count> <subCount> <file>
// Builds a Set of the requested size, fills and prints it, then writes it to
// <file>. Returns 0 on success and 1 on bad arguments, allocation failure or
// a failed write.
int  main(int argc, char** argv)
{
    // three arguments are read, so argv[3] needs argc >= 4
    if (argc < 4)
    {
        usage();
        return 1;  // in case usage() returns
    }
    const long long count_arg    = atoll(argv[1]);
    const long long subCount_arg = atoll(argv[2]);
    if (count_arg < 0 || subCount_arg < 0)
    {
        // a negative number would turn into a huge size_t
        usage();
        return 1;
    }
    // argv[3] outlives every use below, so no fixed-size copy (and no
    // unbounded strcpy) is needed
    const char* f_name = argv[3];

    Set* tst = allocateSendRecv((size_t)count_arg, (size_t)subCount_arg);
    if (tst == NULL) return 1;
    fillSendRecv(tst);
    printSendRecv(tst, "printSendRecv():    ");
    const int rc = to_disk(tst, f_name);
    tst = freeSendRecv(tst);
    return rc == 0 ? 0 : 1;
}

These functions take a Set and write to a file:

// Writes a whole Set to the named file.
int    to_disk(Set*, const char*);
// Writes one Cell, whose arrays have the given length, to an open stream.
int    write_cell(Cell*, const size_t, FILE*);

deserialize()

Since the Set has all that is needed to recreate the Set just the file name is needed. The example uses v2-in file to read back the data from file and show it on screen

// v2-in <file>
// Reads a Set back from <file> and prints it. Returns 0 on success and 1 when
// the argument is missing or the file cannot be loaded.
int  main(int argc,char** argv)
{
    if (argc < 2)
    {
        usage();
        return 1;  // in case usage() returns
    }
    // use the argument directly: copying it into a fixed 256-byte buffer with
    // strcpy overflows for longer paths
    const char* f_name = argv[1];

    Set* tst = from_disk(f_name);
    if (tst == NULL) return 1;
    printSendRecv(tst, "As read from disk:    ");
    tst = freeSendRecv(tst);
    return 0;
}

These functions read a file and return a pointer to a Set with the data:

// Reads the named file and returns a newly created Set, or NULL.
Set*   from_disk(const char*);
// Reads one Cell, whose arrays have the given length, from an open stream.
int    read_cell(FILE*, Cell*, const size_t);

output of an example

Here the programs are

  • v2-out to create a Set and write to a file in disk
  • v2-in to read a file created by v2-out and load into a new Set
  • dump.bin is created and Set has count = 2 and subCount = 4
PS C:\SO>
PS C:\SO> .\v2-out 2 4 dump-2-4.bin
AllocateSendRecv(count = 2, subCount = 4)
FillSendRecv()
printSendRecv():        Count is 2, subCount is 4
        Set 1 of 2
        l:
        [a,b,c] = [    42.001,    42.002,    42.003]
        aa:     42.004     42.005     42.006     42.007
        bb:     42.008     42.009     42.010     42.011

        r:
        [a,b,c] = [    42.012,    42.013,    42.014]
        aa:     42.015     42.016     42.017     42.018
        bb:     42.019     42.020     42.021     42.022


        Set 2 of 2
        l:
        [a,b,c] = [    42.023,    42.024,    42.025]
        aa:     42.026     42.027     42.028     42.029
        bb:     42.030     42.031     42.032     42.033

        r:
        [a,b,c] = [    42.034,    42.035,    42.036]
        aa:     42.037     42.038     42.039     42.040
        bb:     42.041     42.042     42.043     42.044



writing 'Set' to "dump-2-4.bin"

Deallocating(count = 2, subCount = 4)



PS C:\SO> .\v2-in dump-2-4.bin

read 'Set' from "dump-2-4.bin"
From disk: Count = 2, SubCount = 4
AllocateSendRecv(count = 2, subCount = 4)
new 'Set' created
As read from disk:        Count is 2, subCount is 4
        Set 1 of 2
        l:
        [a,b,c] = [    42.001,    42.002,    42.003]
        aa:     42.004     42.005     42.006     42.007
        bb:     42.008     42.009     42.010     42.011

        r:
        [a,b,c] = [    42.012,    42.013,    42.014]
        aa:     42.015     42.016     42.017     42.018
        bb:     42.019     42.020     42.021     42.022


        Set 2 of 2
        l:
        [a,b,c] = [    42.023,    42.024,    42.025]
        aa:     42.026     42.027     42.028     42.029
        bb:     42.030     42.031     42.032     42.033

        r:
        [a,b,c] = [    42.034,    42.035,    42.036]
        aa:     42.037     42.038     42.039     42.040
        bb:     42.041     42.042     42.043     42.044




Deallocating(count = 2, subCount = 4)

The example in 2 files

a header v2.h

#ifndef V2_H
#define V2_H

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// One cell: three scalars and two pointers to arrays of doubles. The length
// of the arrays is not stored here; it is kept in Set.subCount.
typedef struct
{
    double  a;
    double  b;
    double  c;
    double* aa;
    double* bb;

} Cell;

// A pair of cells, named l and r.
typedef struct
{
    Cell l;
    Cell r;

} Send;

// A Set describes its own contents.
typedef struct
{
    size_t count;     // number of entries in `send`
    size_t subCount;  // number of doubles in each aa / bb array
    Send*  send;      // array of `count` entries

} Set;

// Creates a Set with `count` entries whose arrays hold `subCount` doubles.
Set* allocateSendRecv(size_t count, size_t subCount);
// Fills every cell with values from get_next(); returns -1 if `set` is NULL.
int  fillSendRecv(Set* set);
// Destroys a Set; returns NULL so the caller can reset its pointer.
Set* freeSendRecv(Set* set);
// Prints the Set, preceded by `msg` when it is not NULL; -1 if `set` is NULL.
int  printSendRecv(Set* set, const char* msg);
// helpers
// Reads a Set written by to_disk(); returns NULL on failure.
Set*   from_disk(const char* file);
// Returns the next value of a sequence that advances by .001 per call.
double get_next(void);
// Prints one cell whose arrays hold `sz` doubles, preceded by `msg`.
int    print_cell(Cell* cell, size_t sz, const char* msg);
// Reads one cell whose arrays hold `size` doubles from `in`.
int    read_cell(FILE* in, Cell* cell, const size_t size);
// Writes the whole Set to the named file.
int    to_disk(Set* set, const char* file);
// Writes one cell whose arrays hold `size` doubles to `out`.
int    write_cell(Cell* cell, const size_t size, FILE* out);

#endif  // V2_H

code in file v2.c

#include "v2.h"
#include <stdio.h>
#pragma pack(show)

Set* allocateSendRecv(size_t count, size_t subCount)
{
    // count is the number of send buffers
    // subcount is the size of the arrays inside each cell
    printf(
        "AllocateSendRecv(count = %llu, subCount = %llu)\n", count,
        subCount);
    Set* nw      = (Set*)malloc(sizeof(Set));
    nw->count    = count;
    nw->subCount = subCount;
    nw->send     = (Send*)malloc(count * sizeof(Send));
    // now that we have Send allocate the Cell arrays
    for (size_t i = 0; i < count; i++)
    {
        nw->send[i].l.aa =
            (double*)malloc(subCount * sizeof(double));
        nw->send[i].l.bb =
            (double*)malloc(subCount * sizeof(double));
        nw->send[i].r.aa =
            (double*)malloc(subCount * sizeof(double));
        nw->send[i].r.bb =
            (double*)malloc(subCount * sizeof(double));
    }
    return nw;
}

int fillSendRecv(Set* s)
{
    printf("FillSendRecv()\n");
    if (s == NULL) return -1;

    for (size_t i = 0; i < s->count; i += 1)
    {
        // l
        s->send[i].l.a = get_next();
        s->send[i].l.b = get_next();
        s->send[i].l.c = get_next();
        for (size_t j = 0; j < s->subCount; j += 1)
            s->send[i].l.aa[j] = get_next();
        for (size_t j = 0; j < s->subCount; j += 1)
            s->send[i].l.bb[j] = get_next();

        // r
        s->send[i].r.a = get_next();
        s->send[i].r.b = get_next();
        s->send[i].r.c = get_next();
        for (size_t j = 0; j < s->subCount; j += 1)
            s->send[i].r.aa[j] = get_next();
        for (size_t j = 0; j < s->subCount; j += 1)
            s->send[i].r.bb[j] = get_next();
    }
    return 0;
}

// Destroy a Set created by allocateSendRecv(): release the four arrays of
// every entry (l.aa, l.bb, r.aa, r.bb), then the entry array, then the Set.
// Accepts NULL. Always returns NULL so callers can write
// `set = freeSendRecv(set);`.
Set* freeSendRecv(Set* set)
{
    if (set == NULL) return NULL;
    // the casts keep %zu correct whatever integer type the fields have
    printf(
        "\nDeallocating(count = %zu, subCount = %zu)\n",
        (size_t)set->count, (size_t)set->subCount);

    if (set->send != NULL)
    {
        for (size_t i = 0; i < set->count; i++)
        {
            free(set->send[i].l.aa);
            free(set->send[i].l.bb);
            // the r cell owns its own arrays too; skipping them leaks memory
            free(set->send[i].r.aa);
            free(set->send[i].r.bb);
        }
    }
    free(set->send);
    free(set);
    return NULL;
}

// Print the sizes of the Set followed by both cells of every entry.
// `msg`, when not NULL, is printed first. Returns 0, or -1 when `s` is NULL.
int printSendRecv(Set* s, const char* msg)
{
    if (s == NULL) return -1;
    if (msg != NULL) printf("%s", msg);

    // sizes are printed as size_t with %zu; %llu is only correct where size_t
    // happens to be unsigned long long
    printf(
        "    Count is %zu, subCount is %zu\n", (size_t)s->count,
        (size_t)s->subCount);
    for (size_t i = 0; i < s->count; i += 1)
    {
        printf("\tSet %zu of %zu\n", 1 + i, (size_t)s->count);
        print_cell(&s->send[i].l, s->subCount, "\tl:\n");
        print_cell(&s->send[i].r, s->subCount, "\tr:\n");
        printf("\n");
    }
    printf("\n");
    return 0;
}

// helpers

// Load a Set from a file written by to_disk(): two size_t values (count and
// subCount) followed by the l and r cell of every entry.
//
// Returns the new Set, which the caller releases with freeSendRecv(), or NULL
// when `file` is NULL, the file cannot be opened, the two sizes cannot be
// read, the Set cannot be allocated, or read_cell() reports an error.
Set* from_disk(const char* file)
{
    if (file == NULL) return NULL;
    printf("read 'Set' from \"%s\"\n", file);
    FILE* in = fopen(file, "rb");
    if (in == NULL) return NULL;

    size_t count    = 0;
    size_t subCount = 0;
    // without both sizes the rest of the file cannot be interpreted
    if (fread(&count, sizeof(count), 1, in) != 1 ||
        fread(&subCount, sizeof(subCount), 1, in) != 1)
    {
        fclose(in);
        return NULL;
    }
    printf("From disk: Count = %zu, SubCount = %zu\n",
        count,subCount);
    Set* nw = allocateSendRecv(count, subCount);
    if (nw == NULL)
    {
        fclose(in);
        return NULL;  // could not alloc
    }
    printf("new 'Set' created\n");
    nw->count    = count;
    nw->subCount = subCount;

    // so we have the exact structure to hold ALL data
    for (size_t i = 0; i < count; i += 1)
    {
        if (read_cell(in, &nw->send[i].l, subCount) != 0 ||
            read_cell(in, &nw->send[i].r, subCount) != 0)
        {
            fclose(in);
            freeSendRecv(nw);
            return NULL;
        }
    }
    fclose(in);
    return nw;
}

// Return the next value of a sequence kept in a function-local static: it
// starts at 42 and is advanced by .001 before every return, so the first call
// yields about 42.001.
double get_next(void)
{
    static double current = 42.;
    current = current + .001;
    return current;
}

// Print one cell: `msg`, then the three scalars on one line, then the first
// `sz` elements of aa and of bb on a line each. Always returns 0.
int print_cell(Cell* cell, size_t sz, const char* msg)
{
    const double* it;

    printf(
        "%s\t[a,b,c] = [%10.3f,%10.3f,%10.3f]\n", msg,
        cell->a, cell->b, cell->c);

    printf("\taa: ");
    for (it = cell->aa; it != cell->aa + sz; ++it)
        printf("%10.3f ", *it);

    printf("\n\tbb: ");
    for (it = cell->bb; it != cell->bb + sz; ++it)
        printf("%10.3f ", *it);

    printf("\n\n");
    return 0;
}

// Read one cell from `in`: the doubles a, b, c followed by `size` doubles for
// aa and `size` doubles for bb. cell->aa and cell->bb must already point to
// storage for `size` doubles.
//
// Returns 0 on success, -2 if `in` is NULL, -1 if `cell` (or one of its
// arrays while size > 0) is NULL, and -3 if the stream ends or fails before
// the whole cell has been read.
int read_cell(FILE* in, Cell* cell, const size_t size)
{
    if (in == NULL) return -2;
    if (cell == NULL) return -1;
    if (size > 0 && (cell->aa == NULL || cell->bb == NULL)) return -1;

    // a,b,c are read through a local array rather than through &cell->a, so
    // no read runs past the member it was addressed to
    double head[3];
    if (fread(head, sizeof head[0], 3, in) != 3) return -3;
    cell->a = head[0];
    cell->b = head[1];
    cell->c = head[2];

    // aa, bb
    if (size > 0)
    {
        if (fread(cell->aa, sizeof *cell->aa, size, in) != size) return -3;
        if (fread(cell->bb, sizeof *cell->bb, size, in) != size) return -3;
    }
    return 0;
}

// Write a Set to `file`: count and subCount first, then the l and r cell of
// every entry (the layout from_disk() reads back).
//
// Returns 0 on success, -1 if an argument is NULL or the file cannot be
// opened, and -2 if any write or the final fclose fails.
int to_disk(Set* set, const char* file)
{
    if (set == NULL || file == NULL) return -1;
    printf("writing 'Set' to \"%s\"\n", file);
    FILE* out = fopen(file, "wb");
    if (out == NULL) return -1;

    int rc = 0;
    if (fwrite(&set->count, sizeof(set->count), 1, out) != 1 ||
        fwrite(&set->subCount, sizeof(set->subCount), 1, out) != 1)
        rc = -2;

    for (size_t i = 0; rc == 0 && i < set->count; i += 1)
    {
        if (write_cell(&set->send[i].l, set->subCount, out) != 0 ||
            write_cell(&set->send[i].r, set->subCount, out) != 0)
            rc = -2;
    }
    // fclose flushes buffered data, so a failure here is a failed write too
    if (fclose(out) != 0) rc = -2;
    return rc;
}

// Write one cell to `out`: the doubles a, b, c followed by `size` doubles of
// aa and `size` doubles of bb.
//
// Returns 0 on success, -1 if `cell` (or one of its arrays while size > 0) is
// NULL, -2 if `out` is NULL, and -3 if fewer items than requested were
// written.
int write_cell(Cell* cell, const size_t size, FILE* out)
{
    if (cell == NULL) return -1;
    if (out == NULL) return -2;
    if (size > 0 && (cell->aa == NULL || cell->bb == NULL)) return -1;

    // a,b,c are gathered in a local array rather than written through
    // &cell->a, so no write reads past the member it was addressed to
    const double head[3] = {cell->a, cell->b, cell->c};
    if (fwrite(head, sizeof head[0], 3, out) != 3) return -3;

    // aa, bb
    if (size > 0)
    {
        if (fwrite(cell->aa, sizeof *cell->aa, size, out) != size) return -3;
        if (fwrite(cell->bb, sizeof *cell->bb, size, out) != size) return -3;
    }
    return 0;
}

main() for the 2 examples is above in text

casting the return for malloc()

Yes, I always cast the return of malloc(), as I and many others do not like anything implicit. And also because malloc() accepts any expression that evaluates to a size, and looking at the expression does not always say something about the area. Many times the program allocates data for many structures, some enclosed. This little program has 3. So using the cast works as a reminder for the programmers of what the program intends to allocate, and can avoid many bugs, since the expression many times is not sufficient to show what is what.
This thing about malloc() and cast comes from the C-FAQ, an old never-updated thing that is a compilation of articles from usenet all dating before 2000. And even in that time people wrote there about the possible reasons to CAST the pointer. One of the reason pro-casting in the (C-FAQ)[https://c-faq.com/malloc/sd3.html] is that it could alert the programmer for have forgotten to use an include for stdlib.h. I mean it:

Suppose that you call malloc but forget to #include <stdlib.h>.
The compiler is likely to assume that malloc is a function
returning int, which is of course incorrect, and will lead to trouble
Therefore, the seemingly redundant casts are used by people who are
(a) concerned with portability to all pre-ANSI compilers, or
(b) of the opinion that implicit conversions are a bad thing.

I would add the reason I described above.

arfneto
  • 1,227
  • 1
  • 6
  • 13
  • I am afraid you are missing the point. The end goal is to send/receive an array of `struct cell` with MPI **in one shot**, and the easiest way of achieving this is to have all the data contiguous (e.g. no jagged arrays or similar). – Gilles Gouaillardet Oct 19 '22 at 00:29
  • Imagine you have to split your program into two different ones. First program: right after `fillSendRecv()`, dump all data used by `tst` to a file with a **single** libc `write(...)` call and then `printSendRecv()`. Second program: reads this file and `printSendRecv()`. Both outputs should be identical. Can you do that? – Gilles Gouaillardet Oct 19 '22 at 02:12
  • 1
    Do not assume I did not read the code you posted. Let me ask you this: why do you use more than one `malloc()` to allocate contiguous memory? – Gilles Gouaillardet Oct 19 '22 at 03:00
  • I added an example and functions to serialize and deserialize the buffer. 50 lines more but can be more useful. – arfneto Oct 19 '22 at 18:28
  • You failed to write all the data used by `tst` to a file with a **single** libc `write()` call as I challenged you (if you'd rather use `fwrite()`, then be my guest). The whole point was to allocate all the data in contiguous memory so it can be sent/received via MPI (or written/read to/from a file to keep things easier) **in one shot** in order not to add any performance/memory overhead. So called serialization/deserialization was clearly not the expected answer here. – Gilles Gouaillardet Oct 19 '22 at 23:55
  • this is not a game. I wrote for free as an example only. And not as your challenge, sorry man. The structure has pointers inside and the data must be serialized as you should know or learn – arfneto Oct 20 '22 at 00:46
  • look at the other two answers, they do not require any serialization. – Gilles Gouaillardet Oct 20 '22 at 01:09
  • Maybe you know this is only a minimal reproducible example (MRE)? You can for sure for example put the 3 `double ` inside the allocated area and have all in a single space. But it is to change the input and it can many many times not be possible. Serialization (as implemented everywhere) is the simple way and so an example using this can be more useful – arfneto Oct 20 '22 at 13:52
  • In High Performance Computing, serialization should be avoided as much as possible, and it is possible here. But if this is only the best you can do, then take this opportunity to step up your game. – Gilles Gouaillardet Oct 20 '22 at 14:05
0

You can use anonymous struct but it has some caveats:

/* Expands to an unnamed struct type with three scalars and two arrays of n doubles. */
#define CELL(n)       \
    struct {          \
        double a;     \
        double b;     \
        double c;     \
        double aa[n]; \
        double bb[n]; \
    }

The limitations are that you cannot use global variables as is, and you have to pass void * to subroutines (and then cast inside the body). If you need global variables, you can only use pointers declared as void *

For example

#include <stdio.h>
#include <stdlib.h>

/* Expands to an unnamed struct type with three scalars and two arrays of n doubles. */
#define CELL(n)       \
    struct {          \
        double a;     \
        double b;     \
        double c;     \
        double aa[n]; \
        double bb[n]; \
    }

/* Untyped buffers: every CELL(n) expansion is a distinct unnamed type, so the
 * globals are kept as void * and converted where they are used. */
void * Send_r;
void * Send_l;

/* Return uninitialised storage for `count` cells whose arrays hold sAS
 * doubles each, or whatever malloc returns on failure. */
void * allocateCells(int count, int sAS) {
    size_t cellBytes = sizeof(CELL(sAS));

    return malloc(count * cellBytes); // result is returned as void *, no cast
}

/* Store dummyDouble in a, b, c and in every element of aa and bb of each of
 * the `count` cells found at _cells. */
void fillCells(void * _cells, int count, int sAS, double dummyDouble) {
    printf("Filling!\n");

    /* view the untyped buffer as cells whose arrays hold sAS doubles */
    CELL(sAS) * cells = _cells;

    for (int c = 0; c < count; c++) {
        cells[c].a = dummyDouble;
        cells[c].b = dummyDouble;
        cells[c].c = dummyDouble;

        for (int k = 0; k < sAS; k++) {
            cells[c].aa[k] = dummyDouble;
            cells[c].bb[k] = dummyDouble;
        }
    }
}

/*
 * Write the `count` cells found at _cells (arrays of sAS doubles each) to
 * `file` as raw bytes with a single fwrite call.
 *
 * The file is opened in binary mode ("wb") because the data is not text.
 * Nothing is written when an argument is NULL or count is negative. Failures
 * to open, write or close the file are reported on stderr; the function has
 * no return value to signal them.
 */
void dumpCells(void * _cells, int count, int sAS, char *file) {
    if (_cells == NULL || file == NULL || count < 0)
        return;

    FILE *fd = fopen(file, "wb");
    if (fd == NULL) {
        perror(file);
        return;
    }

    CELL(sAS) * cells = _cells;
    size_t wanted = (size_t)count;
    if (fwrite(cells, sizeof(*cells), wanted, fd) != wanted)
        fprintf(stderr, "%s: short write\n", file);

    /* fclose flushes buffered data, so it can fail too */
    if (fclose(fd) != 0)
        perror(file);
}

/*
 * Build two buffers with different cell counts and array lengths, fill each
 * with a constant and dump it to its own file. Both buffers are released
 * before returning. Returns EXIT_FAILURE if an allocation fails.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int sAS = 5;
    int count1 = 10;
    Send_r = allocateCells(count1, sAS);
    if (Send_r == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    fillCells(Send_r, count1, sAS, 5.0);
    dumpCells(Send_r, count1, sAS, "1.bin");

    int sAS2 = 20;
    int count2 = 30;
    Send_l = allocateCells(count2, sAS2);
    if (Send_l == NULL) {
        fprintf(stderr, "out of memory\n");
        free(Send_r);
        Send_r = NULL;
        return EXIT_FAILURE;
    }
    fillCells(Send_l, count2, sAS2, 6.0);
    dumpCells(Send_l, count2, sAS2, "2.bin");

    /* each buffer came from a single malloc, so one free releases it */
    free(Send_l);
    free(Send_r);
    Send_l = NULL;
    Send_r = NULL;
    return 0;
}
Gilles Gouaillardet
  • 8,193
  • 11
  • 24
  • 30
  • Did you test this? In which compiler? What is bb[]? How do you know its size at the time of use? – arfneto Oct 20 '22 at 13:41
  • `gcc (GCC) 8.4.1 20200928 (Red Hat 8.4.1-1)` – Gilles Gouaillardet Oct 20 '22 at 13:54
  • And how you and gcc 8.4 know the size of `bb[]` in order to maybe write the `struct` contents to disk? And even if it is possible what if a few of these things are allocated with different `sAS`? – arfneto Oct 20 '22 at 13:58
  • I fixed that typo. – Gilles Gouaillardet Oct 20 '22 at 14:02
  • and declared `count` and corrected the functions names after that. But still it is of little use since the value of `sAS` can not be changed inside the program. Not so good for high performance computing. – arfneto Oct 20 '22 at 14:37
  • wrong, `sAS` can be changed and does not even have to be known at compile time. – Gilles Gouaillardet Oct 20 '22 at 14:43
  • compile time? This has nothing to do with compile time. Test your code. You can not change `sAS` and call this function again. At run time the area is allocated based on the value of `sAS` so it can not be allocated again with another value. Also if you write this to disk on reading how would you know the value used when recorded? – arfneto Oct 20 '22 at 14:56
  • check the updated program. and let it go. – Gilles Gouaillardet Oct 20 '22 at 15:15
  • I will tell you again: you can not change `sAS` and allocate new structures in the same program. And you can not recreate the structure back since this value is not known anywhere. I will not write _deserialize_ since you say it is not HPC level. – arfneto Oct 20 '22 at 15:41
  • also the `define` used is equivalent to the expression `(3 + sAS + sAS) * sizeof(double)`. I will not write about coding standards, but using this `define` implies that your compiler and company accepts the use of `VLA`. Even if the use of a macro is mandatory probably `#define CELL_SIZE(n) sizeof(struct{double a, b, c, aa[n], bb[n];})` or some similar would have better chance in a code review. – arfneto Oct 20 '22 at 15:50
  • You are arguing for the sake of arguing and you stopped making sense long time ago. Bye. – Gilles Gouaillardet Oct 20 '22 at 22:29
  • your program does not work, for the reasons I told you. Your arguments do not stand for the same reasons. Do you think my arguments are wrong? good: write code. Post here arguments, code reversing a buffer to the original. You can even learn something. Bye! – arfneto Oct 21 '22 at 03:25