1

I have these two structs:

/* One square picture or object: N x N ints stored row by row in data. */
typedef struct dataUnit {
    int id;     /* identifier taken from the input file */
    int N;      /* side length; data holds N * N values */
    int *data;  /* the N * N values */
} dataUnit;

/* Everything the program works on: the threshold plus both unit arrays. */
typedef struct dataStruct {
    double threshold;   /* matching threshold */
    int numOfPic;       /* number of entries in pictures */
    int numOfObj;       /* number of entries in objects */
    // int currPic; --> Probably required
    dataUnit *pictures;
    dataUnit *objects;
} dataStruct;

My goal is to find matches of the objects inside each picture. I do this with findMatchesInImage:

findMatchesInImage(dataUnit picture, dataUnit *objects, int numOfObj, double thresh)

I want to parallelize the calls to findMatchesInImage using MPI so that each available process will work on its own image.

My problem is with sending the data. I know that I've made my life harder here with the way I've defined my structs since

MPI is designed to use arrays of structures rather than structures of arrays.

Although I'm open to data structure change suggestions, it'll be hard for me to transform the rest of my code that already uses this structure.

How can I currently send the data? I was thinking about sending the data in two batches, but I'm still unsure what's best.

MRE:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/* One square picture or object: N x N ints stored row by row in data. */
typedef struct dataUnit
{
    int id;     /* identifier taken from the input file */
    int N;      /* side length; data holds N * N values */
    int *data;  /* the N * N values, allocated by readDataUnit() */
} dataUnit;

/* The whole parsed input: threshold plus the picture and object arrays. */
typedef struct dataStruct
{
    double threshold;
    int numOfPic;       /* number of entries in pictures */
    int numOfObj;       /* number of entries in objects */
    dataUnit *pictures;
    dataUnit *objects;
} dataStruct;

/* One hit: object objID was found with its top-left corner at row i, column j. */
typedef struct match
{
    int objID;
    int i;
    int j;
} match;

/* The hits recorded for one picture; at most three are kept. */
typedef struct ImageMatches
{
    match matches[3];
    int numOfMatches;   /* number of used entries in matches */
    int picID;          /* id of the picture that was searched */
} ImageMatches;

/*
 * Reads one data unit from fp: an id line, a size line (N), then N lines of
 * whitespace-separated integers stored row by row in a freshly allocated
 * N * N array.
 *
 * Returns the unit by value; the caller owns the returned data pointer and is
 * responsible for free()ing it. Cells missing from a short row are left at 0.
 * Terminates the program with EXIT_FAILURE on truncated input, a negative
 * size, or allocation failure.
 */
dataUnit readDataUnit(FILE *fp)
{
    char line[4000];
    /* A plain local: the previous malloc'd temporary was never freed. */
    dataUnit unit = {0, 0, NULL};

    /* fgets() returns NULL at EOF or on error; that must not reach strtol(). */
    if (fgets(line, sizeof line, fp) == NULL)
    {
        fprintf(stderr, "readDataUnit: missing id line\n");
        exit(EXIT_FAILURE);
    }
    unit.id = (int)strtol(line, NULL, 10);

    if (fgets(line, sizeof line, fp) == NULL)
    {
        fprintf(stderr, "readDataUnit: missing size line\n");
        exit(EXIT_FAILURE);
    }
    unit.N = (int)strtol(line, NULL, 10);
    if (unit.N < 0)
    {
        fprintf(stderr, "readDataUnit: negative size %d\n", unit.N);
        exit(EXIT_FAILURE);
    }

    size_t totalSize = (size_t)unit.N * (size_t)unit.N;
    /* calloc so that cells absent from the file read as 0, not garbage */
    unit.data = calloc(totalSize, sizeof *unit.data);
    if (unit.data == NULL && totalSize > 0)
    {
        fprintf(stderr, "readDataUnit: out of memory\n");
        exit(EXIT_FAILURE);
    }

    for (int i = 0; i < unit.N; i++)
    {
        if (fgets(line, sizeof line, fp) == NULL)
        {
            fprintf(stderr, "readDataUnit: unit %d is missing row %d\n", unit.id, i);
            exit(EXIT_FAILURE);
        }

        int j = 0;
        char *token = strtok(line, " \t\r\n");
        while (token && j < unit.N)
        {
            unit.data[(size_t)i * unit.N + j] = (int)strtol(token, NULL, 10);
            token = strtok(NULL, " \t\r\n");
            j++;
        }
    }
    return unit;
}

/*
 * Parses the input file at path into *data.
 *
 * Expected layout: a threshold line, a picture-count line followed by that
 * many data units, then an object-count line followed by that many data
 * units. Lines that contain no number where a count is expected (e.g. blank
 * lines) are skipped; anything after the objects is ignored.
 *
 * All fields of *data are set up front, so a truncated file leaves zero
 * counts and NULL arrays rather than indeterminate values. A count of 0 is
 * accepted as a real count. Exits with EXIT_FAILURE if the file cannot be
 * opened, a count is negative, or an allocation fails.
 */
void readData(char *path, dataStruct *data)
{
    FILE *fp;
    char line[4000];

    data->threshold = 0;
    data->numOfPic = 0;
    data->numOfObj = 0;
    data->pictures = NULL;
    data->objects = NULL;

    fp = fopen(path, "r");
    if (fp == NULL)
        exit(EXIT_FAILURE);

    int firstLine = 1;
    /* explicit flags: a count of 0 no longer doubles as "not read yet" */
    int havePic = 0, haveObj = 0;

    while (fgets(line, 4000, fp) != NULL)
    {
        if (firstLine)
        {
            data->threshold = strtod(line, NULL);
            firstLine = 0;
            continue;
        }
        if (havePic && haveObj)
            break; /* both sections parsed; trailing lines are ignored */

        char *end;
        long count = strtol(line, &end, 10);
        if (end == line)
            continue; /* no digits on this line */
        if (count < 0)
        {
            fprintf(stderr, "readData: negative count %ld\n", count);
            exit(EXIT_FAILURE);
        }

        dataUnit *units = malloc(sizeof(dataUnit) * (size_t)count);
        if (units == NULL && count > 0)
        {
            fprintf(stderr, "readData: out of memory\n");
            exit(EXIT_FAILURE);
        }
        for (long i = 0; i < count; i++)
        {
            units[i] = readDataUnit(fp);
        }

        if (!havePic)
        {
            data->numOfPic = (int)count;
            data->pictures = units;
            havePic = 1;
        }
        else
        {
            data->numOfObj = (int)count;
            data->objects = units;
            haveObj = 1;
        }
    }

    fclose(fp);
}

/*
 * Compares object with the object-sized window of picture whose top-left
 * corner is at row i, column j.
 *
 * For every cell the relative difference |(pic - obj) / pic| is accumulated;
 * the function returns 1 when the mean over all object cells is <= thresh
 * and 0 otherwise.
 */
int findMatchInArea(dataUnit *picture, dataUnit *object, int i, int j, double thresh)
{
    const int side = object->N;
    const int stride = picture->N;
    double total = 0;

    for (int r = 0; r < side; r++)
    {
        /* first cell of this row inside the picture window / the object */
        const int *picRow = picture->data + (r + i) * stride + j;
        const int *objRow = object->data + r * side;

        for (int c = 0; c < side; c++)
        {
            int p = picRow[c];
            total += fabs((p - objRow[c]) / (double)p);
        }
    }

    if (total / (side * side) <= thresh)
        return 1;
    return 0;
}

/*
 * Searches picture for each of the numOfObj objects and records the windows
 * that findMatchInArea() accepts.
 *
 * Returns a calloc'd ImageMatches that the caller must free(), or NULL if the
 * allocation fails. The search stops as soon as the matches array is full, so
 * numOfMatches never exceeds its capacity. Objects larger than the picture
 * have no valid window and are skipped.
 */
ImageMatches *findMatchesInImage(dataUnit *picture, dataUnit *objects, int numOfObj, double thresh)
{
    ImageMatches *imageMatches = calloc(1, sizeof *imageMatches);
    if (imageMatches == NULL)
        return NULL;
    imageMatches->picID = picture->id;

    const int capacity =
        (int)(sizeof imageMatches->matches / sizeof imageMatches->matches[0]);

    for (int i = 0; i < numOfObj; i++)
    {
        /* number of window positions per axis; <= 0 when the object is too big */
        int windowsLength = picture->N - objects[i].N + 1;

        for (int row = 0; row < windowsLength; row++)
        {
            for (int col = 0; col < windowsLength; col++)
            {
                if (!findMatchInArea(picture, &(objects[i]), row, col, thresh))
                    continue;

                match *slot = &imageMatches->matches[imageMatches->numOfMatches];
                slot->objID = objects[i].id;
                slot->i = row;
                slot->j = col;
                imageMatches->numOfMatches++;

                /* stop before the next hit would be written past the array */
                if (imageMatches->numOfMatches == capacity)
                    return imageMatches;
            }
        }
    }
    return imageMatches;
}
/*
 * Entry point: reads the input file named by argv[1] and collects the matches
 * of every picture into imageMatches.
 *
 * Returns EXIT_FAILURE when no input path is given or a match list cannot be
 * allocated, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <input-file>\n", argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    /* zeroed so the counts are defined even if readData() fills nothing in */
    dataStruct data = {0};
    readData(argv[1], &data);

    /* a variable-length array must have a positive size */
    if (data.numOfPic <= 0)
        return 0;

    ImageMatches imageMatches[data.numOfPic];

    for (int i = 0; i < data.numOfPic; i++)
    {
        ImageMatches *found = findMatchesInImage(&(data.pictures[i]), data.objects, data.numOfObj, data.threshold);
        if (found == NULL)
            return EXIT_FAILURE;
        imageMatches[i] = *found;
        free(found); /* the copy above is all that is kept */
    }

    return 0;
}

minimal input (Should be a .txt file passed as argument):

0.1
2
1
5
42  68  35   1  70
34  16  40  59   5
10  17  36  52   1
77  30  69  93  26
74  71  64  69  93
2
4
85  94  95   6
97  59  87  21
29  32  42  24
6  40  52  74
1
1
3
19  79  23
10  40  66
55  37  59

0.1 - threshold

2 - number of images

1/2 - image id followed by image sizes (5/4) and images

1 - number of objects

1 - object id followed by object size and the object

RedYoel
  • 302
  • 2
  • 16
  • _Side note:_ `struct foo { ... } typedef foo;` [although legal] isn't very idiomatic. This is the first time I've ever seen it used [and others will have trouble in understanding it]. Better to use (e.g.) `typedef struct foo { ... } foo;` – Craig Estey Mar 21 '23 at 19:26
  • _Side note:_ Passing a `struct` by _value_ [again, although legal] isn't used very much. It involves excess copying (of the data from the struct to an area on the stack) and doesn't scale too well. Better to pass a _pointer_ to the struct. To see the scaling issue, add (e.g.) `int foo[1000000000];` to the struct definition. A pointer can handle this just fine. If you want the struct to be R/O, just do: `const struct foo *ptr` – Craig Estey Mar 21 '23 at 19:29
  • We really need to see more of your code, with the `MPI*` calls you _do_ have. It is certainly possible to send your structs and the data, but you'll need separate `MPI_Send` calls. One for each struct (with count 1). And, then, separate calls for (e.g. `data` with count `N`). You send the structs first, so the receiver knows how large the array it will receive next will be (and can `malloc` space for it). Also, you may want `MPI_Scatter/MPI_Gather` instead of just `MPI_Send/MPI_Recv` [unless you do the splitting of the data for each node manually]. – Craig Estey Mar 21 '23 at 19:38
  • @CraigEstey I don't have any MPI calls yet. Is there any other part you'd like to see? – RedYoel Mar 23 '23 at 09:05
  • The _code_ that generates/allocates/links the structs you have. And, code that shows how to traverse the data. How many instances of each? Is `dataStruct` a "singleton"? That is, a single instance that is a "master" global that points to the `dataUnit` arrays. Or, do you have an _array_ of `dataStruct`s? Please _edit_ your _question_ I'm guessing that you don't have too much code yet, so post an [MRE](https://stackoverflow.com/help/minimal-reproducible-example). It should be downloadable and compile cleanly – Craig Estey Mar 23 '23 at 18:07
  • @CraigEstey I've added a shorter version of what I've got, including an input sample. – RedYoel Mar 25 '23 at 09:37
  • TL;DR MPI works best when the data is contiguous. If this is not doable, the next option is to manually pack/unpack data into contiguous buffers before/after sending/receiving it. If the memory and/or performance overhead of that is too high, go back to square one and reconsider the data structure. – Gilles Gouaillardet Mar 25 '23 at 09:39
  • How many total images are there, how many total workers (CPU cores basically), how many computers, and how long does processing a single image typically take? – John Zwinck Mar 25 '23 at 10:14
  • @JohnZwinck I don't know those details in advance, therefore I'd like to make the distribution dynamic. I do know that I've 16 cores, but I don't know how many computers it will run. The whole program took about 16 seconds for an example input file that I've (More images, larger images (up to 200^2), and more objects as well). – RedYoel Mar 25 '23 at 12:40

2 Answers

1

Caveat: Not a total solution, but some code cleanups and refactoring in preparation for adding MPI functions. As well as some suggestions about how to proceed.


Some issues ...

  1. In findMatchesInImage, the code does a calloc and returns this. But, main dereferences this (without calling free). This is a memory leak.
  2. Better to have main pass down a pointer to the correct ImageMatches array element.
  3. There is a bunch of replicated code that obscures things a bit (e.g.) imageMatches->matches[imageMatches->numOfMatches].objID et. al. instead of using a pointer: curmatch->objID
  4. There are [many] 2D arrays of dimension NxN. However, the code calculates a linear offset in several places [up to totalSize] and then does int row = idx / object->N; int col = idx % object->N;
  5. Better to have two for loops that iterate on row and col
  6. Doing so will make the breakup of the problem for MPI easier [later on].

Restructuring for MPI:

  1. The simplest splitup might be to send all objects to all ranks. Then, we send each picture to a separate rank. (i.e.) Each rank would operate on a single picture and search for all objects within it.

  2. If we had objN objects, and rankN nodes, we could send to each worker rank a partial list of the objects. That is rank1 gets the first objN / rankN objects and rank2 gets the next chunk, etc. Then, rank0 sends each picture to all ranks, one at a time.

  3. We might further subdivide this so that a given worker rank only processes a limited number of rows of data in a picture and its objects

Which of these splitups we use, and how many, will depend upon the number of pictures, the number of objects, etc.

The main rank may have to read in only a single picture at a time.

For example, if we have (e.g.) 5,000 pictures and 100 objects, it may make sense to only do the first method. That is, we can't send all 5,000 pictures at once.

With 5 pictures and 100 objects, we could consider the other methods to get more parallelism.


To do this the most likely candidate for restructuring is findMatchInArea:

  • It returns a boolean if the score is within the threshold.
  • If we split it up (i.e. the function only receives a smaller segment of the data), the dimensions given to it will be smaller, so it could only calculate score and not score / totalSize.
  • Better to pass back the raw value of score and have these values summed by MPI rank 0.
  • Then, we can compare the amalgamated score value against the threshold.

In the code below, I've used cpp conditionals to denote old vs. new code:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

Note: this can be cleaned up by running the file through unifdef -k


Here's the first round of cleanup:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/*
 * dbgprt: printf-style debug trace.
 * Expands to printf() when DEBUG is non-zero; otherwise to an empty
 * statement, so its arguments are not evaluated. Uses the standard
 * __VA_ARGS__ form rather than the GNU-only named variadic parameter.
 */
#if DEBUG
#define dbgprt(...) \
    printf(__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

/* One square picture or object: N x N ints stored row by row in data. */
typedef struct dataUnit {
    int id;                     /* identifier taken from the input file */
    int N;                      /* side length; data holds N * N values */
    int *data;                  /* the N * N values */
} dataUnit;

/* The whole parsed input: threshold plus the picture and object arrays. */
typedef struct dataStruct {
    double threshold;
    int numOfPic;               /* number of entries in pictures */
    int numOfObj;               /* number of entries in objects */
    dataUnit *pictures;
    dataUnit *objects;
} dataStruct;

/* One hit: object objID found with its top-left corner at row i, column j. */
typedef struct match {
    int objID;
    int i;
    int j;
} match;

/* Upper bound on the hits kept per picture. */
#define MAXMATCH        3

/* The hits recorded for one picture. */
typedef struct ImageMatches {
#if 0
    match matches[3];
#else
    match matches[MAXMATCH];
#endif
    int numOfMatches;           /* number of used entries in matches */
    int picID;                  /* id of the picture that was searched */
} ImageMatches;

/*
 * Reads one data unit from fp into *currDataUnit: an id line, a size line
 * (N), then N lines of whitespace-separated integers stored row by row in a
 * freshly allocated N * N array (owned by the caller afterwards).
 *
 * Cells missing from a short row are left at 0. Terminates the program with
 * EXIT_FAILURE on truncated input, a negative size, or allocation failure.
 */
#if 0
dataUnit
readDataUnit(FILE *fp)
#else
void
readDataUnit(FILE *fp,dataUnit *currDataUnit)
#endif
{
    char line[4000];
#if 0
    dataUnit *currDataUnit = malloc(sizeof(dataUnit));
#endif

    /* fgets() returns NULL at EOF or on error; that must not reach strtol() */
    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr,"readDataUnit: missing id line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->id = (int) strtol(line, NULL, 10);

    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr,"readDataUnit: missing size line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->N = (int) strtol(line, NULL, 10);
    if (currDataUnit->N < 0) {
        fprintf(stderr,"readDataUnit: negative size %d\n",currDataUnit->N);
        exit(EXIT_FAILURE);
    }
    int totalSize = currDataUnit->N * currDataUnit->N;

    /* calloc so that cells absent from the file read as 0, not garbage */
    currDataUnit->data = calloc((size_t) totalSize, sizeof(int));
    if (currDataUnit->data == NULL && totalSize > 0) {
        fprintf(stderr,"readDataUnit: out of memory\n");
        exit(EXIT_FAILURE);
    }

    /* second value is N (the old trace printed id twice) */
    printf("readDataUnit: id=%d N=%d totalSize=%d\n",
        currDataUnit->id,currDataUnit->N,totalSize);

    for (int i = 0; i < currDataUnit->N; i++) {
        int j = 0;

        if (fgets(line, sizeof line, fp) == NULL) {
            fprintf(stderr,"readDataUnit: unit %d is missing row %d\n",
                currDataUnit->id,i);
            exit(EXIT_FAILURE);
        }
        char *token = strtok(line, " \t\r\n");

        while (token && j < currDataUnit->N) {
            currDataUnit->data[i * currDataUnit->N + j] =
                (int) strtol(token, NULL, 10);
            token = strtok(NULL, " \t\r\n");
            j++;
        }
    }

#if 0
    return *currDataUnit;
#endif
}

/*
 * Parses the input file at path into *data.
 *
 * Expected layout: a threshold line, a picture-count line followed by that
 * many data units, then an object-count line followed by that many data
 * units. Lines that contain no number where a count is expected (e.g. blank
 * lines) are skipped; anything after the objects is ignored.
 *
 * All fields of *data are set up front, so a truncated file leaves zero
 * counts and NULL arrays rather than indeterminate values. A count of 0 is
 * accepted as a real count. Exits with EXIT_FAILURE if the file cannot be
 * opened, a count is negative, or an allocation fails.
 */
void
readData(char *path, dataStruct *data)
{
    FILE *fp;
    char line[4000];

    data->threshold = 0;
    data->numOfPic = 0;
    data->numOfObj = 0;
    data->pictures = NULL;
    data->objects = NULL;

    fp = fopen(path, "r");
    if (fp == NULL)
        exit(EXIT_FAILURE);

    int firstLine = 1;
    /* explicit flags: a count of 0 no longer doubles as "not read yet" */
    int havePic = 0,
        haveObj = 0;

    while (fgets(line, 4000, fp) != NULL) {
        if (firstLine) {
            data->threshold = strtod(line, NULL);
            firstLine = 0;
            continue;
        }
        if (havePic && haveObj)
            break;              /* both sections parsed; ignore the rest */

        char *end;
        long count = strtol(line, &end, 10);

        if (end == line)
            continue;           /* no digits on this line */
        if (count < 0) {
            fprintf(stderr,"readData: negative count %ld\n",count);
            exit(EXIT_FAILURE);
        }

        dataUnit *units = malloc(sizeof(dataUnit) * (size_t) count);
        if (units == NULL && count > 0) {
            fprintf(stderr,"readData: out of memory\n");
            exit(EXIT_FAILURE);
        }
        for (long i = 0; i < count; i++) {
#if 0
            units[i] = readDataUnit(fp);
#else
            readDataUnit(fp,&units[i]);
#endif
        }

        if (! havePic) {
            data->numOfPic = (int) count;
            data->pictures = units;
            havePic = 1;
        }
        else {
            data->numOfObj = (int) count;
            data->objects = units;
            haveObj = 1;
        }
    }

    fclose(fp);
}

/*
 * Compares object with the object-sized window of picture whose top-left
 * corner is at (picoff_row, picoff_col).
 *
 * For every cell the relative difference |(pic - obj) / pic| is accumulated;
 * returns 1 when the mean over all object cells is <= thresh, else 0.
 */
int
findMatchInArea(dataUnit *picture, dataUnit *object, int picoff_row, int picoff_col,
    double thresh)
{
    double score = 0;
#if 0
    int totalSize = (object->N) * (object->N);
#else
    int picN = picture->N;
    int objN = object->N;
#endif

    dbgprt("findMatchInArea: ENTER picture=%d object=%d picoff_row=%d picoff_col=%d picN=%d objN=%d thresh=%g\n",
        picture->id,object->id,picoff_row,picoff_col,picN,objN,thresh);

#if ORIG
    /* the linear loop needs the element count dropped by the #if 0 above */
    int totalSize = objN * objN;

    for (int idx = 0; idx < totalSize; idx++) {
        int row = idx / object->N;
        int col = idx % object->N;

        int picVal = picture->data[(row + picoff_row) * picture->N +
            (col + picoff_col)];
        int objVal = object->data[row * object->N + col];

        score += fabs((picVal - objVal) / (double) picVal);
    }

    int ret = (score / totalSize) <= thresh ? 1 : 0;
#else
    int picVal;
    int objVal;

#if 0
    const int *picdata = picture->data;
    const int *objdata = object->data;
    for (int row = 0; row < objN; row++) {
        for (int col = 0; col < objN; col++) {
            picVal = picdata[(row + picoff_row) * picN + (col + picoff_col)];
            objVal = objdata[row * objN + col];
            score += fabs((picVal - objVal) / (double) picVal);
        }
    }
#else
    for (int row = 0; row < objN; row++) {
        const int *picdata = &picture->data[((picoff_row + row) * picN) +
            picoff_col];
        /* the window offset applies to the picture only: object rows always
           start at column 0 (adding picoff_col here read past the object) */
        const int *objdata = &object->data[row * objN];
        for (int col = 0; col < objN; col++) {
            picVal = picdata[col];
            objVal = objdata[col];
            score += fabs((picVal - objVal) / (double) picVal);
        }
    }
#endif

    int ret = (score / (objN * objN)) <= thresh ? 1 : 0;
#endif

    dbgprt("findMatchInArea: EXIT ret=%d score=%g scoreN=%g\n",
        ret,score,score / (objN * objN));

    return ret;
}

/*
 * Searches picture for each of the numOfObj objects and records, in
 * *imageMatches, the windows that findMatchInArea() accepts. The search stops
 * once MAXMATCH hits have been stored. Objects larger than the picture have
 * no valid window and are skipped.
 */
#if 0
ImageMatches *
findMatchesInImage(dataUnit *picture, dataUnit *objects, int numOfObj,
    double thresh)
#else
void
findMatchesInImage(ImageMatches *imageMatches, dataUnit *picture,
    dataUnit *objects, int numOfObj,
    double thresh)
#endif
{
#if 0
    ImageMatches *imageMatches = calloc(1, sizeof(ImageMatches));
#else
    imageMatches->numOfMatches = 0;
#endif

    imageMatches->picID = picture->id;

#if 1
    /* set once the matches array is full; unwinds both loops */
    int stop = 0;
#endif

    for (int i = 0; i < numOfObj; i++) {
#if 0
        int windowsLength = picture->N - objects[i].N + 1;
#else
        dataUnit *curobj = &objects[i];
        int windowsLength = picture->N - curobj->N + 1;
#endif
        /* a negative length would square to a positive, bogus window count */
        if (windowsLength <= 0)
            continue;

        int numOfWindows = windowsLength * windowsLength;

        for (int j = 0; j < numOfWindows; j++) {
#if 0
            if (findMatchInArea(picture, &(objects[i]), j / windowsLength,
                j % windowsLength, thresh)) {

                imageMatches->matches[imageMatches->numOfMatches].objID =
                    objects[i].id;
                imageMatches->matches[imageMatches->numOfMatches].i =
                    j / windowsLength;
                imageMatches->matches[imageMatches->numOfMatches].j =
                    j % windowsLength;
#else
            /* j enumerates windows row by row: row = j / len, col = j % len */
            if (findMatchInArea(picture, curobj, j / windowsLength,
                j % windowsLength, thresh)) {
                match *matchcur =
                    &imageMatches->matches[imageMatches->numOfMatches];
                matchcur->objID = curobj->id;
                matchcur->i = j / windowsLength;
                matchcur->j = j % windowsLength;
#endif

                imageMatches->numOfMatches++;

#if 1
                stop = (imageMatches->numOfMatches == MAXMATCH);
                if (stop)
                    break;
#endif
            }
#if 1
            if (stop)
                break;
#endif
        }

#if 0
        if (imageMatches->numOfMatches == 3)
            return imageMatches;
#else
        if (stop)
            break;
#endif
    }

#if 0
    return imageMatches;
#endif
}

/*
 * Entry point: reads the input file named by argv[1], searches every picture
 * and prints the number of matches found in each.
 *
 * Returns EXIT_FAILURE when no input path is given, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    /* zeroed so the counts are defined even if readData() fills nothing in */
    dataStruct data = { 0 };

    if (argc < 2) {
        fprintf(stderr,"usage: %s <input-file>\n",argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    readData(argv[1], &data);

    /* a variable-length array must have a positive size */
    if (data.numOfPic <= 0)
        return 0;

    ImageMatches imageMatches[data.numOfPic];

    for (int i = 0; i < data.numOfPic; i++) {
#if 0
        imageMatches[i] = *findMatchesInImage(&data.pictures[i],
            data.objects, data.numOfObj, data.threshold);
#else
        ImageMatches *cur = &imageMatches[i];
        findMatchesInImage(cur,&data.pictures[i],
            data.objects, data.numOfObj, data.threshold);
        printf("main: i=%d numOfMatches=%d\n",i,cur->numOfMatches);
#endif
    }

    return 0;
}

After using unifdef -k we get:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/*
 * dbgprt: printf-style debug trace.
 * Expands to printf() when DEBUG is non-zero; otherwise to an empty
 * statement, so its arguments are not evaluated. Uses the standard
 * __VA_ARGS__ form rather than the GNU-only named variadic parameter.
 */
#if DEBUG
#define dbgprt(...) \
    printf(__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

/* One square picture or object: N x N ints stored row by row in data. */
typedef struct dataUnit {
    int id;                     /* identifier taken from the input file */
    int N;                      /* side length; data holds N * N values */
    int *data;                  /* the N * N values */
} dataUnit;

/* The whole parsed input: threshold plus the picture and object arrays. */
typedef struct dataStruct {
    double threshold;
    int numOfPic;               /* number of entries in pictures */
    int numOfObj;               /* number of entries in objects */
    dataUnit *pictures;
    dataUnit *objects;
} dataStruct;

/* One hit: object objID found with its top-left corner at row i, column j. */
typedef struct match {
    int objID;
    int i;
    int j;
} match;

/* Upper bound on the hits kept per picture. */
#define MAXMATCH    3

/* The hits recorded for one picture. */
typedef struct ImageMatches {
    match matches[MAXMATCH];
    int numOfMatches;           /* number of used entries in matches */
    int picID;                  /* id of the picture that was searched */
} ImageMatches;

/*
 * Reads one data unit from fp into *currDataUnit: an id line, a size line
 * (N), then N lines of whitespace-separated integers stored row by row in a
 * freshly allocated N * N array (owned by the caller afterwards).
 *
 * Cells missing from a short row are left at 0. Terminates the program with
 * EXIT_FAILURE on truncated input, a negative size, or allocation failure.
 */
void
readDataUnit(FILE *fp,dataUnit *currDataUnit)
{
    char line[4000];

    /* fgets() returns NULL at EOF or on error; that must not reach strtol() */
    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr,"readDataUnit: missing id line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->id = (int) strtol(line, NULL, 10);

    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr,"readDataUnit: missing size line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->N = (int) strtol(line, NULL, 10);
    if (currDataUnit->N < 0) {
        fprintf(stderr,"readDataUnit: negative size %d\n",currDataUnit->N);
        exit(EXIT_FAILURE);
    }
    int totalSize = currDataUnit->N * currDataUnit->N;

    /* calloc so that cells absent from the file read as 0, not garbage */
    currDataUnit->data = calloc((size_t) totalSize, sizeof(int));
    if (currDataUnit->data == NULL && totalSize > 0) {
        fprintf(stderr,"readDataUnit: out of memory\n");
        exit(EXIT_FAILURE);
    }

    /* second value is N (the old trace printed id twice) */
    printf("readDataUnit: id=%d N=%d totalSize=%d\n",
        currDataUnit->id,currDataUnit->N,totalSize);

    for (int i = 0; i < currDataUnit->N; i++) {
        int j = 0;

        if (fgets(line, sizeof line, fp) == NULL) {
            fprintf(stderr,"readDataUnit: unit %d is missing row %d\n",
                currDataUnit->id,i);
            exit(EXIT_FAILURE);
        }
        char *token = strtok(line, " \t\r\n");

        while (token && j < currDataUnit->N) {
            currDataUnit->data[i * currDataUnit->N + j] =
                (int) strtol(token, NULL, 10);
            token = strtok(NULL, " \t\r\n");
            j++;
        }
    }

}

/*
 * Parses the input file at path into *data.
 *
 * Expected layout: a threshold line, a picture-count line followed by that
 * many data units, then an object-count line followed by that many data
 * units. Lines that contain no number where a count is expected (e.g. blank
 * lines) are skipped; anything after the objects is ignored.
 *
 * All fields of *data are set up front, so a truncated file leaves zero
 * counts and NULL arrays rather than indeterminate values. A count of 0 is
 * accepted as a real count. Exits with EXIT_FAILURE if the file cannot be
 * opened, a count is negative, or an allocation fails.
 */
void
readData(char *path, dataStruct *data)
{
    FILE *fp;
    char line[4000];

    data->threshold = 0;
    data->numOfPic = 0;
    data->numOfObj = 0;
    data->pictures = NULL;
    data->objects = NULL;

    fp = fopen(path, "r");
    if (fp == NULL)
        exit(EXIT_FAILURE);

    int firstLine = 1;
    /* explicit flags: a count of 0 no longer doubles as "not read yet" */
    int havePic = 0,
        haveObj = 0;

    while (fgets(line, 4000, fp) != NULL) {
        if (firstLine) {
            data->threshold = strtod(line, NULL);
            firstLine = 0;
            continue;
        }
        if (havePic && haveObj)
            break;              /* both sections parsed; ignore the rest */

        char *end;
        long count = strtol(line, &end, 10);

        if (end == line)
            continue;           /* no digits on this line */
        if (count < 0) {
            fprintf(stderr,"readData: negative count %ld\n",count);
            exit(EXIT_FAILURE);
        }

        dataUnit *units = malloc(sizeof(dataUnit) * (size_t) count);
        if (units == NULL && count > 0) {
            fprintf(stderr,"readData: out of memory\n");
            exit(EXIT_FAILURE);
        }
        for (long i = 0; i < count; i++) {
            readDataUnit(fp,&units[i]);
        }

        if (! havePic) {
            data->numOfPic = (int) count;
            data->pictures = units;
            havePic = 1;
        }
        else {
            data->numOfObj = (int) count;
            data->objects = units;
            haveObj = 1;
        }
    }

    fclose(fp);
}

/*
 * Compares object with the object-sized window of picture whose top-left
 * corner is at (picoff_row, picoff_col).
 *
 * For every cell the relative difference |(pic - obj) / pic| is accumulated;
 * returns 1 when the mean over all object cells is <= thresh, else 0.
 */
int
findMatchInArea(dataUnit *picture, dataUnit *object, int picoff_row, int picoff_col,
    double thresh)
{
    double score = 0;
    int picN = picture->N;
    int objN = object->N;

    dbgprt("findMatchInArea: ENTER picture=%d object=%d picoff_row=%d picoff_col=%d picN=%d objN=%d thresh=%g\n",
        picture->id,object->id,picoff_row,picoff_col,picN,objN,thresh);

#if ORIG
    /* the linear loop needs the element count, which is no longer declared
       above */
    int totalSize = objN * objN;

    for (int idx = 0; idx < totalSize; idx++) {
        int row = idx / object->N;
        int col = idx % object->N;

        int picVal = picture->data[(row + picoff_row) * picture->N +
            (col + picoff_col)];
        int objVal = object->data[row * object->N + col];

        score += fabs((picVal - objVal) / (double) picVal);
    }

    int ret = (score / totalSize) <= thresh ? 1 : 0;
#else
    int picVal;
    int objVal;

    for (int row = 0; row < objN; row++) {
        const int *picdata = &picture->data[((picoff_row + row) * picN) +
            picoff_col];
        /* the window offset applies to the picture only: object rows always
           start at column 0 (adding picoff_col here read past the object) */
        const int *objdata = &object->data[row * objN];
        for (int col = 0; col < objN; col++) {
            picVal = picdata[col];
            objVal = objdata[col];
            score += fabs((picVal - objVal) / (double) picVal);
        }
    }

    int ret = (score / (objN * objN)) <= thresh ? 1 : 0;
#endif

    dbgprt("findMatchInArea: EXIT ret=%d score=%g scoreN=%g\n",
        ret,score,score / (objN * objN));

    return ret;
}

/*
 * Searches picture for each of the numOfObj objects and records, in
 * *imageMatches, the windows that findMatchInArea() accepts. The search stops
 * once MAXMATCH hits have been stored.
 *
 * Objects larger than the picture have no valid window: the row/column loops
 * simply do not run for them (the former linear loop squared the negative
 * length into a positive, bogus window count).
 */
void
findMatchesInImage(ImageMatches *imageMatches, dataUnit *picture,
    dataUnit *objects, int numOfObj,
    double thresh)
{
    imageMatches->numOfMatches = 0;
    imageMatches->picID = picture->id;

    for (int i = 0; i < numOfObj; i++) {
        dataUnit *curobj = &objects[i];
        /* window positions per axis; <= 0 when the object is too large */
        int windowsLength = picture->N - curobj->N + 1;

        for (int row = 0; row < windowsLength; row++) {
            for (int col = 0; col < windowsLength; col++) {
                if (! findMatchInArea(picture, curobj, row, col, thresh))
                    continue;

                match *matchcur =
                    &imageMatches->matches[imageMatches->numOfMatches];
                matchcur->objID = curobj->id;
                matchcur->i = row;
                matchcur->j = col;

                imageMatches->numOfMatches++;

                /* array full: nothing more can be recorded */
                if (imageMatches->numOfMatches == MAXMATCH)
                    return;
            }
        }
    }

}

/*
 * Entry point: reads the input file named by argv[1], searches every picture
 * and prints the number of matches found in each.
 *
 * Returns EXIT_FAILURE when no input path is given, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    /* zeroed so the counts are defined even if readData() fills nothing in */
    dataStruct data = { 0 };

    if (argc < 2) {
        fprintf(stderr,"usage: %s <input-file>\n",argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    readData(argv[1], &data);

    /* a variable-length array must have a positive size */
    if (data.numOfPic <= 0)
        return 0;

    ImageMatches imageMatches[data.numOfPic];

    for (int i = 0; i < data.numOfPic; i++) {
        ImageMatches *cur = &imageMatches[i];
        findMatchesInImage(cur,&data.pictures[i],
            data.objects, data.numOfObj, data.threshold);
        printf("main: i=%d numOfMatches=%d\n",i,cur->numOfMatches);
    }

    return 0;
}

Here's the second round of cleanup:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/*
 * dbgprt: printf-style debug trace.
 * Expands to printf() when DEBUG is non-zero; otherwise to an empty
 * statement, so its arguments are not evaluated. Uses the standard
 * __VA_ARGS__ form rather than the GNU-only named variadic parameter.
 */
#if DEBUG
#define dbgprt(...) \
    printf(__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

/* One square picture or object: N x N ints stored row by row in data. */
typedef struct dataUnit {
    int id;                     /* identifier taken from the input file */
    int N;                      /* side length; data holds N * N values */
    int *data;                  /* the N * N values */
} dataUnit;

/* The whole parsed input: threshold plus the picture and object arrays. */
typedef struct dataStruct {
    double threshold;
    int numOfPic;               /* number of entries in pictures */
    int numOfObj;               /* number of entries in objects */
    dataUnit *pictures;
    dataUnit *objects;
} dataStruct;

/* One hit: object objID found with its top-left corner at row i, column j. */
typedef struct match {
    int objID;
    int i;
    int j;
} match;

/* Upper bound on the hits kept per picture. */
#define MAXMATCH        3

/* The hits recorded for one picture. */
typedef struct ImageMatches {
    match matches[MAXMATCH];
    int numOfMatches;           /* number of used entries in matches */
    int picID;                  /* id of the picture that was searched */
} ImageMatches;

/*
 * Reads one data unit from fp into *currDataUnit: an id line, a size line
 * (N), then N lines of whitespace-separated integers stored row by row in a
 * freshly allocated N * N array (owned by the caller afterwards).
 *
 * Cells missing from a short row are left at 0. Terminates the program with
 * EXIT_FAILURE on truncated input, a negative size, or allocation failure.
 */
void
readDataUnit(FILE *fp,dataUnit *currDataUnit)
{
    char line[4000];

    /* fgets() returns NULL at EOF or on error; that must not reach strtol() */
    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr,"readDataUnit: missing id line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->id = (int) strtol(line, NULL, 10);

    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr,"readDataUnit: missing size line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->N = (int) strtol(line, NULL, 10);
    if (currDataUnit->N < 0) {
        fprintf(stderr,"readDataUnit: negative size %d\n",currDataUnit->N);
        exit(EXIT_FAILURE);
    }
    int totalSize = currDataUnit->N * currDataUnit->N;

    /* calloc so that cells absent from the file read as 0, not garbage */
    currDataUnit->data = calloc((size_t) totalSize, sizeof(int));
    if (currDataUnit->data == NULL && totalSize > 0) {
        fprintf(stderr,"readDataUnit: out of memory\n");
        exit(EXIT_FAILURE);
    }

    /* second value is N (the old trace printed id twice) */
    dbgprt("readDataUnit: ENTER id=%d N=%d totalSize=%d\n",
        currDataUnit->id,currDataUnit->N,totalSize);

    for (int i = 0; i < currDataUnit->N; i++) {
        int j = 0;

        if (fgets(line, sizeof line, fp) == NULL) {
            fprintf(stderr,"readDataUnit: unit %d is missing row %d\n",
                currDataUnit->id,i);
            exit(EXIT_FAILURE);
        }
        char *token = strtok(line, " \t\r\n");

        while (token && j < currDataUnit->N) {
            currDataUnit->data[i * currDataUnit->N + j] =
                (int) strtol(token, NULL, 10);
            token = strtok(NULL, " \t\r\n");
            j++;
        }
    }

    dbgprt("readDataUnit: EXIT\n");
}

/*
 * Parses the input file at path into *data.
 *
 * Expected layout: a threshold line, a picture-count line followed by that
 * many data units, then an object-count line followed by that many data
 * units. Lines that contain no number where a count is expected (e.g. blank
 * lines) are skipped; anything after the objects is ignored.
 *
 * All fields of *data are set up front, so a truncated file leaves zero
 * counts and NULL arrays rather than indeterminate values. A count of 0 is
 * accepted as a real count. Exits with EXIT_FAILURE if the file cannot be
 * opened, a count is negative, or an allocation fails.
 */
void
readData(char *path, dataStruct *data)
{
    FILE *fp;
    char line[4000];

    dbgprt("readData: ENTER\n");

    data->threshold = 0;
    data->numOfPic = 0;
    data->numOfObj = 0;
    data->pictures = NULL;
    data->objects = NULL;

    fp = fopen(path, "r");
    if (fp == NULL)
        exit(EXIT_FAILURE);

    int firstLine = 1;
    /* explicit flags: a count of 0 no longer doubles as "not read yet" */
    int havePic = 0,
        haveObj = 0;

    while (fgets(line, 4000, fp) != NULL) {
        if (firstLine) {
            data->threshold = strtod(line, NULL);
            firstLine = 0;
            continue;
        }
        if (havePic && haveObj)
            break;              /* both sections parsed; ignore the rest */

        char *end;
        long count = strtol(line, &end, 10);

        if (end == line)
            continue;           /* no digits on this line */
        if (count < 0) {
            fprintf(stderr,"readData: negative count %ld\n",count);
            exit(EXIT_FAILURE);
        }

        dataUnit *units = malloc(sizeof(dataUnit) * (size_t) count);
        if (units == NULL && count > 0) {
            fprintf(stderr,"readData: out of memory\n");
            exit(EXIT_FAILURE);
        }
        for (long i = 0; i < count; i++) {
            readDataUnit(fp,&units[i]);
        }

        if (! havePic) {
            data->numOfPic = (int) count;
            data->pictures = units;
            havePic = 1;
        }
        else {
            data->numOfObj = (int) count;
            data->objects = units;
            haveObj = 1;
        }
    }

    fclose(fp);

    dbgprt("readData: EXIT threshold=%g numOfPic=%d numOfObj=%d\n",
        data->threshold,data->numOfPic,data->numOfObj);
}

/*
 * Scores object against the object-sized window of picture whose top-left
 * corner is at (picoff_row, picoff_col).
 *
 * Returns the mean over all object cells of |(pic - obj) / pic|; the caller
 * compares it with the threshold. thresh itself is not used here.
 */
double
findMatchInArea(dataUnit *picture, dataUnit *object,
    int picoff_row, int picoff_col, double thresh)
{
    int picN = picture->N;
    int objN = object->N;
    double score = 0;

    dbgprt("findMatchInArea: ENTER picture=%d object=%d picoff=[%d,%d] picN=%d objN=%d\n",
        picture->id,object->id,picoff_row,picoff_col,picN,objN);

#if ORIG
    /* the linear loop needs the element count, which is no longer declared
       above */
    int totalSize = objN * objN;

    for (int idx = 0; idx < totalSize; idx++) {
        int row = idx / object->N;
        int col = idx % object->N;

        int picVal = picture->data[(row + picoff_row) * picture->N +
            (col + picoff_col)];
        int objVal = object->data[row * object->N + col];

        score += fabs((picVal - objVal) / (double) picVal);
    }

#else
    int picVal;
    int objVal;

    for (int row = 0; row < objN; row++) {
        const int *picdata = &picture->data[((picoff_row + row) * picN) +
            picoff_col];
        /* the window offset applies to the picture only: object rows always
           start at column 0 (adding picoff_col here read past the object) */
        const int *objdata = &object->data[row * objN];
        for (int col = 0; col < objN; col++) {
            picVal = picdata[col];
            objVal = objdata[col];
            score += fabs((picVal - objVal) / (double) picVal);
        }
    }

#endif
    double scoreN = score / (objN * objN);

    dbgprt("findMatchInArea: EXIT score=%g scoreN=%g\n",
        score,scoreN);

    return scoreN;
}

/*
 * Searches picture for each of the numOfObj objects and records, in
 * *imageMatches, every window whose findMatchInArea() score is <= thresh.
 * The search stops once MAXMATCH hits have been stored.
 *
 * The #endif now closes right after the per-object search, so the outer-loop
 * break and closing brace are shared by both arms and either arm is
 * brace-balanced.
 */
void
findMatchesInImage(ImageMatches *imageMatches, dataUnit *picture,
    dataUnit *objects, int numOfObj,
    double thresh)
{
    imageMatches->numOfMatches = 0;

    imageMatches->picID = picture->id;

    /* set once the matches array is full; unwinds all three loops */
    int stop = 0;

    for (int i = 0; i < numOfObj; i++) {
        dataUnit *curobj = &objects[i];

#if 0
        /* NOTE: this disabled arm predates findMatchInArea() returning the
           score instead of a boolean */
        int windowsLength = picture->N - curobj->N + 1;
        int numOfWindows = windowsLength * windowsLength;

        for (int j = 0; j < numOfWindows; j++) {
            if (findMatchInArea(picture, curobj, j / windowsLength,
                j % windowsLength, thresh)) {
                match *matchcur =
                    &imageMatches->matches[imageMatches->numOfMatches];
                matchcur->objID = curobj->id;
                matchcur->i = j / windowsLength;
                matchcur->j = j % windowsLength;

                imageMatches->numOfMatches++;

                stop = (imageMatches->numOfMatches == MAXMATCH);
                if (stop)
                    break;
            }
            if (stop)
                break;
        }
#else
        /* window positions per axis; <= 0 (no iterations) when the object is
           larger than the picture */
        int winN = picture->N - curobj->N + 1;

        for (int picoff_row = 0; picoff_row < winN; picoff_row++) {
            for (int picoff_col = 0; picoff_col < winN; picoff_col++) {
                double scoreN = findMatchInArea(picture, curobj, picoff_row,
                    picoff_col, thresh);
                if (scoreN > thresh)
                    continue;

                match *matchcur =
                    &imageMatches->matches[imageMatches->numOfMatches];
                matchcur->objID = curobj->id;
                matchcur->i = picoff_row;
                matchcur->j = picoff_col;

                imageMatches->numOfMatches++;

                stop = (imageMatches->numOfMatches == MAXMATCH);
                if (stop)
                    break;
            }

            if (stop)
                break;
        }
#endif

        if (stop)
            break;
    }
}

/*
 * Entry point: reads the input file named by argv[1], searches every picture
 * and prints the matches found in each.
 *
 * Returns EXIT_FAILURE when no input path is given or the result array
 * cannot be allocated, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    /* zeroed so the counts are defined even if readData() fills nothing in */
    dataStruct data = { 0 };

    if (argc < 2) {
        fprintf(stderr,"usage: %s <input-file>\n",argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    readData(argv[1], &data);

    /* nothing to search (and no zero/negative-sized array to create) */
    if (data.numOfPic <= 0)
        return 0;

#if 0
    ImageMatches imageMatches[data.numOfPic];
#else
    ImageMatches *imageMatches = malloc(sizeof(ImageMatches) * data.numOfPic);
    if (imageMatches == NULL) {
        fprintf(stderr,"main: out of memory\n");
        return EXIT_FAILURE;
    }
#endif

    for (int picidx = 0; picidx < data.numOfPic; picidx++) {
        ImageMatches *matchlist = &imageMatches[picidx];

        findMatchesInImage(matchlist,&data.pictures[picidx],
            data.objects, data.numOfObj, data.threshold);

        printf("main: picidx=%d numOfMatches=%d\n",
            picidx,matchlist->numOfMatches);

        for (int matchidx = 0;  matchidx < matchlist->numOfMatches;
            ++matchidx) {
            match *matchcur = &matchlist->matches[matchidx];
            printf("main: matchidx=%d i=%d j=%d\n",
                matchidx,matchcur->i,matchcur->j);
        }
    }

#if 1
    /* release the heap array allocated in the #else arm above */
    free(imageMatches);
#endif

    return 0;
}

After unifdef -k -UORIG:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

/*
 * dbgprt: printf-style debug trace.
 * Expands to printf() when DEBUG is non-zero; otherwise to an empty
 * statement, so its arguments are not evaluated. Uses the standard
 * __VA_ARGS__ form rather than the GNU-only named variadic parameter.
 */
#if DEBUG
#define dbgprt(...) \
    printf(__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

/* One square picture or object: N x N ints stored row by row in data. */
typedef struct dataUnit {
    int id;                     /* identifier taken from the input file */
    int N;                      /* side length; data holds N * N values */
    int *data;                  /* the N * N values */
} dataUnit;

/* The whole parsed input: threshold plus the picture and object arrays. */
typedef struct dataStruct {
    double threshold;
    int numOfPic;               /* number of entries in pictures */
    int numOfObj;               /* number of entries in objects */
    dataUnit *pictures;
    dataUnit *objects;
} dataStruct;

/* One hit: object objID found with its top-left corner at row i, column j. */
typedef struct match {
    int objID;
    int i;
    int j;
} match;

/* Upper bound on the hits kept per picture. */
#define MAXMATCH        3

/* The hits recorded for one picture. */
typedef struct ImageMatches {
    match matches[MAXMATCH];
    int numOfMatches;           /* number of used entries in matches */
    int picID;                  /* id of the picture that was searched */
} ImageMatches;

/*
 * readDataUnit -- parse one picture/object record from fp into *currDataUnit.
 *
 * Record layout as consumed here: one line holding the id, one line holding
 * the side length N, then N lines of space-separated integers (at most N
 * values are taken from each line).
 *
 * currDataUnit->data is allocated here and zero-filled, so cells missing
 * from a short row read as 0 rather than as indeterminate values.  The
 * caller owns the buffer.
 *
 * A premature end of file, a read error or an allocation failure terminates
 * the program with EXIT_FAILURE; previously a failed fgets handed NULL to
 * strtol.
 *
 * NOTE: each line is read into a 4000-byte buffer; a longer row would be
 * split across reads.
 */
void
readDataUnit(FILE *fp,dataUnit *currDataUnit)
{
    char line[4000];

    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr, "readDataUnit: missing id line\n");
        exit(EXIT_FAILURE);
    }
    currDataUnit->id = (int) strtol(line, NULL, 10);

    if (fgets(line, sizeof line, fp) == NULL) {
        fprintf(stderr, "readDataUnit: missing size line (id=%d)\n",
            currDataUnit->id);
        exit(EXIT_FAILURE);
    }
    currDataUnit->N = (int) strtol(line, NULL, 10);

    int totalSize = currDataUnit->N * currDataUnit->N;

    // calloc(0, ...) may legitimately return NULL, so only a non-empty
    // request that fails is treated as out-of-memory.
    currDataUnit->data = calloc(totalSize, sizeof(int));
    if (currDataUnit->data == NULL && totalSize > 0) {
        fprintf(stderr, "readDataUnit: out of memory (id=%d N=%d)\n",
            currDataUnit->id,currDataUnit->N);
        exit(EXIT_FAILURE);
    }

    dbgprt("readDataUnit: ENTER id=%d N=%d totalSize=%d\n",
        currDataUnit->id,currDataUnit->N,totalSize);

    for (int i = 0; i < currDataUnit->N; i++) {
        int j = 0;

        // Without this check a truncated file would re-tokenise whatever
        // the previous row left in the buffer.
        if (fgets(line, sizeof line, fp) == NULL) {
            fprintf(stderr, "readDataUnit: missing row %d (id=%d)\n",
                i,currDataUnit->id);
            exit(EXIT_FAILURE);
        }
        char *token = strtok(line, " ");

        // Extra tokens beyond N on a line are ignored.
        while (token && j < currDataUnit->N) {
            currDataUnit->data[i * currDataUnit->N + j] =
                (int) strtol(token, NULL, 10);
            token = strtok(NULL, " ");
            j++;
        }
    }

    dbgprt("readDataUnit: EXIT\n");
}

/*
 * readData -- load the whole input file at `path` into *data.
 *
 * Lines are consumed in this order: the threshold, the picture count
 * followed by that many records, then the object count followed by that
 * many records.  A count line that parses to 0 is skipped and the next
 * line is tried again as that count; anything after the objects is ignored.
 *
 * Every field of *data is given a defined value before parsing starts, so a
 * truncated file yields zero counts and NULL arrays rather than
 * indeterminate values.  The arrays are allocated here and owned by the
 * caller.
 *
 * A NULL path, an unopenable file, a negative count or an allocation
 * failure terminates the program with EXIT_FAILURE.
 */
void
readData(char *path, dataStruct *data)
{
    FILE *fp;
    char line[4000];

    dbgprt("readData: ENTER\n");

    data->threshold = 0;
    data->numOfPic = 0;
    data->numOfObj = 0;
    data->pictures = NULL;
    data->objects = NULL;

    // fopen must not be handed a null path.
    fp = (path != NULL) ? fopen(path, "r") : NULL;
    if (fp == NULL)
        exit(EXIT_FAILURE);

    int firstLine = 1;
    int numOfPic = 0,
        numOfObj = 0;

    while (fgets(line, 4000, fp) != NULL) {
        if (firstLine) {
            data->threshold = strtod(line, NULL);
            firstLine = 0;
        }
        else if (numOfPic == 0) {
            numOfPic = (int) strtol(line, NULL, 10);
            // A negative count would turn into a huge allocation size.
            if (numOfPic < 0)
                exit(EXIT_FAILURE);
            data->numOfPic = numOfPic;
            data->pictures = malloc(sizeof(dataUnit) * numOfPic);
            if (data->pictures == NULL && numOfPic > 0)
                exit(EXIT_FAILURE);
            for (int i = 0; i < numOfPic; i++) {
                readDataUnit(fp,&data->pictures[i]);
            }
        }
        else if (numOfObj == 0) {
            numOfObj = (int) strtol(line, NULL, 10);
            if (numOfObj < 0)
                exit(EXIT_FAILURE);
            data->numOfObj = numOfObj;
            data->objects = malloc(sizeof(dataUnit) * numOfObj);
            if (data->objects == NULL && numOfObj > 0)
                exit(EXIT_FAILURE);
            for (int i = 0; i < numOfObj; i++) {
                readDataUnit(fp,&data->objects[i]);
            }
        }
    }

    fclose(fp);

    dbgprt("readData: EXIT threshold=%g numOfPic=%d numOfObj=%d\n",
        data->threshold,data->numOfPic,data->numOfObj);
}

/*
 * findMatchInArea -- score how well `object` fits `picture` when its
 * top-left corner is placed at (picoff_row, picoff_col).
 *
 * For every object cell the relative difference
 * |(picVal - objVal) / picVal| against the picture cell beneath it is
 * accumulated; the return value is that sum divided by the number of
 * object cells (objN * objN).
 *
 * The caller must ensure the objN x objN window lies inside the picture.
 * `thresh` is accepted for interface compatibility but not used here.
 *
 * NOTE(review): a picture value of 0 makes the division yield inf or NaN,
 * and objN == 0 yields 0/0.  A NaN score compares false against any
 * threshold, so callers testing `score > thresh` will not reject it --
 * confirm how such input should be treated.
 */
double
findMatchInArea(dataUnit *picture, dataUnit *object,
    int picoff_row, int picoff_col, double thresh)
{
    int picN = picture->N;
    int objN = object->N;
    double score = 0;

    (void) thresh;

    dbgprt("findMatchInArea: ENTER picture=%d object=%d picoff=[%d,%d] picN=%d objN=%d\n",
        picture->id,object->id,picoff_row,picoff_col,picN,objN);

    int picVal;
    int objVal;

    for (int row = 0; row < objN; row++) {
        // Picture row under the window, already shifted to the window's
        // first column.
        const int *picdata = &picture->data[((picoff_row + row) * picN) +
            picoff_col];
        // The object is always walked from its own column 0: the picture
        // offset must not be applied to it, otherwise the wrong cells are
        // compared and reads run past the end of the object's buffer.
        const int *objdata = &object->data[row * objN];
        for (int col = 0; col < objN; col++) {
            picVal = picdata[col];
            objVal = objdata[col];
            score += fabs((picVal - objVal) / (double) picVal);
        }
    }

    double scoreN = score / (objN * objN);

    dbgprt("findMatchInArea: EXIT score=%g scoreN=%g\n",
        score,scoreN);

    return scoreN;
}

/*
 * findMatchesInImage -- slide every object over `picture` and record where
 * it matches.
 *
 * Objects are tried in array order and window positions in row-major
 * order.  A position counts as a match when findMatchInArea's score is not
 * greater than `thresh`.  Results go into *imageMatches (picID,
 * numOfMatches, matches[]); the search ends as soon as MAXMATCH matches
 * have been stored.
 */
void
findMatchesInImage(ImageMatches *imageMatches, dataUnit *picture,
    dataUnit *objects, int numOfObj,
    double thresh)
{
    imageMatches->numOfMatches = 0;
    imageMatches->picID = picture->id;

    int objidx = 0;

    while (objidx < numOfObj) {
        dataUnit *candidate = &objects[objidx++];

        // Number of valid window origins along each axis; zero or negative
        // when the object is larger than the picture, so the loops below
        // are skipped.
        int span = picture->N - candidate->N + 1;

        for (int r = 0; r < span; ++r) {
            for (int c = 0; c < span; ++c) {
                double areaScore =
                    findMatchInArea(picture, candidate, r, c, thresh);

                if (areaScore > thresh)
                    continue;

                match *slot =
                    imageMatches->matches + imageMatches->numOfMatches;

                slot->objID = candidate->id;
                slot->i = r;
                slot->j = c;

                // The result array is full: nothing more can be stored.
                if (++imageMatches->numOfMatches == MAXMATCH)
                    return;
            }
        }
    }
}

/*
 * main -- load the data set named by argv[1], search every picture for the
 * objects, and print the matches found for each picture.
 *
 * Returns EXIT_FAILURE when no input path is given or the result array
 * cannot be allocated, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
    // Without a path argument readData would be handed a null (or
    // out-of-range) pointer and pass it on to fopen.
    if (argc < 2) {
        fprintf(stderr, "usage: %s <input-file>\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    // Zero-initialised so the counts and pointers are well defined even if
    // readData returns without having filled every field.
    dataStruct data = {0};

    readData(argv[1], &data);

    ImageMatches *imageMatches = malloc(sizeof(ImageMatches) * data.numOfPic);

    // malloc(0) is allowed to return NULL, so only a non-empty request
    // that comes back NULL is an error.
    if (imageMatches == NULL && data.numOfPic > 0) {
        fprintf(stderr, "main: out of memory for %d match lists\n",
            data.numOfPic);
        return EXIT_FAILURE;
    }

    for (int picidx = 0; picidx < data.numOfPic; picidx++) {
        // One result slot per picture, filled in by findMatchesInImage.
        ImageMatches *matchlist = &imageMatches[picidx];

        findMatchesInImage(matchlist,&data.pictures[picidx],
            data.objects, data.numOfObj, data.threshold);

        printf("main: picidx=%d numOfMatches=%d\n",
            picidx,matchlist->numOfMatches);

        for (int matchidx = 0;  matchidx < matchlist->numOfMatches;
            ++matchidx) {
            match *matchcur = &matchlist->matches[matchidx];
            printf("main: matchidx=%d i=%d j=%d\n",
                matchidx,matchcur->i,matchcur->j);
        }
    }

    free(imageMatches);

    return 0;
}
Craig Estey
  • 30,627
  • 4
  • 24
  • 48
1

Since you say a single image takes 16 seconds to analyze, there is no need to batch them; you can send just one image whenever a worker is ready.

Treat rank 0 as the coordinator. Its job is to load data and distribute tasks to available workers. When it starts, it sends one task to each worker (because of course they're all ready).

All other ranks are workers. They get a task, call findMatchesInImage(picture, objects), and send the result back to the coordinator, who then knows that worker is ready and sends the next task.

For each task, the coordinator needs to send:

/* One picture or object: its id, side length N, and a pointer to its values. */
typedef struct dataUnit {
  int id;
  int N;
  int *data;
} dataUnit;

/* Per-task payload the coordinator has to deliver to a worker. */
double threshold;     /* match threshold */
int numOfObj;         /* number of entries in objects */
dataUnit picture;     /* the one picture this task covers */
dataUnit *objects;    /* the objects to look for in it */

Serialize all of that into an array of bytes which looks roughly like this:

[thresh][numOfObj][picId][picN][picData][objId0][objN0][objData0]...

For best performance you can use MPI_Isend() so the coordinator doesn't have to wait for a worker to receive a task. If you go this route, the serialized data needs to remain accessible until the worker receives it, but you don't need to check the receive status if you keep the data alive in the coordinator until the worker sends back the result.

Whenever the coordinator is idle, it can serialize the data for the next task, so it's ready to go when it receives a result from any worker.

John Zwinck
  • 239,568
  • 38
  • 324
  • 436
  • Thank you, I think I'll go this way - but what about the answer from `findMatchesInImage` ? it will require another serialization, right? – RedYoel Mar 26 '23 at 16:21
  • Yes each answer (result) will have to be serialized. But that's easy because `ImageMatches` is a fixed-size struct that just contains some integers. – John Zwinck Mar 27 '23 at 18:34