0

We have a fast camera (>= 700 fps), we need to calculate FFT on images and do some action. However, the FFT works for some frames and then crashes. The problem doesn't appear when I use slow frame rate like 10 fps.

To further isolate the problem, I wrote a test program to do FFT in a loop, and it also crashes. Please let me know what am I doing wrong.

UPDATE: I compiled with -g and run it with lldb, I still didn't see valid stacktrace.

➜  cmake-build-debug lldb IntelTest
(lldb) target create "IntelTest"
Current executable set to '/Users/harshmathur/CourseworkRepo/IntelTest/cmake-build-debug/IntelTest' (x86_64).
(lldb) run
Process 13947 launched: '/Users/harshmathur/CourseworkRepo/IntelTest/cmake-build-debug/IntelTest' (x86_64)
DftiGetValue DFTI_PACKED_FORMAT : 57
Process 13947 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x111844100)
    frame #0: 0x0000000110568a4c libmkl_avx2.2.dylib`mkl_dft_avx2_ipps_cFFTfwd_64_64fc + 2428
libmkl_avx2.2.dylib`mkl_dft_avx2_ipps_cFFTfwd_64_64fc:
->  0x110568a4c <+2428>: vmovupd %ymm2, (%rsi)
    0x110568a50 <+2432>: vmovupd %ymm7, 0x200(%rsi)
    0x110568a58 <+2440>: vaddpd %ymm13, %ymm6, %ymm2
    0x110568a5d <+2445>: vsubpd %ymm13, %ymm6, %ymm6

UPDATE: Also, If NN is set to 32, The below code works without any error or aborts, It is only when NN > 32, it abruptly stops.

The code follows.

#include <iostream>
#include <mkl.h>
#define NN 128
#define NPIXFFT NN * (1 + NN / 2)
using namespace std;

typedef struct {
    double re;
    double im;
} mkl_double_complex;


int getFFTWPlans(DFTI_DESCRIPTOR_HANDLE *descHandle);

int main() {

    int i;
    MKL_LONG status;
    DFTI_DESCRIPTOR_HANDLE descHandle;
    getFFTWPlans(&descHandle);
    double *image = (double*)malloc(sizeof(double) * NN * NN);
    double *recoveredImage = (double*)malloc(sizeof(double) * NN * NN);
    mkl_double_complex *imageFT = (mkl_double_complex*) mkl_malloc(
            NPIXFFT * sizeof(mkl_double_complex),64);

    for (i=0; i < NN * NN; i++) {
        image[i] = i * 2 + 1;
    }

    while (1) {
        status = DftiComputeForward(descHandle, image, imageFT);
        if (status != 0) {
            cout <<"DftiComputeForward Failed: " << status << endl;
            break;
        }
        status = DftiComputeBackward(descHandle, imageFT, recoveredImage);
        if (status != 0) {
            cout <<"DftiComputeBackward Failed: " << status << endl;
            break;
        }
    }
    return 0;
}

int getFFTWPlans(DFTI_DESCRIPTOR_HANDLE *descHandle){

    MKL_LONG lengths[2];
    lengths[0] = NN;
    lengths[1] = NN;
    MKL_LONG status = DftiCreateDescriptor(descHandle, DFTI_DOUBLE, DFTI_REAL, 2, lengths);

    if (status != 0) {
        cout << "DftiCreateDescriptor failed : " << status << endl;
        return -1;
    }
    status = DftiSetValue(*descHandle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
    if (status != 0) {
        cout << "DftiSetValue DFTI_PLACEMENT failed : " << status << endl;
        return -2;
    }

    status = DftiSetValue(*descHandle, DFTI_THREAD_LIMIT, 1);
    if (status != 0) {
        cout << "DftiSetValue DFTI_THREAD_LIMIT failed : " << status << endl;
        return -2;
    }

    MKL_LONG format;
    status = DftiGetValue(*descHandle, DFTI_PACKED_FORMAT, &format);
    if (status != 0) {
        cout << "DftiGetValue DFTI_PACKED_FORMAT failed : " << status << endl;
        return -3;
    }
    cout << "DftiGetValue DFTI_PACKED_FORMAT : " << format << endl;

    status = DftiCommitDescriptor(*descHandle);
    if (status != 0) {
        cout << "DftiCommitDescriptor failed : " << status << endl;
        return -4;
    }

    return status;
}
user16217248
  • 3,119
  • 19
  • 19
  • 37
Harsh M
  • 625
  • 2
  • 11
  • 25
  • Crashes how, with what error message? When you run it under GDB, where does it crash, like which instruction? Have you used `valgrind` or similar to look for bugs like writing outside of objects, perhaps overwriting some data structures. – Peter Cordes Apr 20 '23 at 13:31
  • @PeterCordes For few iterations the code works, after that no error message, just programs stops. I do not get any error mesage, that is my main problem. In my original code and this code If I comment ComputeForward/Backward function, It works, otherwise after few iterations, say 80-90, the program just stops abruptly – Harsh M Apr 20 '23 at 14:48
  • So like I said, use a debugger to see where it stops. Or `strace` or `ltrace` to see how it exits, or maybe set breakpoints on the libc `exit` function and similar, so your debugger can get control to see a backtrace before actually exiting. – Peter Cordes Apr 20 '23 at 14:54
  • @PeterCordes I am new to intel compilers and the backtrace function doesn’t exist for glibc in windows to add as a signal to sigsegv like we do in gcc. I also tried adding -traceback while compiling with icx, however it still didnt print stacktrace. Can you provide compiler option or some way to get traceback? – Harsh M Apr 20 '23 at 15:03
  • A debugger like GDB or the one in Visual Studio can show you the call stack when you're stopped at a breakpoint or exception, without any special instrumentation or compiler options beyond enabling debug symbols. Like `icx -O3 -march=native -g`. In GDB, the debugger command is `bt` or `backtrace`. – Peter Cordes Apr 20 '23 at 15:16
  • @PeterCordes If I change the NN to 32, the code works, It is failing for values of NN > 32. I tried -g also and did it with lldb in my mac, but I get nonsensical output (updated in the post). – Harsh M Apr 20 '23 at 17:20
  • 2
    You’re probably writing out of bounds, random crashes are often caused by that. – Cris Luengo Apr 20 '23 at 17:55
  • *I still didn't see valid stacktrace.* - You didn't ask LLDB to print a stacktrace; it only showed what it printed by default on a fault. (Which is already quite useful, showing that `mkl_dft_avx2_ipps_cFFTfwd_64_64fc` is using a bad pointer, `0x111844100`, to store 32 bytes of double-precision FP data. `0x111844100` isn't the start of a new page, but it's close to the start of a page, so probably off the end of an array). https://lldb.llvm.org/use/map.html shows some commonly-useful commands, like `thread backtrace` or `bt` to ask it to print a backtrace. – Peter Cordes Apr 21 '23 at 06:36
  • 1
    Debug your code, and check you allocation sizes. – Peter Cordes Apr 21 '23 at 06:37
  • @PeterCordes Thank you for the suggestion. Turns out the problem was the image 2D and is stored in 1D array in row major order. Since I am creating a 2D FFT descriptor, The Input and Output strides must be given. I will put my working code as a solution. Thanks again :) – Harsh M Apr 22 '23 at 03:29

1 Answers1

0

Turns out the problem was the image is 2D and is stored in 1D array in row major order. Since I am creating a 2D FFT descriptor, The Input and Output strides must be given.

Here is the Working code:

#include <iostream>
#include <fstream>
#include <mkl.h>
#include <complex>
#include <chrono>
#define NN 256
#define NPIXFFT NN * (1 + (NN / 2))
using namespace std;

typedef struct {
    double re;
    double im;
} mkl_double_complex;


int getFFTWPlans(DFTI_DESCRIPTOR_HANDLE *descHandle);
int printFFT(mkl_double_complex *imageFT);


int main() {

    chrono::high_resolution_clock::time_point t0, t1;
    chrono::duration<double> dt;

    int i, flag=0;
    MKL_LONG status, count;
    DFTI_DESCRIPTOR_HANDLE descHandle;
    getFFTWPlans(&descHandle);

    double *image = (double*)malloc(sizeof(double) * NN * NN);
    double *recoveredImage = (double*)malloc(sizeof(double) * NN * NN);
    mkl_double_complex *imageFT = (mkl_double_complex*) mkl_malloc(
            NPIXFFT * sizeof(mkl_double_complex),64);

//    ifstream currentImage("F:\\tiptilt\\20230421_121442\\CurrentImage_286.dat",  ios::binary);

    double TEMP;

    for (int i=0; i<NN * NN; i++){
//        currentImage.read((char*) &TEMP, sizeof(double));
        image[i] = i * i + i * 2 + 1;
    }

    double x;
    count = 0;
    while (1) {
        if (count == 0) {
            t0 = chrono::high_resolution_clock::now();
        }
        status = DftiComputeForward(descHandle, image, imageFT);
        if (status != 0) {
            cout <<"DftiComputeForward Failed: " << status << endl;
            break;
        }
//        if (flag == 0) {
//            printFFT(imageFT);
//            flag = 1;
//        }
        for (i=0; i<NPIXFFT; i++){
            x = imageFT[i].re;
        }
        status = DftiComputeBackward(descHandle, imageFT, recoveredImage);
        if (status != 0) {
            cout <<"DftiComputeBackward Failed: " << status << endl;
            break;
        }
        count += 1;
        if (count == 3000) {
            t1 = chrono::high_resolution_clock::now();
            dt = chrono::duration_cast<chrono::duration<double>>(t1 - t0);
            cout<< "Time (ms): "<<dt.count() * 1000 / (count - 1) <<"\r";
            cout.flush();
        }
    }
    return 0;
}

int getFFTWPlans(DFTI_DESCRIPTOR_HANDLE *descHandle){

    MKL_LONG lengths[2];
    lengths[0] = NN;
    lengths[1] = NN;
    MKL_LONG status = DftiCreateDescriptor(descHandle, DFTI_DOUBLE, DFTI_REAL, 2, lengths);

    if (status != 0) {
        cout << "DftiCreateDescriptor failed : " << status << endl;
        return -1;
    }
    status = DftiSetValue(*descHandle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
    if (status != 0) {
        cout << "DftiSetValue DFTI_PLACEMENT failed : " << status << endl;
        return -2;
    }

    status = DftiSetValue(*descHandle, DFTI_THREAD_LIMIT, 1);
    if (status != 0) {
        cout << "DftiSetValue DFTI_THREAD_LIMIT failed : " << status << endl;
        return -3;
    }

    status = DftiSetValue(*descHandle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
    if (status != 0) {
        cout << "DftiSetValue DFTI_CONJUGATE_EVEN_STORAGE failed : " << status << endl;
        return -4;
    }

    status = DftiSetValue(*descHandle, DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT);
    if (status != 0) {
        cout << "DftiSetValue DFTI_PACKED_FORMAT failed : " << status << endl;
        return -5;
    }

    MKL_LONG strides[3];
    strides[0] = 0;
    strides[1] = 1;
    strides[2] = NN;

    status = DftiSetValue(*descHandle, DFTI_INPUT_STRIDES, strides);
    if (status != 0) {
        cout << "DftiSetValue DFTI_INPUT_STRIDES failed : " << status << endl;
        return -6;
    }

    status = DftiSetValue(*descHandle, DFTI_OUTPUT_STRIDES, strides);
    if (status != 0) {
        cout << "DftiSetValue DFTI_OUTPUT_STRIDES failed : " << status << endl;
        return -7;
    }

    MKL_LONG format;
    status = DftiGetValue(*descHandle, DFTI_PACKED_FORMAT, &format);
    if (status != 0) {
        cout << "DftiGetValue DFTI_PACKED_FORMAT failed : " << status << endl;
        return -8;
    }
    cout << "DftiGetValue DFTI_PACKED_FORMAT : " << format << endl;

    status = DftiCommitDescriptor(*descHandle);
    if (status != 0) {
        cout << "DftiCommitDescriptor failed : " << status << endl;
        return -9;
    }

    return status;
}

int printFFT(mkl_double_complex *imageFT) {
    int i;

    for (i=0;i<NPIXFFT;i++) {
        cout <<i << "  " << imageFT[i].re << "  "<<imageFT[i].im<<endl;
    }

    return 0;
}
Harsh M
  • 625
  • 2
  • 11
  • 25