2
// Initialize variables, ZALLOC allocates and fills with zeroes
ZALLOC(z0q, numqbins, double) ;  
ZALLOC(z1q, numqbins, double) ;  
ZALLOC(z2q, numqbins, double) ;  

z_in = (double*)fftw_malloc(sizeof(double) * numqbins * 2) ;
z_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numqbins * 2) ;

r2c_plan = fftw_plan_dft_r2c_1d(2 * numqbins, z_in, z_out, FFTW_ESTIMATE) ;
c2r_plan = fftw_plan_dft_c2r_1d(2 * numqbins, z_out, z_in, FFTW_ESTIMATE) ;


... (ddbins is initialized to all 0s, z0q, z1q, and z2q are filled with values) ...


for (k1=0; k1<numx; ++k1) { 
    if (snpmarkers[xindex[k1]] -> ignore) {
        continue ;
    } else {
        s = snpmarkers[xindex[k1]] -> tagnumber ;
    }
    y = res[k1] * wt[k1] ; 

    z0q[s] += 1 ; 
    z1q[s] += y ; 
    z2q[s] += y*y ; 
  }
// Perform convolution.
fft_ddadd(ddbins, z0q, z1q, z2q, numqbins, diffmax, z_in, z_out, r2c_plan, c2r_plan) ;

void fft_ddadd(double **ddbins, double *z0q, double *z1q, double *z2q, int numqbins, 
            int diffmax, double *z_in, fftw_complex *z_out, fftw_plan r2c_plan, fftw_plan c2r_plan) 
{
    double *dd00, *dd01, *dd10, *dd11, *dd02, *dd20 ; 
    int i;
    fftw_complex *ft_z0q, *ft_z1q, *ft_z2q ;    

    dd00 = ddbins[0] ;
    dd01 = ddbins[1] ; 
    dd10 = ddbins[3] ; 
    dd11 = ddbins[4] ; 
    dd02 = ddbins[2] ; 
    dd20 = ddbins[6] ;     

    ft_z0q = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numqbins * 2) ;
    ft_z1q = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numqbins * 2) ;
    ft_z2q = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * numqbins * 2) ;    


    // Copy arrays into z_in and transform, copy complex results out into ft_z's
    for (i = 0; i < numqbins; ++i) {
        z_in[i] = z0q[i] ;
        z_in[i + numqbins] = 0 ;
    }    

    fftw_execute(r2c_plan) ;    

    for (i = 0; i < numqbins; ++i) {
        z_in[i] = z1q[i] ;
        ft_z0q[i][0] = z_out[i][0] ;
        ft_z0q[i][1] = z_out[i][1] ;
    }    

    fftw_execute(r2c_plan) ;    

    for (i = 0; i < numqbins; ++i) {
        z_in[i] = z2q[i] ;
        ft_z1q[i][0] = z_out[i][0] ;
        ft_z1q[i][1] = z_out[i][1] ;
    }    

    fftw_execute(r2c_plan) ;    

    for (i = 0; i < numqbins; ++i) {
        ft_z2q[i][0] = z_out[i][0] ;
        ft_z2q[i][1] = z_out[i][1] ;
    }    

    // Do correlations
    fft_singledd(dd00, ft_z0q, ft_z0q, numqbins, diffmax, z_in, z_out, c2r_plan);
    fft_singledd(dd01, ft_z0q, ft_z1q, numqbins, diffmax, z_in, z_out, c2r_plan);
    fft_singledd(dd10, ft_z1q, ft_z0q, numqbins, diffmax, z_in, z_out, c2r_plan);
    fft_singledd(dd11, ft_z1q, ft_z1q, numqbins, diffmax, z_in, z_out, c2r_plan);
    fft_singledd(dd02, ft_z0q, ft_z2q, numqbins, diffmax, z_in, z_out, c2r_plan);
    fft_singledd(dd20, ft_z2q, ft_z0q, numqbins, diffmax, z_in, z_out, c2r_plan);    


}    

void fft_singledd(double* ddbin, fftw_complex* ftz_0, fftw_complex* ftz_1, int numqbins, 
            int diffmax, double *z_in, fftw_complex *z_out, fftw_plan c2r_plan) {

    int i ;
    // Correlate ftz_0 and ftz_1 together and copy into z_out
    for (i = 0; i < numqbins * 2; ++i) {
        z_out[i][0] =   ftz_0[i][0] * ftz_1[i][0] + ftz_0[i][1] * ftz_1[i][1] ;
        z_out[i][1] = - ftz_0[i][0] * ftz_1[i][1] + ftz_0[i][1] * ftz_1[i][0] ;
    }    
    // Transform back
    fftw_execute(c2r_plan) ;    
    // Copy the desired results out
    for (i = 1; i < diffmax; ++i) {
        ddbin[i] += (z_in[2 * numqbins - i] / (2 * numqbins)) ;
    }    
}

void ddadd(double **ddbins, double *z0q, double *z1q, double *z2q, int numqbins, int diffmax) {
    double *dd00, *dd01, *dd10, *dd11, *dd02, *dd20 ; 
    double a0, a1, a2, b0, b1, b2 ; 
    int i, s, t, tmax, d ;    

    dd00 = ddbins[0] ;
    dd01 = ddbins[1] ; 
    dd10 = ddbins[3] ; 
    dd11 = ddbins[4] ; 
    dd02 = ddbins[2] ; 
    dd20 = ddbins[6] ;     

    for (s=0; s<numqbins; ++s) { 
        a0 = z0q[s] ; 
        a1 = z1q[s] ;
        a2 = z2q[s] ;
        tmax = MIN(numqbins-1, s + diffmax) ; 
        for (t=s+1; t<=tmax; ++t) { 
            b0 = z0q[t] ;
            b1 = z1q[t] ;
            b2 = z2q[t] ;
            d = t - s ; 
            dd00[d] += a0*b0 ;
            dd01[d] += a0*b1 ;
            dd10[d] += a1*b0 ;
            dd11[d] += a1*b1 ;
            dd02[d] += a0*b2 ;
            dd20[d] += a2*b0 ;
        }
    }
}

When running the above code, I encounter a problem in the results. When comparing the outputs into ddbins from ddadd (non Fourier transform, computed by an O(n^2) correlation) against the output from fft_ddadd, the errors are nearly constant and alternating:

ddbins[0] - fft_ddbins[0] = [0.000000000000, -0.016764512518, 0.016764511936, -0.016764512286, 0.016764512053, -0.016764512286, 0.016764512053, -0.016764512169, 0.016764511995, -0.016764512227, 0.016764512053, -0.016764512344, 0.016764512053, -0.016764512227, 0.016764511995, -0.016764512227, 0.016764512053, -0.016764512344, 0.016764512053, -0.016764512227, 0.016764512053, -0.016764512344, 0.016764511995, -0.016764512227, 0.016764512053, -0.016764512227, 0.016764512053, -0.016764512344 ...]

Which, as you can see, is roughly the same error in every place, with alternating sign, plus or minus some error attributable to machine precision.

A possible issue which has been raised is that there is a Fourier transform component which is, in real space, corresponds to the array [+1, -1, +1, -1 ...], which would, if excluded, explain the alternating error. However, I do not see how this element could be excluded. The arrays z*q are copied in fully (length numqbins) into the plans and executed, while the output arrays (length 2*numqbins) are also copied directly, and the error is introduced before the outputs are copied into their desired location.

This is my first time asking a question on this site, so please tell me if more information (or less!) is required, or if this isn't quite the right place to ask. Thanks in advance.

N. A.
  • 121
  • 1
  • 1
    On the one hand, the Discrete Fourier Transform handles the frame as a period of a periodic signal. Indeed, it ensures that its Fourier frequencies are also discrete and equally spaced in the frequency space, and periodic (+2k*pi => equal coefficient). https://en.wikipedia.org/wiki/Discrete_Fourier_transform On the other hand, the O(n2) correlation seems to correspond to cross correlation of function featuring a compact support https://en.wikipedia.org/wiki/Cross-correlation. The results can be different! – francis Jul 04 '18 at 02:17

0 Answers0