How to calculate the number of operations for the opencv Gaussian filter function?

Question

Looking at the OpenCV source code, the Gaussian filter function is built from sub-functions, some of which call further sub-functions. In general, how can we find out how many operations this function needs in total, and how many memory reads and writes it performs?

Counting by hand looks like a lot of effort. Does anybody actually count this by hand, or is there a smarter way to find out?

// Builds a filter engine for Gaussian smoothing.
//
// The 1-D row and column kernels are produced by createGaussianKernels() from
// ksize/sigma1/sigma2 and then handed to createSeparableLinearFilter().
// `type` is used as both the source and the destination type of the engine,
// and `borderType` is forwarded unchanged.
cv::Ptr<cv::FilterEngine> cv::createGaussianFilter( int type, Size ksize,
                                    double sigma1, double sigma2,
                                    int borderType )
{
    Mat rowCoeffs, colCoeffs;
    createGaussianKernels( rowCoeffs, colCoeffs, type, ksize, sigma1, sigma2 );

    // A negative anchor is replaced by the kernel centre inside
    // createSeparableLinearFilter(); the literal 0 is the delta argument.
    const Point defaultAnchor( -1, -1 );
    return createSeparableLinearFilter( type, type, rowCoeffs, colCoeffs,
                                        defaultAnchor, 0, borderType );
}


// Computes the 1-D Gaussian kernels for the two passes of a separable blur.
//
//   kx, ky  - outputs: kernels built from (ksize.width, sigma1) and
//             (ksize.height, sigma2) respectively
//   type    - image type; only its depth is used
//   ksize   - requested kernel size; a non-positive dimension is derived from
//             the matching sigma when that sigma is positive
//   sigma1  - sigma paired with ksize.width
//   sigma2  - sigma paired with ksize.height; a non-positive value means
//             "same as sigma1"
//
// Asserts that both final kernel dimensions are positive and odd.
static void createGaussianKernels( Mat & kx, Mat & ky, int type, Size ksize,
                               double sigma1, double sigma2 )
{
    const int depth = CV_MAT_DEPTH(type);
    // Multiplier applied to sigma when deriving the kernel size: 3 for 8-bit
    // unsigned images, 4 for every other depth.
    const int sigmaSpan = depth == CV_8U ? 3 : 4;
    // Kernels are generated with at least CV_32F depth.
    const int kernelDepth = std::max(depth, CV_32F);

    if( sigma2 <= 0 )
        sigma2 = sigma1;

    // automatic detection of kernel size from sigma (result is forced odd by |1)
    if( ksize.width <= 0 && sigma1 > 0 )
        ksize.width = cvRound(sigma1*sigmaSpan*2 + 1) | 1;
    if( ksize.height <= 0 && sigma2 > 0 )
        ksize.height = cvRound(sigma2*sigmaSpan*2 + 1) | 1;

    CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 &&
        ksize.height > 0 && ksize.height % 2 == 1 );

    // Negative sigmas are clamped to zero before the kernels are generated.
    sigma1 = std::max( sigma1, 0. );
    sigma2 = std::max( sigma2, 0. );

    kx = getGaussianKernel( ksize.width, sigma1, kernelDepth );

    const bool distinctColumnKernel =
        !(ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON);
    if( distinctColumnKernel )
    {
        ky = getGaussianKernel( ksize.height, sigma2, kernelDepth );
        return;
    }

    // Same size and (numerically) same sigma: reuse the row kernel.
    ky = kx;
}


// Returns an n x 1 matrix of Gaussian coefficients that sum to 1.
//
//   n     - number of coefficients
//   sigma - Gaussian sigma; when not positive it is derived from n as
//           ((n-1)*0.5 - 1)*0.3 + 0.8
//   ktype - coefficient type, must be CV_32F or CV_64F (asserted)
//
// For odd n <= 7 with a non-positive sigma the raw coefficients come from a
// fixed table instead of being evaluated with exp(); they still go through
// the same normalisation step as computed coefficients.
cv::Mat cv::getGaussianKernel( int n, double sigma, int ktype )
{
    const int SMALL_GAUSSIAN_SIZE = 7;
    static const float small_gaussian_tab[][SMALL_GAUSSIAN_SIZE] =
    {
        {1.f},
        {0.25f, 0.5f, 0.25f},
        {0.0625f, 0.25f, 0.375f, 0.25f, 0.0625f},
        {0.03125f, 0.109375f, 0.21875f, 0.28125f, 0.21875f, 0.109375f, 0.03125f}
    };

    // Table row n>>1 holds the coefficients for sizes 1, 3, 5 and 7.
    const bool useTable = n % 2 == 1 && n <= SMALL_GAUSSIAN_SIZE && sigma <= 0;
    const float* table = useTable ? small_gaussian_tab[n>>1] : 0;

    CV_Assert( ktype == CV_32F || ktype == CV_64F );
    Mat kernel(n, 1, ktype);
    float* f32 = kernel.ptr<float>();
    double* f64 = kernel.ptr<double>();
    const bool singlePrecision = ktype == CV_32F;

    const double sigmaX = sigma > 0 ? sigma : ((n-1)*0.5 - 1)*0.3 + 0.8;
    const double scale2X = -0.5/(sigmaX*sigmaX);

    // Pass 1: store the unnormalised weights and accumulate their sum.
    // The sum is taken over the *stored* values, so in the CV_32F case it
    // reflects the rounding to float.
    double total = 0;
    for( int idx = 0; idx < n; idx++ )
    {
        const double x = idx - (n-1)*0.5;   // offset from the kernel centre
        const double weight = table ? (double)table[idx] : std::exp(scale2X*x*x);
        if( singlePrecision )
        {
            f32[idx] = (float)weight;
            total += f32[idx];
        }
        else
        {
            f64[idx] = weight;
            total += f64[idx];
        }
    }

    // Pass 2: scale every coefficient by 1/sum.
    const double norm = 1./total;
    if( singlePrecision )
    {
        for( int idx = 0; idx < n; idx++ )
            f32[idx] = (float)(f32[idx]*norm);
    }
    else
    {
        for( int idx = 0; idx < n; idx++ )
            f64[idx] *= norm;
    }

    return kernel;
}

// Builds a FilterEngine that applies a separable linear filter: a row filter
// writing into an intermediate buffer followed by a column filter writing the
// destination.
//
//   _srcType, _dstType        - source / destination types; channel counts must
//                               match (asserted)
//   __rowKernel, __columnKernel - 1-D kernels for the two passes
//   _anchor                   - anchor position; a negative component is
//                               replaced by half the corresponding kernel length
//   _delta                    - forwarded to the column filter (rescaled when
//                               the fixed-point path is taken)
//   _rowBorderType, _columnBorderType, _borderValue
//                             - forwarded to the FilterEngine constructor
cv::Ptr<cv::FilterEngine> cv::createSeparableLinearFilter(
int _srcType, int _dstType,
InputArray __rowKernel, InputArray __columnKernel,
Point _anchor, double _delta,
int _rowBorderType, int _columnBorderType,
const Scalar& _borderValue )
{
    Mat rowSrc = __rowKernel.getMat();
    Mat colSrc = __columnKernel.getMat();

    _srcType = CV_MAT_TYPE(_srcType);
    _dstType = CV_MAT_TYPE(_dstType);
    const int srcDepth = CV_MAT_DEPTH(_srcType);
    const int dstDepth = CV_MAT_DEPTH(_dstType);
    const int channels = CV_MAT_CN(_srcType);
    CV_Assert( channels == CV_MAT_CN(_dstType) );

    // rows + cols - 1 is the length of a kernel stored as either 1xN or Nx1.
    const int rowLen = rowSrc.rows + rowSrc.cols - 1;
    const int colLen = colSrc.rows + colSrc.cols - 1;
    if( _anchor.x < 0 )
        _anchor.x = rowLen/2;
    if( _anchor.y < 0 )
        _anchor.y = colLen/2;

    // The anchor point handed to getKernelType follows the kernel's layout.
    const Point rowAnchorPt = rowSrc.rows == 1 ? Point(_anchor.x, 0) : Point(0, _anchor.x);
    const int rowKind = getKernelType(rowSrc, rowAnchorPt);
    const Point colAnchorPt = colSrc.rows == 1 ? Point(_anchor.y, 0) : Point(0, _anchor.y);
    const int colKind = getKernelType(colSrc, colAnchorPt);

    Mat rowCoeffs, colCoeffs;
    int bufDepth = std::max(CV_32F, std::max(srcDepth, dstDepth));
    int shiftBits = 0;

    // Integer (CV_32S) intermediate arithmetic is used for 8-bit sources when
    // either (a) both kernels are smooth+symmetrical and the output is 8-bit,
    // or (b) both kernels are (a)symmetrical integer kernels and the output is
    // 16-bit signed.
    const int smoothSymm = KERNEL_SMOOTH+KERNEL_SYMMETRICAL;
    const int anySymm = KERNEL_SYMMETRICAL+KERNEL_ASYMMETRICAL;
    const bool smoothTo8u = rowKind == smoothSymm && colKind == smoothSymm &&
                            dstDepth == CV_8U;
    const bool integerTo16s = (rowKind & anySymm) && (colKind & anySymm) &&
                              (rowKind & colKind & KERNEL_INTEGER) &&
                              dstDepth == CV_16S;
    const bool integerPath = srcDepth == CV_8U && (smoothTo8u || integerTo16s);

    if( integerPath )
    {
        bufDepth = CV_32S;
        // For 8-bit output each kernel is scaled by 2^8 when converted to
        // integers; after both passes the accumulated scale is 2^16, which is
        // the shift handed to the column filter and the factor applied to delta.
        const int kernelShift = dstDepth == CV_8U ? 8 : 0;
        const int kernelScale = 1 << kernelShift;
        rowSrc.convertTo( rowCoeffs, CV_32S, kernelScale );
        colSrc.convertTo( colCoeffs, CV_32S, kernelScale );
        shiftBits = kernelShift*2;
        _delta *= (1 << shiftBits);
    }
    else
    {
        // Bring both kernels to the buffer depth, converting only if needed.
        if( rowSrc.type() == bufDepth )
            rowCoeffs = rowSrc;
        else
            rowSrc.convertTo( rowCoeffs, bufDepth );

        if( colSrc.type() == bufDepth )
            colCoeffs = colSrc;
        else
            colSrc.convertTo( colCoeffs, bufDepth );
    }

    const int _bufType = CV_MAKETYPE(bufDepth, channels);
    Ptr<BaseRowFilter> rowPass = getLinearRowFilter(
        _srcType, _bufType, rowCoeffs, _anchor.x, rowKind);
    Ptr<BaseColumnFilter> columnPass = getLinearColumnFilter(
        _bufType, _dstType, colCoeffs, _anchor.y, colKind, _delta, shiftBits );

    // No 2-D (non-separable) filter is supplied: the first argument is empty.
    return Ptr<FilterEngine>( new FilterEngine(Ptr<BaseFilter>(), rowPass, columnPass,
        _srcType, _dstType, _bufType, _rowBorderType, _columnBorderType, _borderValue ));
}


// Selects and constructs the column-filter implementation for the vertical
// pass of a separable linear filter.
//
//   bufType      - type of the intermediate buffer the column filter reads
//   dstType      - type of the destination the column filter writes
//   _kernel      - 1-D column kernel; its type must equal the buffer depth
//   anchor       - anchor position, forwarded to the filter object
//   symmetryType - kernel type flags; only the KERNEL_SYMMETRICAL /
//                  KERNEL_ASYMMETRICAL bits are inspected here
//   delta        - forwarded to the filter object
//   bits         - fixed-point shift handed to FixedPtCastEx / the vector ops
//
// Any (buffer depth, destination depth) pair that is not listed in the branch
// taken ends in the CV_Error_ call at the bottom.
cv::Ptr<cv::BaseColumnFilter> cv::getLinearColumnFilter( int bufType, int dstType,
                                         InputArray _kernel, int anchor,
                                         int symmetryType, double delta,
                                         int bits )
{
Mat kernel = _kernel.getMat();
int sdepth = CV_MAT_DEPTH(bufType), ddepth = CV_MAT_DEPTH(dstType);
int cn = CV_MAT_CN(dstType);
// Preconditions: same channel count in buffer and destination, buffer depth
// at least max(ddepth, CV_32S), and kernel stored at the buffer depth.
CV_Assert( cn == CV_MAT_CN(bufType) &&
    sdepth >= std::max(ddepth, CV_32S) &&
    kernel.type() == sdepth );

// Kernel is neither symmetrical nor asymmetrical: use the generic ColumnFilter,
// always with the ColumnNoVec vector-op type.
if( !(symmetryType & (KERNEL_SYMMETRICAL|KERNEL_ASYMMETRICAL)) )
{
    if( ddepth == CV_8U && sdepth == CV_32S )
        return makePtr<ColumnFilter<FixedPtCastEx<int, uchar>, ColumnNoVec> >
        (kernel, anchor, delta, FixedPtCastEx<int, uchar>(bits));
    if( ddepth == CV_8U && sdepth == CV_32F )
        return makePtr<ColumnFilter<Cast<float, uchar>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_8U && sdepth == CV_64F )
        return makePtr<ColumnFilter<Cast<double, uchar>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_16U && sdepth == CV_32F )
        return makePtr<ColumnFilter<Cast<float, ushort>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_16U && sdepth == CV_64F )
        return makePtr<ColumnFilter<Cast<double, ushort>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_16S && sdepth == CV_32F )
        return makePtr<ColumnFilter<Cast<float, short>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_16S && sdepth == CV_64F )
        return makePtr<ColumnFilter<Cast<double, short>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_32F && sdepth == CV_32F )
        return makePtr<ColumnFilter<Cast<float, float>, ColumnNoVec> >(kernel, anchor, delta);
    if( ddepth == CV_64F && sdepth == CV_64F )
        return makePtr<ColumnFilter<Cast<double, double>, ColumnNoVec> >(kernel, anchor, delta);
}
else
{
    // Symmetrical or asymmetrical kernel.
    // rows + cols - 1 is the length of a kernel stored as 1xN or Nx1.
    int ksize = kernel.rows + kernel.cols - 1;
    // 3-tap kernels get the SymmColumnSmallFilter specialisation for three
    // depth combinations; every other combination falls through to the
    // general SymmColumnFilter chain below.
    if( ksize == 3 )
    {
        if( ddepth == CV_8U && sdepth == CV_32S )
            return makePtr<SymmColumnSmallFilter<
                FixedPtCastEx<int, uchar>, SymmColumnVec_32s8u> >
                (kernel, anchor, delta, symmetryType, FixedPtCastEx<int, uchar>(bits),
                SymmColumnVec_32s8u(kernel, symmetryType, bits, delta));
        // Only taken when no fixed-point shift is requested (bits == 0).
        if( ddepth == CV_16S && sdepth == CV_32S && bits == 0 )
            return makePtr<SymmColumnSmallFilter<Cast<int, short>,
                SymmColumnSmallVec_32s16s> >(kernel, anchor, delta, symmetryType,
                    Cast<int, short>(), SymmColumnSmallVec_32s16s(kernel, symmetryType, bits, delta));
        if( ddepth == CV_32F && sdepth == CV_32F )
            return makePtr<SymmColumnSmallFilter<
                Cast<float, float>,SymmColumnSmallVec_32f> >
                (kernel, anchor, delta, symmetryType, Cast<float, float>(),
                SymmColumnSmallVec_32f(kernel, symmetryType, 0, delta));
    }
    // General symmetrical/asymmetrical chain. Combinations that pass an
    // explicit SymmColumnVec_* object use that vector-op type; the rest use
    // ColumnNoVec.
    if( ddepth == CV_8U && sdepth == CV_32S )
        return makePtr<SymmColumnFilter<FixedPtCastEx<int, uchar>, SymmColumnVec_32s8u> >
            (kernel, anchor, delta, symmetryType, FixedPtCastEx<int, uchar>(bits),
            SymmColumnVec_32s8u(kernel, symmetryType, bits, delta));
    if( ddepth == CV_8U && sdepth == CV_32F )
        return makePtr<SymmColumnFilter<Cast<float, uchar>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
    if( ddepth == CV_8U && sdepth == CV_64F )
        return makePtr<SymmColumnFilter<Cast<double, uchar>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
    if( ddepth == CV_16U && sdepth == CV_32F )
        return makePtr<SymmColumnFilter<Cast<float, ushort>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
    if( ddepth == CV_16U && sdepth == CV_64F )
        return makePtr<SymmColumnFilter<Cast<double, ushort>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
    if( ddepth == CV_16S && sdepth == CV_32S )
        return makePtr<SymmColumnFilter<Cast<int, short>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
    if( ddepth == CV_16S && sdepth == CV_32F )
        return makePtr<SymmColumnFilter<Cast<float, short>, SymmColumnVec_32f16s> >
             (kernel, anchor, delta, symmetryType, Cast<float, short>(),
              SymmColumnVec_32f16s(kernel, symmetryType, 0, delta));
    if( ddepth == CV_16S && sdepth == CV_64F )
        return makePtr<SymmColumnFilter<Cast<double, short>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
    if( ddepth == CV_32F && sdepth == CV_32F )
        return makePtr<SymmColumnFilter<Cast<float, float>, SymmColumnVec_32f> >
            (kernel, anchor, delta, symmetryType, Cast<float, float>(),
            SymmColumnVec_32f(kernel, symmetryType, 0, delta));
    if( ddepth == CV_64F && sdepth == CV_64F )
        return makePtr<SymmColumnFilter<Cast<double, double>, ColumnNoVec> >
            (kernel, anchor, delta, symmetryType);
}

// No implementation matched the requested depth combination.
CV_Error_( CV_StsNotImplemented,
    ("Unsupported combination of buffer format (=%d), and destination format (=%d)",
    bufType, dstType));

// NOTE(review): presumably unreachable if CV_Error_ throws; an empty pointer
// is returned here so every path yields a value - confirm.
return Ptr<BaseColumnFilter>();
}

If you just want the number of times a function is called, use a [profiler](http://en.wikipedia.org/wiki/Profiling_%28computer_programming%29). If you want the algorithm complexity, it is a [convolution](http://en.wikipedia.org/wiki/Convolution) of a 2D signal, although for this particular kernel the straightforward convolution can be optimized using FFT — remi, Sep 12 '14 at 09:41

score 0 · Answer 1 · answered Sep 12 '14 at 11:27

Of course, you could disassemble the program, that will give you an instruction count and you can also easily count reads and writes with e.g. grep. I suggest using a cross-compiler to disassemble into some nice RISC assembler like MIPS - reading arcane, bloated X86 assembler is not fun.

How to calculate the number of operations for the opencv Gaussian filter function?

1 Answer