I tried the same thing, but measured CPU time with clock() instead:
int main()
{
clock_t startTime;
clock_t endTime;
int height =1024;
int width =1024;
// 700 ms
cv::Mat in(height,width,CV_8UC1, cv::Scalar(255));
std::cout << "value: " << (int)in.at<unsigned char>(0,0) << std::endl;
cv::Mat out(height,width,CV_8UC1);
startTime = clock();
out = in/4;
endTime = clock();
std::cout << "1: " << (float)(endTime-startTime)/(float)CLOCKS_PER_SEC << std::endl;
std::cout << "value: " << (int)out.at<unsigned char>(0,0) << std::endl;
startTime = clock();
in /= 4;
endTime = clock();
std::cout << "2: " << (float)(endTime-startTime)/(float)CLOCKS_PER_SEC << std::endl;
std::cout << "value: " << (int)in.at<unsigned char>(0,0) << std::endl;
//40 ms
cv::Mat in2(height,width,CV_8UC1, cv::Scalar(255));
startTime = clock();
for (int y=0; y < in2.rows; ++y)
{
//unsigned char* ptr = in2.data + y*in2.step1();
unsigned char* ptr = in2.ptr(y);
for (int x=0; x < in2.cols; ++x)
{
ptr[x] /= 4;
}
}
std::cout << "value: " << (int)in2.at<unsigned char>(0,0) << std::endl;
endTime = clock();
std::cout << "3: " << (float)(endTime-startTime)/(float)CLOCKS_PER_SEC << std::endl;
cv::namedWindow("...");
cv::waitKey(0);
}
This produces the following results:
value: 255
1: 0.016
value: 64
2: 0.016
value: 64
3: 0.003
value: 63
As you can see, the results differ (64 vs. 63), probably because OpenCV's matrix division (cv::divide(), which is what the / operator on a cv::Mat ends up calling) performs a floating-point division and rounds to the nearest integer, whereas your faster version uses integer division, which truncates. Integer division is faster, but it gives a different result.
In addition, OpenCV applies a saturate_cast in its computation, but I guess the bigger difference in computational load comes from the double-precision division.