5

I am trying to calculate the skew of text in an image so I can correct it for the best OCR results.

Currently this is the function I am using:

// Estimates the skew angle, in degrees, of the content of `img`.
//
// `img` is modified in place: it is binarized with a fixed threshold of 225,
// inverted, eroded with a 5x3 rectangular kernel, and finally the outline of
// the minimum-area rectangle enclosing all remaining non-zero pixels is drawn
// onto it.
//
// Returns the angle of that rectangle; 90 is added when the angle reported by
// minAreaRect is below -45.
double compute_skew(Mat &img)
{
    // Binarize, then invert the colors.
    cv::threshold(img, img, 225, 255, cv::THRESH_BINARY);
    cv::bitwise_not(img, img);

    // Erode with a 5x3 rectangular structuring element.
    const cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 3));
    cv::erode(img, img, kernel);

    // Gather the coordinates of every non-zero pixel, scanning row by row.
    std::vector<cv::Point> foreground;
    for (int row = 0; row < img.rows; ++row)
    {
        for (int col = 0; col < img.cols; ++col)
        {
            if (img.at<uchar>(row, col) != 0)
                foreground.push_back(cv::Point(col, row));
        }
    }

    const cv::RotatedRect box = cv::minAreaRect(cv::Mat(foreground));

    // Fold angles below -45 degrees up by 90 degrees.
    const double angle = (box.angle < -45.) ? box.angle + 90. : box.angle;

    // Draw the four edges of the rectangle (0-1, 1-2, 2-3, 3-0) onto the image.
    cv::Point2f corners[4];
    box.points(corners);
    for (int i = 0; i < 4; ++i)
    {
        const int next = (i == 3) ? 0 : i + 1;
        cv::line(img, corners[i], corners[next], cv::Scalar(255, 0, 0), 1, CV_AA);
    }

    return angle;
}

When I inspect the angle in the debugger for the image below, I get 0.000000: enter image description here

However when I give it this image I get proper results of a skew of about 16 degrees:

enter image description here

How can I properly detect the skew in the first image?

Clip
  • 3,018
  • 8
  • 42
  • 77

2 Answers2

13

There are a few other ways to get the skew angle: 1) the Hough transform, and 2) the horizontal projection profile. For 2), rotate the image through a range of candidate angles (angle bins) and compute the horizontal projection at each one; the angle whose horizontal histogram has the greatest peak is the deskew angle.

I have provided an implementation of 1) below. I believe it is superior to the bounding-box method you are using, because the bounding-box method requires the image to be completely cleaned of noise, which just isn't possible most of the time.

You should know that the Hough method also doesn't work well if there is too much noise. You can reduce noise in different ways, depending on what type of "line" you want to treat as the most dominant in the image; I have provided two methods for this (preprocess1 and preprocess2). Be sure to experiment with the parameters, thresholds, etc.

results (all run using preprocess2, all run using same parameter set)

code

#include <cmath>
#include <iostream>
#include <vector>

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

// Finds the dominant straight line in a binary image with a Hough transform
// and reports its inclination as the skew angle.
//
// im   : single-channel 8-bit image; only pixels equal to 255 vote, and the
//        one-pixel border is skipped.
// orig : image on which the detected line is drawn (Scalar(0,0,255)); it is
//        then shown in the window "orig".
// skew : output, skew angle in degrees. 90 for a vertical line, 0 when the
//        image is empty or contains no voting pixel.
//
// Lines are parameterised in normal form relative to the image centre:
//   (x-cenx)*cos(theta) + (y-ceny)*sin(theta) = rho
// with theta sampled in `angleBins` steps over [0, pi).
void hough_transform(Mat& im,Mat& orig,double* skew)
{
    if(im.empty())
    {
        // Nothing to vote with; an empty accumulator cannot be searched.
        *skew=0;
        return;
    }

    const int angleBins = 180;
    const double halfW = .5*im.cols;
    const double halfH = .5*im.rows;
    const double max_r = sqrt(halfW*halfW+halfH*halfH);
    // One spare column so that the largest shifted rho (at most 2*max_r)
    // always truncates to a valid index.
    const int rhoBins = static_cast<int>(2*max_r)+1;
    Mat acc = Mat::zeros(Size(rhoBins,angleBins),CV_32SC1);
    const int cenx = im.cols/2;
    const int ceny = im.rows/2;

    // The trigonometric values depend only on the angle bin, so compute them
    // once instead of once per voting pixel.
    vector<double> cosT(angleBins);
    vector<double> sinT(angleBins);
    for(int t=0;t<angleBins;t++)
    {
        const double a = static_cast<double>(t)/angleBins*CV_PI;
        cosT[t]=cos(a);
        sinT[t]=sin(a);
    }

    for(int y=1;y<im.rows-1;y++)
    {
        const uchar* row = im.ptr<uchar>(y);
        for(int x=1;x<im.cols-1;x++)
        {
            if(row[x]!=255)
                continue;
            for(int t=0;t<angleBins;t++)
            {
                // Shift rho by max_r so the column index is non-negative.
                const double r = (x-cenx)*cosT[t]+(y-ceny)*sinT[t]+max_r;
                acc.ptr<int>(t)[static_cast<int>(r)]++;
            }
        }
    }

    // Search the peak on the raw vote counts. Quantising the accumulator to
    // 8 bits first can make several near-maximum bins tie at 255.
    double maxVotes = 0;
    Point maxLoc;
    minMaxLoc(acc,0,&maxVotes,0,&maxLoc);
    if(maxVotes<=0)
    {
        // No pixel voted: there is no line to report.
        *skew=0;
        cout<<"skew angle "<<0<<endl;
        imshow("orig",orig);
        return;
    }

    const double sinTheta = sinT[maxLoc.y];
    const double cosTheta = cosT[maxLoc.y];
    const double rho = maxLoc.x-max_r;
    if(fabs(sinTheta)<0.000001)//check vertical
    {
        //when vertical, line equation becomes
        //x = rho (relative to the image centre)
        Point2d p1 = Point2d(rho+cenx,0);
        Point2d p2 = Point2d(rho+cenx,im.rows);
        line(orig,p1,p2,Scalar(0,0,255),1);
        *skew=90;
        cout<<"skew angle "<<" 90"<<endl;
    }else
    {
        //convert normal form back to slope intercept form
        //y = mx + b, using the same integer centre as the voting loop
        const double m = -cosTheta/sinTheta;
        const double b = rho/sinTheta+ceny-m*cenx;
        Point2d p1 = Point2d(0,b);
        Point2d p2 = Point2d(im.cols,im.cols*m+b);
        line(orig,p1,p2,Scalar(0,0,255),1);
        // The line runs left to right, so its inclination is simply atan(m).
        const double skewangle = atan(m)*180./CV_PI;
        *skew=skewangle;
        cout<<"skew angle "<<skewangle<<endl;
    }
    imshow("orig",orig);
}

// Edge detector: computes the squared magnitude of a 3x3 Sobel gradient for
// every interior pixel of the 8-bit single-channel image `im`, rescales the
// result to the range 0..255 (NORM_MINMAX), converts it to 8 bits and
// binarizes it with a threshold of 50. The one-pixel border stays zero
// before rescaling.
Mat preprocess1(Mat& im)
{
    Mat magnitude = Mat::zeros(im.size(),CV_32SC1);

    for(int y=1;y<im.rows-1;y++)
    {
        const uchar* above = im.ptr<uchar>(y-1);
        const uchar* here  = im.ptr<uchar>(y);
        const uchar* below = im.ptr<uchar>(y+1);
        int* out = magnitude.ptr<int>(y);

        for(int x=1;x<im.cols-1;x++)
        {
            // Horizontal derivative: right column minus left column.
            const int dx = (above[x+1]-above[x-1])
                +2*(here[x+1]-here[x-1])
                +(below[x+1]-below[x-1]);
            // Vertical derivative: bottom row minus top row.
            const int dy = (below[x-1]-above[x-1])
                +2*(below[x]-above[x])
                +(below[x+1]-above[x+1]);
            out[x] = dx*dx + dy*dy;
        }
    }

    normalize(magnitude,magnitude,255,0,NORM_MINMAX);
    magnitude.convertTo(magnitude,CV_8UC1);
    threshold(magnitude,magnitude,50,255,THRESH_BINARY);
    return magnitude;
}

// Keeps only the pixels that lie on the lower edge of bright regions.
//
// 1) `im` is binarized with a Gaussian adaptive threshold (block size 15,
//    constant -2); the code assumes white content on a black background.
// 2) An interior pixel is set to 255 in the result only when it is white,
//    all three pixels in the row above it are white, and at least one of the
//    three pixels in the row below it is black (bottom of a text line).
// Every other pixel of the result is 0.
Mat preprocess2(Mat& im)
{
    Mat binary;
    adaptiveThreshold(im,binary,255,CV_ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY,15,-2);

    Mat ret = Mat::zeros(im.size(),CV_8UC1);
    for(int y=1;y<binary.rows-1;y++)
    {
        const uchar* above = binary.ptr<uchar>(y-1);
        const uchar* here  = binary.ptr<uchar>(y);
        const uchar* below = binary.ptr<uchar>(y+1);
        uchar* out = ret.ptr<uchar>(y);

        for(int x=1;x<binary.cols-1;x++)
        {
            const bool aboveAllWhite = above[x-1]!=0 && above[x]!=0 && above[x+1]!=0;
            const bool belowAnyBlack = below[x-1]==0 || below[x]==0 || below[x+1]==0;

            if(aboveAllWhite && here[x]==255 && belowAnyBlack)
                out[x] = 255;
        }
    }
    return ret;
}
// Rotates `im` by `thetaRad` (radians) about its centre and returns the
// result on a canvas sized to the bounding box of the rotated image, so the
// content is not cropped to the original frame.
Mat rot(Mat& im,double thetaRad)
{
    cv::Mat rotated;

    // Bounding box of the rotated image. std::fabs is used explicitly so the
    // floating-point overload is always the one selected.
    const double absSin = std::fabs(std::sin(thetaRad));
    const double absCos = std::fabs(std::cos(thetaRad));
    const double nw = absSin*im.rows+absCos*im.cols;
    const double nh = absCos*im.rows+absSin*im.cols;

    // getRotationMatrix2D expects degrees; rotate about the new canvas centre.
    cv::Mat rot_mat = cv::getRotationMatrix2D(Point2d(nw*.5,nh*.5), thetaRad*180/CV_PI, 1);

    // Offset that moves the source image to the middle of the new canvas.
    // The third component is 0, so only the rotation part of rot_mat is
    // applied to it before it is added to the translation column.
    Mat pos = Mat::zeros(Size(1,3),CV_64FC1);
    pos.at<double>(0)=(nw-im.cols)*.5;
    pos.at<double>(1)=(nh-im.rows)*.5;
    Mat res = rot_mat*pos;
    rot_mat.at<double>(0,2) += res.at<double>(0);
    rot_mat.at<double>(1,2) += res.at<double>(1);

    // The canvas size is truncated to whole pixels.
    cv::warpAffine(im, rotated, rot_mat,Size(static_cast<int>(nw),static_cast<int>(nh)), cv::INTER_LANCZOS4);
    return rotated;
}

int main(int argc, char** argv)
{
    string src="C:/data/skew.png";
    Mat im= imread(src);
    Mat gray;
    cvtColor(im,gray,CV_BGR2GRAY);

    Mat preprocessed = preprocess2(gray);
    imshow("preprocessed2",preprocessed);
    double skew;
    hough_transform(preprocessed,im,&skew);
    Mat rotated = rot(im,skew* CV_PI/180);
    imshow("corrected",rotated);

    waitKey(0);
    return 0;
}
Zaw Lin
  • 5,629
  • 1
  • 23
  • 41
  • I change my idea your pictures is good. But code is not. Rotate my images with wrong angles... – Can Ürek Aug 16 '14 at 02:04
  • can you post the images? maybe i can see if they can be made to work at all – Zaw Lin Aug 16 '14 at 07:05
  • Thaks for your reply. Can you help me for Crop Text like that http://stackoverflow.com/questions/23125359/crop-text-out-of-binary-image-opencv-c I cant implement that link to my codes. May be after i need rotate again. – Can Ürek Aug 16 '14 at 12:47
  • hmm..it's a different problem. the answer i provided assume that the image is already cropped around the text. it only concerns itself with finding the rotation angle. according to my experience, it works best with real camera images as input..not so well with synthetic or already processed images. you can tweak the function `preprocess2(Mat& im)` to fit your needs. for actual cropping of images, you would need to find a different algorithm – Zaw Lin Aug 18 '14 at 08:51
  • 3
    It's better than this one: http://felix.abecassis.me/2011/09/opencv-detect-skew-angle/ Thanks! – Martijn Mellens Oct 22 '14 at 07:57
  • 1
    Thank you sir you are a live saver – Ibrahim Amer Apr 16 '15 at 11:08
  • Thanks a lot. This is great. But I am afraid, there is no any usage of the function preprocess1. Can u explain it? Or is that a mistake? – Samitha Chathuranga Jul 28 '15 at 01:39
  • It's just an alternative to preprocess2. You can use that or use preprocess2 and the results will be different(perhaps better depending on your application. For detecting text, preprocess2 is better.). Note that preprocess1 is just a standard edge detector. – Zaw Lin Jul 28 '15 at 08:52
  • This code is great, but I'm wondering why the skew angle is always rounded to the nearest degree. – Nikhil Sridhar Aug 31 '17 at 05:55
  • Hi, that's because of 180 bins are allocated(one bin for each angle) when calculating hough transform. You can increase it to higher bins to calculate finer angles though I doubt whether it has meaningful impact on performance. – Zaw Lin Aug 31 '17 at 14:21
  • Thanks but it seems that when you increase the number of bins you also get slower processing time. Any ideas? – Nikhil Sridhar Aug 31 '17 at 22:41
  • Yes that's correct. Performance is quadratic to the number of bins(I think). I have not really focused on performance on this. If you want faster, you can take a look at opencv implementation although theirs might be harder to understand. Or you can take a look at implementing this using simd instructions like sse. – Zaw Lin Sep 02 '17 at 06:50
2

The approach you posted has its own "ideal binarization" assumption: the threshold value directly affects the result. Use an Otsu threshold, or consider the DFT for a more generic solution.

otsu trial:

// Otsu trial: binarizes the input with an automatically chosen (Otsu)
// threshold, inverts it, fits a minimum-area rectangle around all non-zero
// pixels, prints the resulting skew angle and shows the rectangle.
// Returns 1 when the image cannot be read or has no foreground pixels.
int main()
{
    Mat input = imread("your text");
    if(input.empty())
    {
        // imread returns an empty Mat on failure; cvtColor would throw on it.
        std::cerr << "could not read input image" << std::endl;
        return 1;
    }
    cvtColor(input, input, CV_BGR2GRAY);

    // With THRESH_OTSU the threshold is selected by Otsu's method; the
    // value 100 passed here is not used as the threshold.
    Mat img;
    cv::threshold(input, img, 100, 255, cv::THRESH_OTSU);

    cv::bitwise_not(img, img);
    imshow("img ", img);
    waitKey(0);

    if(cv::countNonZero(img) == 0)
    {
        // No foreground pixels: there is nothing to fit a rectangle around.
        std::cerr << "no foreground pixels after thresholding" << std::endl;
        return 1;
    }

    vector<Point> points;
    findNonZero(img, points);
    cv::RotatedRect box = cv::minAreaRect(points);

    // Fold angles below -45 degrees up by 90 degrees.
    double angle = box.angle;
    if (angle < -45.)
        angle += 90.;
    std::cout << "skew angle " << angle << std::endl;

    // Outline the fitted rectangle on the binarized image.
    cv::Point2f vertices[4];
    box.points(vertices);
    for(int i = 0; i < 4; ++i)
        cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(255, 0, 0));
    imshow("img ", img);
    waitKey(0);

    return 0;
}

enter image description here

baci
  • 2,528
  • 15
  • 28