3

I am new in OpenCV world and neural networks but I have some coding experience in C++/Java.


I created my first ANN MLP and learned it the XOR:

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <iostream>
#include <iomanip>

using namespace cv;
using namespace ml;
using namespace std;

void print(Mat& mat, int prec)
{
    for (int i = 0; i<mat.size().height; i++)
    {
        cout << "[";
        for (int j = 0; j<mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << "]" << endl;
        }
    }
}

int main()
{
    const int hiddenLayerSize = 4;
    float inputTrainingDataArray[4][2] = {
        { 0.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 },
        { 1.0, 1.0 }
    };
    Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);

    float outputTrainingDataArray[4][1] = {
        { 0.0 },
        { 1.0 },
        { 1.0 },
        { 0.0 }
    };
    Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
        100000000,
        0.000000000000000001
    );
    mlp->setTermCriteria(termCrit);

    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    mlp->train(trainingData
        /*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        + ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
    );

    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
        Mat result;
        mlp->predict(sample, result);
        cout << sample << " -> ";// << result << endl;
        print(result, 0);
        cout << endl;
    }

    return 0;
}

It works very well for this simple problem, I also learn this network the 1-10 to binary conversion.


But i need to use MLP for simple image classification - road signs. I write the code for loading training images and preparing matrix for learning but I'm not able to train the network - it "learn" in one second even with 1 000 000 iterations! And it produce garbage results, the same for all inputs!


Here are my test images and the source code:

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>

#include <Windows.h>

using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;

const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;

void print(Mat& mat, int prec)
{
    for (int i = 0; i<mat.size().height; i++)
    {
        cout << "[ ";
        for (int j = 0; j<mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << " ]" << endl;
        }
    }
}

bool loadImage(string imagePath, Mat& outputImage)
{
    // load image in grayscale
    Mat image = imread(imagePath, IMREAD_GRAYSCALE);
    Mat temp;

    // check for invalid input
    if (image.empty()) {
        cout << "Could not open or find the image" << std::endl;
        return false;
    }

    // resize the image
    Size size(WIDTH_SIZE, HEIGHT_SIZE);
    resize(image, temp, size, 0, 0, CV_INTER_AREA);

    // convert to float 1-channel
    temp.convertTo(outputImage, CV_32FC1, 1.0/255.0);

    return true;
}

vector<string> getFilesNamesInFolder(string folder)
{
    vector<string> names;
    char search_path[200];
    sprintf(search_path, "%s/*.*", folder.c_str());
    WIN32_FIND_DATA fd;
    HANDLE hFind = ::FindFirstFile(search_path, &fd);
    if (hFind != INVALID_HANDLE_VALUE) {
        do {
            // read all (real) files in current folder
            // , delete '!' read other 2 default folder . and ..
            if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
                names.push_back(fd.cFileName);
            }
        } while (::FindNextFile(hFind, &fd));
        ::FindClose(hFind);
    }
    return names;
}

class Sign {
public:
    enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };

    Mat image;
    Category category;
    int number;

    Sign(Mat& image, string name) :image(image) {
        category = static_cast<Category>(name.at(0));
        number = stoi(name.substr(2, name.length()));
    };
};

vector<Sign> loadSignsFromFolder(String folderName) {
    vector<Sign> roadSigns;

    for (string fileName : getFilesNamesInFolder(folderName)) {
        Mat image;
        loadImage(folderName + fileName, image);
        roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); //cut .png
    }

    return roadSigns;
}

void showSignsInWindows(vector<Sign> roadSigns) {
    for (Sign sign : roadSigns) {
        String windowName = "Sign " + to_string(sign.number);
        namedWindow(windowName, WINDOW_AUTOSIZE);
        imshow(windowName, sign.image);
    }
    waitKey(0);
}

Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
    Mat roadSignsImageData;

    for (Sign sign : roadSigns) {
        Mat signImageDataInOneRow = sign.image.reshape(0, 1);
        roadSignsImageData.push_back(signImageDataInOneRow);
    }

    return roadSignsImageData;
}

Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
    int signsCount = (int) roadSigns.size();
    int signsVectorSize = signsCount + 1;

    Mat roadSignsData(0, signsVectorSize, CV_32FC1);

    int i = 1;
    for (Sign sign : roadSigns) {
        vector<float> outputTraningVector(signsVectorSize);
        fill(outputTraningVector.begin(), outputTraningVector.end(), -1.0);
        outputTraningVector[i++] = 1.0;

        Mat tempMatrix(outputTraningVector, false);
        roadSignsData.push_back(tempMatrix.reshape(0, 1));
    }

    return roadSignsData;
}

int main(int argc, char* argv[])
{
    if (argc != 2) {
        cout << " Usage: display_image ImageToLoadAndDisplay" << endl;
        return -1;
    }

    const int hiddenLayerSize = 500;

    vector<Sign> roadSigns = loadSignsFromFolder("../../../Znaki/A/");
    Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
    Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);

    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
    //mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
        ,100 //(int) INT_MAX
        ,0.000001
    );
    mlp->setTermCriteria(termCrit);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    auto start = system_clock::now();
    mlp->train(trainingData
        //, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        , ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
    );
    auto duration = duration_cast<milliseconds> (system_clock::now() - start);
    cout << "Training time: " << duration.count() << "ms" << endl;

    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat result;
        //mlp->predict(inputTrainingData.row(i), result);
        mlp->predict(roadSigns[i].image.reshape(0, 1), result);
        //cout << result << endl;
        print(result, 2);
    }


    //showSignsInWindows(roadSigns);
    return 0;
}

What is wrong in this code, that XOR works but images not? I cheked the input and output matrix and they're correct... could somebody also explain me when to/shoud I use the ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE or what values of setActivationFunction and setTrainMethod parameters should I use?


Thanks!

Michał Lytek
  • 10,577
  • 3
  • 16
  • 24

1 Answers1

2

There was a problem in backprop weight scale parameter - it was too big and the ANN couldn't learn more difficult things.


I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); and the hidden layer size to 100 (to speed up the learning) - now it's working!

Michał Lytek
  • 10,577
  • 3
  • 16
  • 24
  • If there is too few samples the weight doesn't change beetween iterations and the training is stopped ignoring term criteria. And when training has stopped too early the NN return garbage results becuase it's undertrained - you need to provide more samples or you can decrease layer size :P – Michał Lytek Jun 02 '16 at 17:12