So I am hitting a wall with my C# Machine Learning project. I am attempting to train an algorithm to recognize numbers. Since this is only an exercise I have an image set of 200 numbers (20 each for 0 to 9). Obviously if I wanted a properly trained algorithm I would use a more robust training set, but this is just an exercise to see if I can get it working in the first place. I can get it up to 60% accuracy, but not past that. I have been doing some research into activation functions and, from what I understand, LeakyRelu is the function I should be using. However, if I use the LeakyRelu function across the board then it doesn't learn anything, and I'm not sure how to use the LeakyRelu as an output activation function. Using sigmoid or tanh as an output activation function makes more sense to me. Here is a block of code that creates the array that feeds the backpropagation:
public static float ACTIVE_VALUE = 1;
public static float INACTIVE_VALUE = -1;

// Builds the target (one-hot style) array that feeds backpropagation.
// This is specifically designed for an algorithm that will detect a number between 0 - 9.
// The slot at index 'value' is set to ACTIVE_VALUE and every other slot to
// INACTIVE_VALUE. Any out-of-range value produces an all-INACTIVE array,
// matching the original switch's default case.
public static float[] valueToArray(int value)
{
    float[] target = new float[10];
    for (int i = 0; i < target.Length; i++)
    {
        target[i] = INACTIVE_VALUE;
    }
    // Only mark a slot active for a valid digit; otherwise leave all inactive.
    if (value >= 0 && value <= 9)
    {
        target[value] = ACTIVE_VALUE;
    }
    return target;
}
I don't know how to use something like this to read a LeakyRelu output. So I figured the best option would be to use LeakyRelu for the input and hidden layers and then use tanh or sigmoid for the output layer. However that creates an issue, because sigmoid just returns NaN (due to a rounding error from what I understand) and tanh returns -1 or 1 but nothing in between. If I use tanh across the board it works, and it learns, but it only reaches an accuracy of 60% then stops developing there. I assume this is due to the "vanishing gradient" issue. However, if I use LeakyRelu for input and hidden layers and then tanh for the output, it stays at 12-14% (which is just as good as randomly guessing a number).
I am using a neural network that I got from a github user here: https://github.com/kipgparker/BackPropNetwork
He posted a research paper online about neural networks, and it was one of the top hits on google. That's how I found it in the first place. I posted my full project in a zip on GitHub here: https://github.com/JoshuaC0352/Machine-Learning
I am not opposed to using a library I can get from nuget like SiaNet (https://scisharp.github.io/SiaNet/api/SiaNet.Layers.AvgPooling1D.html), however I have gotten so familiar with the one I am currently working with I am somewhat reluctant to switch over because I'd feel like I am almost starting from scratch, because I would have to learn how to interface with a whole new library.
EDIT: additional code. This is my while loop that reads the image and trains the algorithm:
// Endless training loop: runs batches of 10000 randomly-selected training
// images through the network and prints the accuracy after each batch.
// NOTE(review): relies on class members `rng`, `networkInputs`,
// `currentNetwork`, and `Interface` declared elsewhere in this file.
public static void singleThread()
{
    int batchSize = 10000;
    int rangeLow = 0;
    int rangeHi = 9;
    int hits = 0;
    while (true)
    {
        for (int i = 0; i < batchSize; i++)
        {
            // Give a training progress report every 100 iterations; writing
            // to the console on every pass would slow the loop down.
            if (i % 100 == 0)
            {
                Console.SetCursorPosition(0, Console.CursorTop);
                Console.Write("Training: ");
                Console.Write("(" + (((float)i / (float)batchSize) * 100) + "%)");
                Console.Write("          ");
            }
            // Randomly select an image from the list.
            // BUG FIX: Random.Next's upper bound is EXCLUSIVE, so the original
            // rng.Next(rangeLow, rangeHi) never produced the digit 9, and
            // rng.Next(1, 20) never selected image #20 — a tenth of the
            // classes and a twentieth of each class were never trained on.
            int number = rng.Next(rangeLow, rangeHi + 1);
            int index = rng.Next(1, 21);
            // BUG FIX: dispose the bitmap each iteration. Image.FromFile holds
            // a file handle plus GDI+ resources that otherwise leak across
            // thousands of loads per batch.
            using (Bitmap loadedImage = (Bitmap)Image.FromFile("Train/" + number + "/" +
                index + ".png", true))
            {
                int indexLocation = 0;
                // Convert the image into grayscale, normalized to [0, 1].
                // BUG FIX: feeding raw 0-255 intensities saturates tanh/sigmoid
                // units (tanh of anything above ~3 is effectively 1), which
                // produces the all -1/+1 outputs and NaNs described above;
                // scaling by 255 keeps activations in a trainable range.
                for (int x = 0; x < loadedImage.Width; x++)
                {
                    for (int y = 0; y < loadedImage.Height; y++)
                    {
                        Color pixel = loadedImage.GetPixel(x, y);
                        float grayValue = (float)((pixel.R * 0.3) + (pixel.G * 0.59) + (pixel.B * 0.11));
                        networkInputs[indexLocation] = grayValue / 255f;
                        indexLocation++;
                    }
                }
            }
            // The network will guess what the image is, and return the guess as a float array
            float[] guess = currentNetwork.BackPropagate(networkInputs, Interface.valueToArray(number));
            // This if statement checks if the guess was correct
            if (Interface.guessToValue(guess) == number)
            {
                hits++;
            }
        }
        // Record and report this batch's accuracy, then reset for the next one.
        currentNetwork.Performance = ((float)hits / (float)batchSize);
        hits = 0;
        Console.WriteLine("Score: " + (currentNetwork.Performance * 100) + "%");
    }
}