
So I am working on an implementation of a backprop neural network: I made a 'NEURON' class, as every beginner in neural networks does.

However, I am getting weird results: when the dataset is small (as with the XOR function, where there are only 4 possible input combinations: 00, 11, 01, 10), the output neuron gives me results that are very close together, no matter how many training iterations (epochs) take place. For example, 1 XOR 1 gives me 0.987 and 1 XOR 0 gives me 0.986; shouldn't they be far apart?
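
For context, here is roughly how I wire and train the network. This is a simplified sketch rather than my actual driver (the header name, neuron UIDs, layer sizes and epoch count are illustrative):

#include <iostream>
#include "ClNeuron.h" //assuming the class below lives in this header

int main()
{
    //Build a minimal 2-2-1 network
    ClNeuron in1(1,ClNeuron::NEURON_TYPE_INPUT);
    ClNeuron in2(2,ClNeuron::NEURON_TYPE_INPUT);
    ClNeuron hid1(3,ClNeuron::NEURON_TYPE_HIDDEN);
    ClNeuron hid2(4,ClNeuron::NEURON_TYPE_HIDDEN);
    ClNeuron out(5,ClNeuron::NEURON_TYPE_OUTPUT);

    in1.AddOutputConnection(&hid1); in1.AddOutputConnection(&hid2);
    in2.AddOutputConnection(&hid1); in2.AddOutputConnection(&hid2);
    hid1.AddOutputConnection(&out); hid2.AddOutputConnection(&out);

    //The four XOR patterns: {input A, input B, wanted output}
    double patterns[4][3] = { {0,0,0}, {0,1,1}, {1,0,1}, {1,1,0} };

    for(int epoch=0;epoch<10000;epoch++)
    {
        for(int p=0;p<4;p++)
        {
            //Forward pass: input neurons fire their raw data, the other
            //layers sum & squash their inputs before firing onward
            in1.Fire(patterns[p][0]);
            in2.Fire(patterns[p][1]);
            hid1.ProcessInputs(); hid1.Fire();
            hid2.ProcessInputs(); hid2.Fire();
            out.ProcessInputs();
            out.Fire(); //prints & returns the network's output

            //Backward pass: output layer first, then hidden, then input
            out.UpdateWeights(patterns[p][2]);
            hid1.UpdateWeights(0); hid2.UpdateWeights(0);
            in1.UpdateWeights(0); in2.UpdateWeights(0);
        }
    }
    return 0;
}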

Here is the class code, in case it helps:

#pragma once

#include <vector>
#include <iostream>
#include <new>      //for std::nothrow
#include <cstdlib>  //for rand() and RAND_MAX
#include "Math.h"
#include "RandomizationUtils.h"

using namespace std;

class ClNeuron
{
    public:
    enum NEURON_TYPE { NEURON_TYPE_INPUT=1,NEURON_TYPE_HIDDEN=2,NEURON_TYPE_OUTPUT=3 };

    private:

    static const int CONST_DEFAULT_INPUT_NUMBER_PER_NEURON = 20;
    static constexpr double CONST_DEFAULT_MOMENTUM_VALUE = 0.4; //constexpr: in-class init of a non-integral static const is ill-formed

    //Connection between 2 neurons
    struct NEURON_CONNECTION
    {
        double m_weight;
        double m_data;
        //Last modification done to the weight
        double m_weight_last_delta;
        double m_momentum_value;
        ClNeuron* m_source_neuron;
        ClNeuron* m_target_neuron;
    };

    //Initialization function
    void Init(unsigned long p_uid,NEURON_TYPE p_type);

    bool m_initialized;
    //All of the output connections of this neuron
    vector<NEURON_CONNECTION*> m_output_connections;
    //All of the input connections of this neuron
    vector<NEURON_CONNECTION*> m_input_connections;
    //Tmp internal result buffer (containing all weights multiplied by their inputs)
    double m_result_buffer;
    //Special weight that always has an input of 1.0
    NEURON_CONNECTION m_bias;

    public:

    //the type of this neuron
    NEURON_TYPE m_type;

    ClNeuron(NEURON_TYPE p_type);
    ClNeuron(unsigned long p_uid,NEURON_TYPE p_type);
    ClNeuron(unsigned long p_uid);
    ClNeuron();
    //Connect this neuron's output to another neuron's input
    bool AddOutputConnection(ClNeuron* p_neuron);

    //This neuron got a request to have a new input
    NEURON_CONNECTION* InputConnectionRequest(ClNeuron* p_source_neuron);

    //Tell the neuron to fire the sum of the processed inputs
    double Fire();

    //Tell the neuron to fire a particular data
    double Fire(double p_data);

    //Update all of this neuron's OUTPUT connection weights, based on the error gradient
    void UpdateWeights(double p_wanted_output);

    //Sum all the weights multiplied by their respective inputs into an internal buffer, then apply the sigmoid
    void ProcessInputs();

    //Print neuron & connections & weights
    void PrintNeuronData();

    //Unique ID of this neuron
    unsigned long m_uid;

    //This neuron's calculated error_delta
    double m_error_gradient;
};

ClNeuron::NEURON_CONNECTION* ClNeuron::InputConnectionRequest(ClNeuron* p_neuron)
{
    //Plain 'new' throws std::bad_alloc on failure rather than returning NULL,
    //so the nothrow form is needed for this check to be meaningful
    NEURON_CONNECTION* connection = new (std::nothrow) NEURON_CONNECTION;
    if(!connection)
    {
        cout << "Error creating new connection, memory full ?" << endl << flush;
        return NULL;
    }
    connection->m_weight = GetRandomDouble(-1,1); 
    connection->m_data = 0;
    connection->m_momentum_value = CONST_DEFAULT_MOMENTUM_VALUE;
    connection->m_source_neuron = p_neuron;
    connection->m_target_neuron = this;

    m_input_connections.push_back(connection); 
    return connection;
}

bool ClNeuron::AddOutputConnection(ClNeuron* p_neuron)
{
    //If the remote neuron accepts us as a new input, then we add the connection to our output list
    NEURON_CONNECTION* connection = p_neuron->InputConnectionRequest(this);
    if(!connection)
    {
        return false;
    }

    m_output_connections.push_back(connection);    
    return true;
}

double ClNeuron::Fire()
{
    return Fire(m_result_buffer);
}

double ClNeuron::Fire(double p_data)
{
    if(m_output_connections.size()==0)
    {
        cout << "Final neuron " << m_uid << " return " << p_data << endl;
        return p_data;
    }
    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
        m_output_connections[i]->m_data = p_data;
    }

    return 1; //the return value is not meaningful for non-output neurons
}

void ClNeuron::ProcessInputs()
{
    m_result_buffer = 0;
    for(unsigned long i=0;i<m_input_connections.size();i++)
    {
        m_result_buffer += m_input_connections[i]->m_weight * m_input_connections[i]->m_data;
    }

    m_result_buffer += m_bias.m_weight;

    //sigmoid the sum
    m_result_buffer = Sigmoid(m_result_buffer);
}

void ClNeuron::UpdateWeights(double p_wanted_output)
{ 
    //Update weights from this neuron to all of its inputs. NOTE: p_wanted_output is the wanted output of THIS neuron (in case there are many output neurons in the network)
    if(m_type == NEURON_TYPE_OUTPUT)
    {
        m_error_gradient = (p_wanted_output - m_result_buffer) * SigmoidDerivative(m_result_buffer);

        //Adjust the bias of this neuron (implicit learning rate of 1, bias input of 1.0)
        double weight_delta = 1 * m_error_gradient * 1;
        double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
        m_bias.m_weight += weight_delta + momentum;
        m_bias.m_weight_last_delta = weight_delta;
    }

    else if(m_type == NEURON_TYPE_HIDDEN)
    {
        double error_derivative = SigmoidDerivative(m_result_buffer);

        //This neuron's gradient is the weighted sum of the gradients of the neurons it feeds
        double tmpBuffer = 0.00;
        for(unsigned long i=0;i<m_output_connections.size();i++)
        {
            tmpBuffer += (m_output_connections[i]->m_target_neuron->m_error_gradient * m_output_connections[i]->m_weight);
        }
        m_error_gradient = error_derivative * tmpBuffer;

        //Adjust the weights for this neuron's OUTPUT connections
        for(unsigned long i=0;i<m_output_connections.size();i++)
        {
            double weight_delta = 1 * m_output_connections[i]->m_target_neuron->m_error_gradient * m_result_buffer;
            double momentum = m_output_connections[i]->m_weight_last_delta * m_output_connections[i]->m_momentum_value;
            m_output_connections[i]->m_weight += weight_delta + momentum;
            m_output_connections[i]->m_weight_last_delta = weight_delta;
        }

        //Adjust the bias of this neuron
        double weight_delta = 1 * m_error_gradient * 1;
        double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
        m_bias.m_weight += weight_delta + momentum;
        m_bias.m_weight_last_delta = weight_delta;
    }

    else if(m_type == NEURON_TYPE_INPUT)
    {
        //Adjust the weights for this neuron's OUTPUT connections
        for(unsigned long i=0;i<m_output_connections.size();i++)
        {
            double weight_delta = 1 * m_output_connections[i]->m_target_neuron->m_error_gradient * m_result_buffer;
            double momentum = m_output_connections[i]->m_weight_last_delta * m_output_connections[i]->m_momentum_value;
            m_output_connections[i]->m_weight += weight_delta + momentum;
            m_output_connections[i]->m_weight_last_delta = weight_delta;
        }
    }
}

void ClNeuron::PrintNeuronData()
{
    cout << endl << "========================================" << endl;
    cout << "Neuron #" << m_uid << " has " << m_input_connections.size() << " input connection" << endl << endl;

    for(unsigned long i=0;i<m_input_connections.size();i++)
    {
        cout << "----> " << "conn." << i << " | Src ID: " << m_input_connections[i]->m_source_neuron->m_uid << " | W: "<< m_input_connections[i]->m_weight << " | D: "<< m_input_connections[i]->m_data << " | RB : " << m_result_buffer << " | EF: " << endl;
    }

    cout << "Neuron #" << m_uid << " has " << m_output_connections.size() << " output connection" << endl << endl;

    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
        cout << "----> " << "conn." << i << " | Dst ID: " << m_output_connections[i]->m_target_neuron->m_uid << " | W: "<< m_output_connections[i]->m_weight << " | D: "<< m_output_connections[i]->m_data << " | RB : " << m_result_buffer << " | EF: " << endl;
    }

    cout << endl << "========================================" << endl;
}

void ClNeuron::Init(unsigned long p_uid,NEURON_TYPE p_type)
{
    m_initialized = false;
    m_output_connections.clear();    
    m_input_connections.clear();
    m_input_connections.reserve(CONST_DEFAULT_INPUT_NUMBER_PER_NEURON);

    m_type = p_type;
    m_uid = rand() % RAND_MAX;
    m_result_buffer = 0;

    m_bias.m_weight = GetRandomDouble(-1,1); 
    m_bias.m_data = 0;
    m_bias.m_momentum_value = CONST_DEFAULT_MOMENTUM_VALUE;
    m_bias.m_source_neuron = NULL;
    m_bias.m_target_neuron = this;

    m_initialized = true;
}

ClNeuron::ClNeuron(unsigned long p_uid,NEURON_TYPE p_type)
{
    Init(p_uid,p_type);
}

ClNeuron::ClNeuron(NEURON_TYPE p_type)
{
    Init(0,p_type);
}

ClNeuron::ClNeuron(unsigned long p_uid)
{
    Init(p_uid,NEURON_TYPE_HIDDEN);
}

ClNeuron::ClNeuron()
{
    Init(0,NEURON_TYPE_HIDDEN);
}
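
For reference, Math.h and RandomizationUtils.h only contain the usual helpers, roughly equivalent to this (reconstructed from memory, the exact code may differ):

#include <cmath>
#include <cstdlib>

//Standard logistic function
double Sigmoid(double p_x)
{
    return 1.0 / (1.0 + exp(-p_x));
}

//Derivative of the logistic, expressed in terms of its output
//y = Sigmoid(x), which is what ProcessInputs() stores in m_result_buffer
double SigmoidDerivative(double p_y)
{
    return p_y * (1.0 - p_y);
}

//Uniform random double in [p_min, p_max]
double GetRandomDouble(double p_min, double p_max)
{
    return p_min + (p_max - p_min) * ((double)rand() / RAND_MAX);
}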
  • Your neuron **ID** is random? That's quite weird. Also, using an `init` function is considered bad style, use ctors. You don't need to `clear` a fresh vector, they start empty. Finally, I can't spot the network architecture so it's quite possible you've got too small a network for XOR. Does it work for the identity function (input=output) ? – MSalters Aug 10 '14 at 22:25

1 Answer


The problem was the BIAS weight value for each neuron:

More precisely, the error gradient was always 0 for the bias (causing a weight_delta of 0), which in the end was preventing the bias from ever updating its weights.
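
In code terms, the fix boils down to something like the following sketch (the exact placement depends on how UpdateWeights() is structured in your version):

//In Init(): start the momentum history at zero, otherwise the first
//momentum term reads an uninitialized value
m_bias.m_weight_last_delta = 0.0;

//In UpdateWeights(), after m_error_gradient has been computed for this
//neuron: the bias acts like a connection with a constant 1.0 input, so
//its delta must come from the freshly computed (non-zero) gradient
double bias_delta = m_error_gradient * 1.0;
double bias_momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
m_bias.m_weight += bias_delta + bias_momentum;
m_bias.m_weight_last_delta = bias_delta;

With the bias weights actually moving each epoch, the network can shift each neuron's sigmoid left or right, and the XOR outputs separate properly.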
