
I'm following the 15 Steps to Implement a Neural Net guide. I'm stuck on Step 12, which describes the backpropagation implementation.

Here's the (relevant) code I have written:

def feed_forward(inputs, weights, biases):
    net = np.matmul(np.hstack((inputs, biases)), weights)
    output = activation_function(net)
    return (output, net)

def initialize_weights(width, height, max_weight):
    return (np.random.random((width, height)) * np.random.randint(-max_weight, max_weight + 1, (width, height)))

def backpropagation(inputs, weights, learning_rate, biases, number_of_samples_for_backpropagation, target_outputs):
    random_sample_index = np.random.randint(0, inputs.shape[0], size=number_of_samples_for_backpropagation)
    random_sample = inputs[random_sample_index, :]
    print("random_sample.shape:")
    print(random_sample.shape)
    target_outputs = target_outputs[random_sample_index, :]
    print("target_outputs.shape:")
    print(target_outputs.shape)
    biases = biases[random_sample_index, :]
    print("biases.shape:")
    print(biases.shape)
    
    outputs, net = feed_forward(random_sample, weights, biases)
    print("weights.shape:")
    print(weights.shape)
    print("outputs.shape:")
    print(outputs.shape)
    print("net.shape:")
    print(net.shape)
    
    error_vector = target_outputs - outputs
    print("error_vector.shape:")
    print(error_vector.shape)
    
    delta = np.multiply(error_vector, activation_function_derivative(net))
    print("delta.shape:")
    print(delta.shape)
    
    weights_delta = learning_rate * np.kron(outputs.T, delta)
    
    print("outputs.T.shape:")
    print(outputs.T.shape)
    print("np.kron(outputs.T, delta).shape:")
    print(np.kron(outputs.T, delta).shape)
    
    weights = weights + weights_delta
    
    return weights

def train(training_set_features, training_set_targets, number_of_samples_for_backpropagation, val_set_features, val_set_targets, test_set_features, test_set_targets):
    number_of_columns_train_features = training_set_features.shape[1]
    number_of_columns_train_targets = training_set_targets.shape[1]
    max_weight = 0.5
    weights = initialize_weights(number_of_columns_train_features + 1, number_of_columns_train_targets, max_weight)
    
    number_of_rows_train = training_set_features.shape[0]
    biases_train = np.ones(shape=(number_of_rows_train, 1))
    number_of_rows_val = val_set_features.shape[0]
    biases_val = np.ones(shape=(number_of_rows_val, 1))
    number_of_rows_test = test_set_features.shape[0]
    biases_test = np.ones(shape=(number_of_rows_test, 1))
    
    # training parameters
    number_of_epochs = 500
    learning_rate = 0.1
    
    train_errors = []
    train_classification_errors = []
    val_errors = []
    val_classification_errors = []
    test_errors = []
    test_classification_errors = []
    
    current_epoch_number = 0
    while (current_epoch_number < number_of_epochs):
        weights = backpropagation(training_set_features, weights, learning_rate, biases_train, number_of_samples_for_backpropagation, training_set_targets)
        if (PLOT_GRAPHS == True):
            train_error, train_classification_error = evaluate_error(training_set_features, weights, training_set_targets, outputs_to_classes(training_set_targets), biases_train)
            val_error, val_classification_error = evaluate_error(val_set_features, weights, val_set_targets, outputs_to_classes(val_set_targets), biases_val)
            test_error, test_classification_error = evaluate_error(test_set_features, weights, test_set_targets, outputs_to_classes(test_set_targets), biases_test)
            
            train_errors.append(train_error)
            train_classification_errors.append(train_classification_error)
            val_errors.append(val_error)
            val_classification_errors.append(val_classification_error)
            test_errors.append(test_error)
            test_classification_errors.append(test_classification_error)
        
        current_epoch_number = current_epoch_number + 1
        
    if (PLOT_GRAPHS == True):
        plt.plot(train_errors, label="Train errors")
        plt.plot(train_classification_errors, label="Train classification errors")
        plt.plot(val_errors, label="Validation errors")
        plt.plot(val_classification_errors, label="Validation classification errors")
        plt.plot(test_errors, label="Test errors")
        plt.plot(test_classification_errors, label="Test classification errors")
        plt.legend(loc="upper left")
    
    train_error, train_classification_error = evaluate_error(training_set_features, weights, training_set_targets, outputs_to_classes(training_set_targets), biases_train)
    val_error, val_classification_error = evaluate_error(val_set_features, weights, val_set_targets, outputs_to_classes(val_set_targets), biases_val)
    test_error, test_classification_error = evaluate_error(test_set_features, weights, test_set_targets, outputs_to_classes(test_set_targets), biases_test)
    
    # debug prints
    print("train_errors:")
    print(train_errors)
    print("train_classification_errors:")
    print(train_classification_errors)
    print("val_errors:")
    print(val_errors)
    print("val_classification_errors:")
    print(val_classification_errors)
    print("test_errors:")
    print(test_errors)
    print("test_classification_errors:")
    print(test_classification_errors)
    
    return (weights, train_error, train_classification_error, val_error, val_classification_error, test_error, test_classification_error)

NUMBER_OF_SAMPLES_FOR_BACKPROPAGATION = 1

train(iris_train_features.values, iris_train_targets.values, NUMBER_OF_SAMPLES_FOR_BACKPROPAGATION, iris_val_features.values, iris_val_targets.values, iris_test_features.values, iris_test_targets.values)

Here's the output I get:

random_sample.shape:
(1, 4)
target_outputs.shape:
(1, 3)
biases.shape:
(1, 1)
weights.shape:
(5, 3)
outputs.shape:
(1, 3)
net.shape:
(1, 3)
error_vector.shape:
(1, 3)
delta.shape:
(1, 3)
outputs.T.shape:
(3, 1)
np.kron(outputs.T, delta).shape:
(3, 3)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In [105], line 1
----> 1 train(iris_train_features.values, iris_train_targets.values, NUMBER_OF_SAMPLES_FOR_BACKPROPAGATION, iris_val_features.values, iris_val_targets.values, iris_test_features.values, iris_test_targets.values)

Cell In [103], line 27, in train(training_set_features, training_set_targets, number_of_samples_for_backpropagation, val_set_features, val_set_targets, test_set_features, test_set_targets)
     25 current_epoch_number = 0
     26 while (current_epoch_number < number_of_epochs):
---> 27     weights = backpropagation(training_set_features, weights, learning_rate, biases_train, number_of_samples_for_backpropagation, training_set_targets)
     28     if (PLOT_GRAPHS == True):
     29         train_error, train_classification_error = evaluate_error(training_set_features, weights, training_set_targets, outputs_to_classes(training_set_targets), biases_train)

Cell In [102], line 36, in backpropagation(inputs, weights, learning_rate, biases, number_of_samples_for_backpropagation, target_outputs)
     33 print("np.kron(outputs.T, delta).shape:")
     34 print(np.kron(outputs.T, delta).shape)
---> 36 weights = weights + weights_delta
     38 return weights

ValueError: operands could not be broadcast together with shapes (5,3) (3,3) 

As you can see, my weights_delta has a shape of (3, 3), while my weights has a shape of (5, 3). weights has the correct shape, since input_count (number of features) is 4 plus one bias column, and output_count is 3 (my output is a 3-element vector). The problem is that the shapes don't match, so I get a ValueError.

The first dimension of the outputs matrix (and, consequently, of the error_vector and delta matrices) depends on the size of the random sample. Maybe this shouldn't be the case?
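A quick shape check (a minimal sketch; the values are irrelevant, only the shapes matter) shows that np.kron(outputs.T, delta) can't produce the (5, 3) shape of weights for any sample size:

import numpy as np

for n in (1, 2):                    # number_of_samples_for_backpropagation
    outputs = np.zeros((n, 3))      # what feed_forward returns for n samples
    delta = np.zeros((n, 3))
    # (3, 3) for n=1, (6, 6) for n=2 -- never the (5, 3) that weights has
    print(np.kron(outputs.T, delta).shape)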


1 Answer


I think there's a mistake in the guide: weights_delta should be calculated using the inputs and the bias instead of the outputs. Something like

weights_delta = learning_rate * np.kron(np.concatenate([random_sample, biases], axis=1).T, delta)
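For context, here is a minimal sketch of the corrected function; it reuses feed_forward and activation_function_derivative from the question, and it assumes a squared-error loss (the guide's loss isn't shown). delta = (target - output) * f'(net) is the per-unit error signal, and the weight gradient pairs each input component (including the bias) with that signal, which is why the bias-augmented input appears on the left:

import numpy as np

def backpropagation(inputs, weights, learning_rate, biases,
                    number_of_samples_for_backpropagation, target_outputs):
    # draw a random mini-batch, exactly as in the question
    idx = np.random.randint(0, inputs.shape[0], size=number_of_samples_for_backpropagation)
    random_sample = inputs[idx, :]
    targets = target_outputs[idx, :]
    batch_biases = biases[idx, :]

    outputs, net = feed_forward(random_sample, weights, batch_biases)

    # per-unit error signal: dE/d(net) for a squared-error loss
    delta = (targets - outputs) * activation_function_derivative(net)

    # the gradient pairs the *inputs* (plus bias) with delta, not the outputs:
    # for one sample, (5, 1) kron (1, 3) is their outer product -> (5, 3), matching weights
    inputs_with_bias = np.concatenate([random_sample, batch_biases], axis=1)
    weights_delta = learning_rate * np.kron(inputs_with_bias.T, delta)

    return weights + weights_delta

Note that the kron form only coincides with the outer product for a single sample; with a larger mini-batch, np.matmul(inputs_with_bias.T, delta) keeps the (5, 3) shape by summing the per-sample outer products.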
AndrzejO
  • Can you explain a bit more, please? I found your solution works if I switch from `np.kron` to `np.matmul`, but could you explain the entirety of Step 12 from the perspective of backpropagation (what goes on in each step and why)? In particular, I find it hard to understand the role of `delta` and why `weights_delta` has that formula. I've looked at various explanations of backpropagation, but it's still unclear to me. – Pointer_to_void Sep 27 '22 at 12:41