I'm following the 15 Steps to Implement a Neural Net guide. I'm stuck on Step 12, where backpropagation implementation is described.
Here's the (relevant) code I have written:
def feed_forward(inputs, weights, biases):
net = np.matmul(np.hstack((inputs, biases)), weights)
output = activation_function(net)
return (output, net)
def initialize_weights(width, height, max_weight):
return (np.random.random((width, height)) * np.random.randint(-max_weight, max_weight + 1, (width, height)))
def backpropagation(inputs, weights, learning_rate, biases, number_of_samples_for_backpropagation, target_outputs):
random_sample_index = np.random.randint(0, inputs.shape[0], size=number_of_samples_for_backpropagation)
random_sample = inputs[random_sample_index, :]
print("random_sample.shape:")
print(random_sample.shape)
target_outputs = target_outputs[random_sample_index, :]
print("target_outputs.shape:")
print(target_outputs.shape)
biases = biases[random_sample_index, :]
print("biases.shape:")
print(biases.shape)
outputs, net = feed_forward(random_sample, weights, biases)
print("weights.shape:")
print(weights.shape)
print("outputs.shape:")
print(outputs.shape)
print("net.shape:")
print(net.shape)
error_vector = target_outputs - outputs
print("error_vector.shape:")
print(error_vector.shape)
delta = np.multiply(error_vector, activation_function_derivative(net))
print("delta.shape:")
print(delta.shape)
weights_delta = learning_rate * np.kron(outputs.T, delta)
print("outputs.T.shape:")
print(outputs.T.shape)
print("np.kron(outputs.T, delta).shape:")
print(np.kron(outputs.T, delta).shape)
weights = weights + weights_delta
return weights
def train(training_set_features, training_set_targets, number_of_samples_for_backpropagation, val_set_features, val_set_targets, test_set_features, test_set_targets):
number_of_columns_train_features = training_set_features.shape[1]
number_of_columns_train_targets = training_set_targets.shape[1]
max_weight = 0.5
weights = initialize_weights(number_of_columns_train_features + 1, number_of_columns_train_targets, max_weight)
number_of_rows_train = training_set_features.shape[0]
biases_train = np.ones(shape=(number_of_rows_train, 1))
number_of_rows_val = val_set_features.shape[0]
biases_val = np.ones(shape=(number_of_rows_val, 1))
number_of_rows_test = test_set_features.shape[0]
biases_test = np.ones(shape=(number_of_rows_test, 1))
# training parameters
number_of_epochs = 500
learning_rate = 0.1
train_errors = []
train_classification_errors = []
val_errors = []
val_classification_errors = []
test_errors = []
test_classification_errors = []
current_epoch_number = 0
while (current_epoch_number < number_of_epochs):
weights = backpropagation(training_set_features, weights, learning_rate, biases_train, number_of_samples_for_backpropagation, training_set_targets)
if (PLOT_GRAPHS == True):
train_error, train_classification_error = evaluate_error(training_set_features, weights, training_set_targets, outputs_to_classes(training_set_targets), biases_train)
val_error, val_classification_error = evaluate_error(val_set_features, weights, val_set_targets, outputs_to_classes(val_set_targets), biases_val)
test_error, test_classification_error = evaluate_error(test_set_features, weights, test_set_targets, outputs_to_classes(test_set_targets), biases_test)
train_errors.append(train_error)
train_classification_errors.append(train_classification_error)
val_errors.append(val_error)
val_classification_errors.append(val_classification_error)
test_errors.append(test_error)
test_classification_errors.append(test_classification_error)
current_epoch_number = current_epoch_number + 1
if (PLOT_GRAPHS == True):
plt.plot(train_errors, label="Train errors")
plt.plot(train_classification_errors, label="Train classification errors")
plt.plot(val_errors, label="Validation errors")
plt.plot(val_classification_errors, label="Validation classification errors")
plt.plot(test_errors, label="Test errors")
plt.plot(test_classification_errors, label="Test classification errors")
plt.legend(loc="upper left")
train_error, train_classification_error = evaluate_error(training_set_features, weights, training_set_targets, outputs_to_classes(training_set_targets), biases_train)
val_error, val_classification_error = evaluate_error(val_set_features, weights, val_set_targets, outputs_to_classes(val_set_targets), biases_val)
test_error, test_classification_error = evaluate_error(test_set_features, weights, test_set_targets, outputs_to_classes(test_set_targets), biases_test)
# debug prints
print("train_errors:")
print(train_errors)
print("train_classification_errors:")
print(train_classification_errors)
print("val_errors:")
print(val_errors)
print("val_classification_errors:")
print(val_classification_errors)
print("test_errors:")
print(test_errors)
print("test_classification_errors:")
print(test_classification_errors)
return (weights, train_error, train_classification_error, val_error, val_classification_error, test_error, test_classification_error)
NUMBER_OF_SAMPLES_FOR_BACKPROPAGATION = 1
train(iris_train_features.values, iris_train_targets.values, NUMBER_OF_SAMPLES_FOR_BACKPROPAGATION, iris_val_features.values, iris_val_targets.values, iris_test_features.values, iris_test_targets.values)
Here's the output I get:
random_sample.shape:
(1, 4)
target_outputs.shape:
(1, 3)
biases.shape:
(1, 1)
weights.shape:
(5, 3)
outputs.shape:
(1, 3)
net.shape:
(1, 3)
error_vector.shape:
(1, 3)
delta.shape:
(1, 3)
outputs.T.shape:
(3, 1)
np.kron(outputs.T, delta).shape:
(3, 3)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [105], line 1
----> 1 train(iris_train_features.values, iris_train_targets.values, NUMBER_OF_SAMPLES_FOR_BACKPROPAGATION, iris_val_features.values, iris_val_targets.values, iris_test_features.values, iris_test_targets.values)
Cell In [103], line 27, in train(training_set_features, training_set_targets, number_of_samples_for_backpropagation, val_set_features, val_set_targets, test_set_features, test_set_targets)
25 current_epoch_number = 0
26 while (current_epoch_number < number_of_epochs):
---> 27 weights = backpropagation(training_set_features, weights, learning_rate, biases_train, number_of_samples_for_backpropagation, training_set_targets)
28 if (PLOT_GRAPHS == True):
29 train_error, train_classification_error = evaluate_error(training_set_features, weights, training_set_targets, outputs_to_classes(training_set_targets), biases_train)
Cell In [102], line 36, in backpropagation(inputs, weights, learning_rate, biases, number_of_samples_for_backpropagation, target_outputs)
33 print("np.kron(outputs.T, delta).shape:")
34 print(np.kron(outputs.T, delta).shape)
---> 36 weights = weights + weights_delta
38 return weights
ValueError: operands could not be broadcast together with shapes (5,3) (3,3)
As you can see, my weights_delta
has a shape of (3,3)
, while my weights
has a shape of (5, 3)
. weights has the correct shape, since input_count
(number of features) is 4 and output_count
is 3 (my output is a 3 element vector). The problem is that the shapes don't match and I get a ValueError
.
The first dimension of the outputs
matrix (and, subsequently, error_vector
and delta
vectors) depend on the size of the random sample. Maybe this shouldn't be the case?