I am trying to use tf.GradientTape, loosely based on the example in https://www.tensorflow.org/beta/tutorials/eager/custom_training_walkthrough, and I need to create a custom loss function where each prediction gets a weighted loss value depending on the outcome.
This is a three-class classification problem. The loss function takes the 130 features 'x', the labels 'y' (0, 1 or 2), and the 'weights' (one weight for each label), which depend on whether the prediction matches the label or not. Here is my code:
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard

def TF_learning(training_data, training_results, testing_data):
    # The last two columns hold the odds used later to build the loss weights.
    odds = [i[-2:] for i in training_data]

    training_data = tf.keras.utils.normalize(training_data, axis=1)
    testing_data = tf.keras.utils.normalize(testing_data, axis=1)

    minutes = int((len(training_data[0]) - 10) / 2)
    dense_layers = 1
    neurons = 32
    epochs = 70

    NAME = "{}-nodes-{}-dense".format(neurons, dense_layers)
    tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten())

    for i_layer in range(dense_layers):
        #model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dense(neurons, activation=tf.nn.relu))
        model.add(tf.keras.layers.Dropout(0.2))

    model.add(tf.keras.layers.Dense(neurons // 2, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(3, activation=tf.nn.softmax))
    @tf.function
    def loss(model, x, y, weights):
        x = model(x)
        # Turn the softmax output into a hard class prediction: scaling by
        # 1e10 before the softmax approximates argmax as a float index.
        x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
        y_ = tf.reduce_sum(tf.nn.softmax(x * 1e10) * x_range, axis=-1)
        y_ = tf.cast(y_, dtype=tf.int32)  # integer class prediction
        y_ = tf.one_hot(y_, depth=3)

        y = tf.cast(y, tf.int64)
        y = tf.one_hot(y, depth=3)

        # 1 in the label's column where prediction and label agree.
        correct = tf.multiply(y_, y)
        # 1 where the prediction is maximally wrong (label 0 vs. prediction 2
        # or label 2 vs. prediction 0).
        wrong = tf.add(tf.multiply(y[:, 0], y_[:, 2]), tf.multiply(y[:, 2], y_[:, 0]))

        # Overwrite column 1 of 'correct' with the 'wrong' indicator.
        indices = tf.stack([tf.range(tf.shape(weights)[0], dtype=tf.int32),
                            tf.ones(tf.shape(weights)[0], dtype=tf.int32)], axis=1)
        scatter = tf.tensor_scatter_nd_update(correct, indices, wrong)
        scatter = tf.cast(scatter, dtype=tf.float64)

        loss_array = tf.multiply(scatter, weights)
        loss = tf.reduce_sum(loss_array)
        return loss
    @tf.function
    def grad(model, inputs, targets, weights):
        with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape:
            loss_value = loss(model, inputs, targets, weights)
            print(tape.gradient(loss_value, model.trainable_variables))
        return loss_value, tape.gradient(loss_value, model.trainable_variables)  # Doesn't work, model.variables is empty
    # Per-sample weights of shape (N, 3): a zero column is inserted at
    # index 1, the other two columns become 1 - odds.
    weights = -tf.Variable(np.insert(odds, 1, values=0, axis=1), dtype=tf.float64) + 1

    l = loss(model, training_data, training_results, weights)
    print("Loss test: {}".format(l))

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1, decay=1e-5)

    loss_value, grads = grad(model, training_data, training_results, weights)
    print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
                                              loss_value.numpy()))
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
                                      loss(model, training_data, training_results, weights).numpy()))
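To be explicit about what the scatter step in the loss does, here is a tiny standalone example of tf.tensor_scatter_nd_update overwriting column 1 of each row:

import tensorflow as tf

correct = tf.constant([[1., 0., 0.],
                       [0., 0., 1.]])
wrong = tf.constant([1., 0.])
indices = tf.constant([[0, 1], [1, 1]])  # (row, column=1) for every sample

# Column 1 of each row is replaced by the corresponding 'wrong' value.
print(tf.tensor_scatter_nd_update(correct, indices, wrong))
# tf.Tensor([[1. 1. 0.]
#            [0. 0. 1.]], shape=(2, 3), dtype=float32)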
How do I make something like this work in TensorFlow? I just need a loss that is weighted depending on whether the prediction is correct or not. My guess is that the gradient can't be calculated because of the integer conversion: when the optimizer takes a small step, the output is still converted to the same integer, so the loss is piecewise constant (see the minimal check after the traceback below). I get the following error.
Loss test: 7.040000000000001
WARNING: Logging before flag parsing goes to stderr.
W0711 18:04:30.068719 9868 backprop.py:935] Calling GradientTape.gradient on a persistent tape inside it's context is significantly less efficient than calling it outside the context (it causes the gradient ops to be recorded on the tape, leading to increased CPU and memory usage). Only call GradientTape.gradient inside the context if you actually want to trace the gradient in order to compute higher order derrivatives.
[None, None, None, None, None, None]
Step: 0, Initial Loss: 7.040000000000001
Traceback (most recent call last):
  File "ML_test.py", line 322, in <module>
    predictions = TF_learning(training_data=X_train,training_results=Y_train,testing_data=X_test)
  File "C:\Code\ATP\Ad_hoc_opgaver\Test\ML_tests\machine_learning_tf2.py", line 157, in TF_learning
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
  File "C:\Code\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py", line 396, in apply_gradients
    grads_and_vars = _filter_grads(grads_and_vars)
  File "C:\Code\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py", line 924, in _filter_grads
    ([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['sequential/dense/kernel:0', 'sequential/dense/bias:0', 'sequential/dense_1/kernel:0', 'sequential/dense_1/bias:0', 'sequential/dense_2/kernel:0', 'sequential/dense_2/bias:0'].
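Here is the minimal check I mentioned above: casting to an integer dtype is not differentiable, so the tape returns None for everything upstream of the cast.

import tensorflow as tf

x = tf.Variable([0.3, 2.6, 1.1])

with tf.GradientTape() as tape:
    y = tf.cast(tf.cast(x, tf.int32), tf.float32)  # round via int cast: piecewise constant
    loss = tf.reduce_sum(y)

print(tape.gradient(loss, x))  # None: no gradient flows through the int cast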
Is there any way to make this work? Maybe with an optimizer that doesn't use gradient descent, but random sampling? Or one that takes a big enough step to get a non-zero gradient?
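For what it's worth, the only differentiable variant I can think of is to use the softmax probabilities directly instead of the hard argmax. A sketch of that relaxation (the name soft_loss is just illustrative; it mirrors the scatter bookkeeping above, with the probability mass standing in for the hard indicators):

@tf.function
def soft_loss(model, x, y, weights):
    p = tf.cast(model(x), tf.float64)  # softmax probabilities, shape (N, 3)
    y = tf.one_hot(tf.cast(y, tf.int32), depth=3, dtype=tf.float64)

    # Soft versions of the hard indicators: probability on the true class,
    # and probability on the maximally wrong class (0 vs. 2).
    correct = p * y                                # (N, 3)
    wrong = y[:, 0] * p[:, 2] + y[:, 2] * p[:, 0]  # (N,)

    # Same layout as the scatter: column 1 carries the 'wrong' term.
    per_sample = (correct[:, 0] * weights[:, 0]
                  + wrong * weights[:, 1]
                  + correct[:, 2] * weights[:, 2])
    return tf.reduce_sum(per_sample)

Since the probabilities change smoothly with the parameters, a small optimizer step actually changes this loss, so the tape should return non-None gradients.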