When I train a simple NN architecture with cross entropy, I get the same loss when using the built-in Keras cross-entropy
loss and a user-defined cross entropy,
while the gradients differ between the two implementations...
It is important to mention that this issue comes up when my "target" is not a categorical variable (as happens in policy gradients). When I define the target to be 0 or 1, the gradients and losses are the same.
I made a public Google Colab notebook so you can see and run the example: link
I am also attaching the code and the results here. I ran the code twice for the built-in and the manual cross entropy: once with a categorical target, in order to show that the results are the same, and once with a non-categorical target.
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow import constant ,GradientTape, math
import numpy as np
def get_loss_and_grads(function,
                       y_true,
                       y_pred):
    """Evaluate a loss function and its gradient w.r.t. the predictions.

    Runs ``function(y_true=y_true, y_pred=y_pred)`` under a gradient tape
    and returns ``(loss, d(loss)/d(y_pred))``.
    """
    with GradientTape() as tape:
        # y_pred is a constant tensor (not a Variable), so the tape
        # must be told explicitly to track it.
        tape.watch(y_pred)
        loss_value = function(y_true=y_true, y_pred=y_pred)
    grad = tape.gradient(loss_value, y_pred)
    return loss_value, grad
def udf_loss(y_true, y_pred):
    """Manual cross-entropy term: ``-y_true * log(y_pred)``.

    NOTE(review): this is only the positive-class term of binary cross
    entropy; the ``(1 - y_true) * log(1 - y_pred)`` term is intentionally
    absent — that asymmetry is the subject of the comparison above.
    """
    return -(y_true * math.log(y_pred))
# Compare the built-in Keras binary cross entropy against the manual
# implementation, first with a categorical target (1.0) and then with a
# non-categorical target (5.0, as it appears in policy gradients).
y_pred = constant(np.array([0.5]))
for target in (1., 5.):
    y_true = constant(np.array([target]))

    # Run for built-in Keras cross entropy
    loss, dy_dx = get_loss_and_grads(function=BinaryCrossentropy(from_logits=False),
                                     y_true=y_true,
                                     y_pred=y_pred)
    print(f"\n\n-- == built in cross entropy - y_true = {y_true} , y_pred = {y_pred} == --\n")
    print("loss ", loss)
    print("dy_dx ", dy_dx)

    # Run for user-defined cross entropy
    loss, dy_dx = get_loss_and_grads(function=udf_loss,
                                     y_true=y_true,
                                     y_pred=y_pred)
    print(f"\n-- == user defined cross entropy - y_true = {y_true} , y_pred = {y_pred} == --\n")
    print("loss ", loss)
    print("dy_dx ", dy_dx)
-- == built in cross entropy - y_true = [1.] , y_pred = [0.5] == --
loss tf.Tensor(0.6931469805599654, shape=(), dtype=float64)
dy_dx tf.Tensor([-1.9999996], shape=(1,), dtype=float64)
-- == user defined cross entropy - y_true = [1.] , y_pred = [0.5] == --
loss tf.Tensor([0.69314718], shape=(1,), dtype=float64)
dy_dx tf.Tensor([-2.], shape=(1,), dtype=float64)
-- == built in cross entropy - y_true = [5.] , y_pred = [0.5] == --
loss tf.Tensor(0.6931469805599653, shape=(), dtype=float64)
dy_dx tf.Tensor([-17.9999964], shape=(1,), dtype=float64)
-- == user defined cross entropy - y_true = [5.] , y_pred = [0.5] == --
loss tf.Tensor([3.4657359], shape=(1,), dtype=float64)
dy_dx tf.Tensor([-10.], shape=(1,), dtype=float64)