
When I train a simple NN architecture with cross entropy, I get the same loss from the built-in Keras cross-entropy loss and from a user-defined cross-entropy, while the gradients differ between the two implementations.

It is important to mention that this issue comes up when my "target" is not a categorical variable (as happens in policy gradients). When I define the target to be 0 or 1, the gradients and losses are the same.
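For context, here is a minimal sketch (plain NumPy, with my own helper names full_bce and partial_bce) of the two formulas I am comparing, assuming the built-in loss implements the standard two-term binary cross entropy:

import numpy as np

def full_bce(y, p):
  # standard binary cross entropy: -(y*log(p) + (1 - y)*log(1 - p))
  return -(y * np.log(p) + (1 - y) * np.log(1 - p))

def partial_bce(y, p):
  # only the -y*log(p) term, as in my manual loss below
  return -y * np.log(p)

print(full_bce(1., 0.5), partial_bce(1., 0.5))  # both 0.6931... when y_true = 1
print(full_bce(5., 0.5), partial_bce(5., 0.5))  # 0.6931... vs 3.4657... when y_true = 5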

I made a public Google Colab notebook so you can see and run the example: link

I am also attaching the code and the results here. I ran the code twice for both the built-in and the manual cross entropy: once with a categorical target, to show that the results are the same, and once with a non-categorical target.

from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow import constant, GradientTape, math
import numpy as np

def get_loss_and_grads(function,
                       y_true,
                       y_pred):
  # Evaluate the loss and its gradient with respect to y_pred
  with GradientTape() as gr:
    gr.watch(y_pred)
    loss = function(y_true=y_true,
                    y_pred=y_pred)
  dy_dx = gr.gradient(loss, y_pred)
  return loss, dy_dx

def udf_loss(y_true, y_pred):
  # Manual cross entropy: only the -y_true * log(y_pred) term
  loss = -math.log(y_pred) * y_true
  return loss


# Define the prediction and a categorical target (y_true = 1)
y_true = constant(np.array([1.]))
y_pred = constant(np.array([0.5]))

# Run for built in keras cross entropy 
loss, dy_dx = get_loss_and_grads(function=BinaryCrossentropy(from_logits=False),
                                 y_true=y_true,
                                 y_pred=y_pred)

print(f"\n\n-- ==  built in cross entropy -  y_true = {y_true} , y_pred = {y_pred} == --\n")
print("loss ", loss)
print("dy_dx ", dy_dx)


# Run for user defined cross entropy 
loss, dy_dx = get_loss_and_grads(function=udf_loss,
                                 y_true=y_true,
                                 y_pred=y_pred)

print(f"\n-- ==  user defined cross entropy - y_true = {y_true} , y_pred = {y_pred} == --\n")
print("loss ", loss)
print("dy_dx ", dy_dx)



# Define the prediction and a non-categorical target (y_true = 5, as in policy gradients)
y_true = constant(np.array([5.]))
y_pred = constant(np.array([0.5]))

# Run for built in keras cross entropy 
loss, dy_dx = get_loss_and_grads(function=BinaryCrossentropy(from_logits=False),
                                 y_true=y_true,
                                 y_pred=y_pred)

print(f"\n\n-- ==  built in cross entropy -  y_true = {y_true} , y_pred = {y_pred} == --\n")
print("loss ", loss)
print("dy_dx ", dy_dx)


# Run for user defined cross entropy 
loss, dy_dx = get_loss_and_grads(function=udf_loss,
                                 y_true=y_true,
                                 y_pred=y_pred)

print(f"\n-- ==  user defined cross entropy - y_true = {y_true} , y_pred = {y_pred} == --\n")
print("loss ", loss)
print("dy_dx ", dy_dx)


Output:

-- ==  built in cross entropy -  y_true = [1.] , y_pred = [0.5] == --

loss  tf.Tensor(0.6931469805599654, shape=(), dtype=float64)
dy_dx  tf.Tensor([-1.9999996], shape=(1,), dtype=float64)

-- ==  user defined cross entropy - y_true = [1.] , y_pred = [0.5] == --

loss  tf.Tensor([0.69314718], shape=(1,), dtype=float64)
dy_dx  tf.Tensor([-2.], shape=(1,), dtype=float64)


-- ==  built in cross entropy -  y_true = [5.] , y_pred = [0.5] == --

loss  tf.Tensor(0.6931469805599653, shape=(), dtype=float64)
dy_dx  tf.Tensor([-17.9999964], shape=(1,), dtype=float64)

-- ==  user defined cross entropy - y_true = [5.] , y_pred = [0.5] == --

loss  tf.Tensor([3.4657359], shape=(1,), dtype=float64)
dy_dx  tf.Tensor([-10.], shape=(1,), dtype=float64)
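
As a sanity check, the analytic gradients of the two formulas (again assuming the built-in loss is the standard two-term binary cross entropy, before any internal clipping) can be computed by hand; the small offsets in the built-in gradients (-1.9999996 vs -2, -17.9999964 vs -18) presumably come from the epsilon clipping inside the built-in implementation:

def full_bce_grad(y, p):
  # d/dp of -(y*log(p) + (1 - y)*log(1 - p))
  return -(y / p) + (1 - y) / (1 - p)

def partial_bce_grad(y, p):
  # d/dp of -y*log(p)
  return -y / p

print(full_bce_grad(1., 0.5), partial_bce_grad(1., 0.5))  # -2.0, -2.0
print(full_bce_grad(5., 0.5), partial_bce_grad(5., 0.5))  # -18.0, -10.0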
