Computing gradients wrt model inputs in Tensorflow eager mode

Question

I am interested in calculating gradients with respect to the inputs of a Keras model in TensorFlow. I understand that previously this could be done by building a graph and using tf.gradients (for example, here). However, I would like to achieve this while experimenting in eager mode (possibly using GradientTape). Specifically, if my network has two inputs (x, y) and predicts (u, v, p), I would like to calculate, e.g., du/dx for use in the loss.

Snippet below; the full code is at this gist.

# Fully connected network: 2 input features, four hidden ReLU layers of
# 20 units each, and a 3-unit output layer with no activation.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(2,)))  # input shape required
for _ in range(3):
    # The remaining hidden layers are identical and need no input shape.
    model.add(tf.keras.layers.Dense(20, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(3))

def loss(model: tf.keras.Model, inputs, outputs):
    """Mean-squared-error loss over the first two output columns.

    Runs ``model`` on ``inputs`` and compares columns 0 and 1 of the
    prediction with columns 0 and 1 of ``outputs``. Any further columns
    do not contribute to the loss.

    :param model: model called as ``model(inputs)``
    :param inputs: batch passed to the model
    :param outputs: target values, indexed as ``[:, 0]`` and ``[:, 1]``
    :return: tuple ``(loss_value, u_pred, v_pred)``
    """
    target_u = outputs[:, 0]
    target_v = outputs[:, 1]

    predicted = model(inputs)
    u_pred = predicted[:, 0]
    v_pred = predicted[:, 1]

    # One mean-squared-error term per compared column, summed into the loss.
    u_error = tf.reduce_mean(tf.square(target_u - u_pred))
    v_error = tf.reduce_mean(tf.square(target_v - v_pred))

    return u_error + v_error, u_pred, v_pred

def grad(model: tf.keras.Model, inputs, outputs):
    """
    Compute the loss and its gradients w.r.t. the model's trainable variables.

    Also contains the attempt (the ``print`` call below) to differentiate
    ``u_pred`` w.r.t. ``model.input``; that call is the one reported to
    raise the AttributeError.

    :param model:   Keras model passed through to ``loss``
    :param inputs:  (batch_size, 2) -> x, y
    :param outputs: (batch_size, 3) -> vx, vy, p
    :return: tuple ``(loss_value, grads)`` where ``grads`` is
             ``tape.gradient(loss_value, model.trainable_variables)``
    """
    with tf.GradientTape() as tape:

        loss_value, u_pred, v_pred = loss(model, inputs, outputs)
        # Failing attempt at du/d(input); this line raises:
        # AttributeError: 'DeferredTensor' object has no attribute '_id'
        # NOTE(review): model.input is presumably the model's symbolic input
        # placeholder rather than the `inputs` tensor fed above -- confirm.
        print(tape.gradient(u_pred, model.input))

    # Gradients of the scalar loss w.r.t. the weights, taken after the tape
    # context has been exited.
    grads = tape.gradient(loss_value, model.trainable_variables)

    return loss_value, grads

I've tried a few things, e.g. tape.gradient(u_pred, model.input) or tape.gradient(model.output, model.input) but these throw:

AttributeError: 'DeferredTensor' object has no attribute '_id'

Is there a way to achieve this within eager mode and if so how?

score 4 · Answer 1 · answered Mar 25 '19 at 20:03

Here is an example of retrieving the gradients of the predictions with respect to the inputs using eager execution.

Basically, you need to call tape.watch(inputs) (I am using features in my example; use whatever name you give your x) so that TensorFlow records how the model output (you can do the same with the loss) changes with respect to the inputs. Also make sure to call tape.gradient outside of the with tf.GradientTape() context.

Look at the get_gradients function below ...

Hope this helps!

# Binary classifier: two hidden ReLU layers of 10 units feeding a single
# sigmoid unit.
model = tf.keras.Sequential([
  tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(len(numeric_headers),)),  # input shape required
  tf.keras.layers.Dense(10, activation=tf.nn.relu),
  tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Running metrics updated by train_step / test_step.
# The model emits one sigmoid probability per sample, so accuracy has to be
# measured with BinaryAccuracy (which thresholds that probability).
# SparseCategoricalAccuracy would take an argmax over the single output
# unit, which is always index 0, and so would only count labels equal to 0.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')

def get_gradients(model, features):
  """Return the gradient of the model's predictions with respect to ``features``.

  The tape is told to watch ``features`` explicitly so that a gradient with
  respect to the input data (rather than the model's variables) can be
  requested from it.

  :param model: model called as ``model(features)``
  :param features: input batch to differentiate with respect to
  :return: result of ``tape.gradient(predictions, features)``
  """
  with tf.GradientTape() as input_tape:
    input_tape.watch(features)
    outputs = model(features)
  # The gradient is requested only after the recording context has closed.
  return input_tape.gradient(outputs, features)

def train_step(features, label):
  """Run one optimisation step on a batch and update the training metrics.

  Relies on the module-level ``model``, ``loss_object``, ``optimizer``,
  ``train_loss`` and ``train_accuracy``.

  :param features: input batch passed to ``model``
  :param label: targets passed to ``loss_object`` and ``train_accuracy``
  """
  # Record the forward pass and loss so they can be differentiated.
  with tf.GradientTape() as step_tape:
    batch_predictions = model(features)
    batch_loss = loss_object(label, batch_predictions)

  # Back-propagate and apply one optimizer update to the model's weights.
  weights = model.trainable_variables
  weight_grads = step_tape.gradient(batch_loss, weights)
  optimizer.apply_gradients(zip(weight_grads, weights))

  # Fold this batch into the running training metrics.
  train_loss(batch_loss)
  train_accuracy(label, batch_predictions)

def test_step(features, label):
  """Evaluate one batch and update the test metrics; no weights are changed.

  Relies on the module-level ``model``, ``loss_object``, ``test_loss`` and
  ``test_accuracy``.

  :param features: input batch passed to ``model``
  :param label: targets passed to ``loss_object`` and ``test_accuracy``
  """
  batch_predictions = model(features)

  test_loss(loss_object(label, batch_predictions))
  test_accuracy(label, batch_predictions)

EPOCHS = 5
# The report format does not change between epochs, so define it once.
template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

for epoch in range(EPOCHS):
  # Training pass.
  for features, labels in train_ds:
    train_step(features, labels)

  # Evaluation pass; note that it iterates train_ds, the same dataset as
  # the training pass above.
  for features, labels in train_ds:
    test_step(features, labels)

  # NOTE(review): no metric reset is visible in this snippet, so the values
  # printed here presumably aggregate over all epochs so far -- confirm.
  print(template.format(
      epoch + 1,
      train_loss.result(),
      train_accuracy.result() * 100,
      test_loss.result(),
      test_accuracy.result() * 100))

  # After the final epoch, print the input gradients for every batch.
  if epoch + 1 == EPOCHS:
    for features, labels in train_ds:
      print('-')
      print(get_gradients(model, features))

So I guess instead of `tape.watch(model.inputs)`, which doesn't work, you have to use the actual input data: `tape.watch(input_data)` — Azmisov, Nov 19 '19 at 23:11

Computing gradients wrt model inputs in Tensorflow eager mode

1 Answer