Good Evening,
I want to implement a toy example for a simple regression problem with TF2 and tf.GradientTape. With model.fit() the model learns properly, but with the equivalent GradientTape loop the weights do change, yet the loss barely decreases compared to model.fit(). Here is my example code with the results; I can't find the problem.
model_opt = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
# One training step; in the full example below this runs once per epoch.
with tf.GradientTape() as tape:
    y = model(X, training=True)          # forward pass
    loss_value = loss_fn(y_true, y)      # full-batch MSE
grads = tape.gradient(loss_value, model.trainable_variables)
model_opt.apply_gradients(zip(grads, model.trainable_variables))
# Results (loss per epoch):
42.47433806265809
42.63973672226078
36.687397360178586
38.744844324717526
36.59080452300609
...
Here is the regular case with model.fit():
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.MSE, metrics=["mse"])
...
model.fit(X,y_true,verbose=0)
# Results (loss per epoch):
[40.97759069299212]
[28.04145720307729]
[17.643483147375473]
[7.575242056454791]
[5.83682193867299]
The loss should be roughly the same in both cases, but the GradientTape version looks like it barely learns at all. The input X is a tensor, and so is y_true.
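To rule out the loss computation itself: tf.keras.losses.MeanSquaredError is just the mean of the squared differences, so both training paths should compute the same quantity for the same predictions. A minimal sanity check (a sketch with made-up values, not from my data):

import tensorflow as tf

loss_fn = tf.keras.losses.MeanSquaredError()
y_true = tf.constant([[1.0], [2.0], [3.0]])
y_pred = tf.constant([[1.5], [1.5], [2.5]])

# The Keras loss should equal the hand-written mean of squared errors.
manual = tf.reduce_mean(tf.square(y_true - y_pred))
print(float(loss_fn(y_true, y_pred)), float(manual))  # 0.25 0.25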
Edit: complete code for testing
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
dataset = pd.read_csv(dataset_path, names=column_names,
                      na_values="?", comment='\t',
                      sep=" ", skipinitialspace=True)
dataset = dataset.dropna()
dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
dataset = pd.get_dummies(dataset, prefix='', prefix_sep='')
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
def build_model_fit():
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse', optimizer=optimizer)
    return model
def build_model_tape():
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)])
    opt = tf.keras.optimizers.RMSprop(0.001)
    return model, opt
model_f = build_model_fit()
model_g, opt_g = build_model_tape()
EPOCHS = 20
# model.fit() test
history = model_f.fit(normed_train_data, train_labels, epochs=EPOCHS, verbose=2)
X = tf.convert_to_tensor(normed_train_data.to_numpy())
y_true = tf.convert_to_tensor(train_labels.to_numpy())
# GradientTape test
loss_fn = tf.keras.losses.MeanSquaredError()
for _ in range(EPOCHS):
    with tf.GradientTape() as tape:
        y = model_g(X, training=True)
        loss_value = loss_fn(y_true, y)
    grads = tape.gradient(loss_value, model_g.trainable_variables)
    opt_g.apply_gradients(zip(grads, model_g.trainable_variables))
    print(loss_value)
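One difference I'm aware of (assuming TF2 defaults): model.fit() trains with batch_size=32 by default, so each fit() epoch performs many optimizer updates, while the tape loop above performs exactly one full-batch update per epoch. For comparison, a mini-batched variant of the tape loop would look like this (just a sketch reusing model_g, opt_g, loss_fn, X and y_true from above; the tf.data pipeline is my addition, not part of the original code):

# Sketch: mini-batched GradientTape loop, mirroring fit()'s default batch size.
batched = tf.data.Dataset.from_tensor_slices((X, y_true)).shuffle(X.shape[0]).batch(32)

for epoch in range(EPOCHS):
    for X_batch, y_batch in batched:
        with tf.GradientTape() as tape:
            y_pred = model_g(X_batch, training=True)
            loss_value = loss_fn(y_batch, y_pred)
        grads = tape.gradient(loss_value, model_g.trainable_variables)
        opt_g.apply_gradients(zip(grads, model_g.trainable_variables))
    print(float(loss_value))  # loss of the last mini-batch in this epoch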