I am trying to manually implement a very simple RNN in TensorFlow 2. I modeled my code on the TensorFlow website's example for building models manually. Stripped to the bare essentials, the code is:
import tensorflow as tf
import numpy as np

class ModelSimple(object):
    def __init__(self):
        # Initialize the weight and the bias to random values
        self.W = tf.Variable(tf.random.normal([]))
        self.b = tf.Variable(tf.random.normal([]))

    def __call__(self, x):
        return self.W * x + self.b

def loss(predicted_y, target_y):
    return tf.reduce_mean(tf.square(predicted_y - target_y))
NUM_EXAMPLES = 1000

inputs = tf.random.normal(shape=[NUM_EXAMPLES])
outputs = tf.zeros(NUM_EXAMPLES)
model = ModelSimple()

with tf.GradientTape() as t:
    t.watch([model.W, model.b])
    current_loss = loss(model(inputs), outputs)

dW, db = t.gradient(current_loss, [model.W, model.b])
print(dW, db)
This gives nice tensors for dW and db. Then I try to do what I described above: a simple RNN cell where each time step computes yt = tanh(matmul(y_last, Wy) + matmul(Xt, Wx) + b).
class ModelRNN(object):
    def __init__(self, n_inputs, n_neurons):
        self.n_inputs = n_inputs
        self.n_neurons = n_neurons
        # weights for the new input
        self.Wx = tf.Variable(tf.random.normal(shape=[self.n_inputs, self.n_neurons], dtype=tf.float32))
        # weights for the previous output
        self.Wy = tf.Variable(tf.random.normal(shape=[self.n_neurons, self.n_neurons], dtype=tf.float32))
        # bias weights
        self.b = tf.Variable(tf.zeros([1, self.n_neurons], dtype=tf.float32))

    def __call__(self, X_batch):
        # get the shape of the input
        batch_size, num_time_steps, _ = X_batch.get_shape()
        # we loop through the time steps, and the output of the previous
        # computation feeds into the next one;
        # this variable keeps track of it and is initialized to zero
        y_last = tf.Variable(tf.zeros([batch_size, self.n_neurons], dtype=tf.float32))
        # the outputs will be stored in this tensor
        Ys = tf.Variable(tf.zeros([batch_size, num_time_steps, self.n_neurons], dtype=tf.float32))
        for t in range(num_time_steps):
            Xt = X_batch[:, t, :]
            yt = tf.tanh(tf.matmul(y_last, self.Wy) +
                         tf.matmul(Xt, self.Wx) +
                         self.b)
            y_last.assign(yt)
            Ys[:, t, :].assign(yt)
        return Ys
inputs = tf.convert_to_tensor(np.array([
    #   t = 0       t = 1
    [[0, 1, 2], [9, 8, 7]],  # instance 1
    [[3, 4, 5], [0, 0, 0]],  # instance 2
    [[6, 7, 8], [6, 5, 4]],  # instance 3
    [[9, 0, 1], [3, 2, 1]],  # instance 4
], dtype=np.float32))
outputs = tf.Variable(tf.zeros((4, 2, 5), dtype=np.float32))
model = ModelRNN(3, 5)

with tf.GradientTape() as t:
    t.watch([model.Wx, model.Wy, model.b])
    current_loss = loss(model(inputs), outputs)

dWx, dWy, db = t.gradient(current_loss, [model.Wx, model.Wy, model.b])
print(dWx, dWy, db)
and it turns out dWx, dWy, and db are all None. I have tried several things (including explicitly watching the weights with the GradientTape, even though Variables should be watched automatically) and I keep getting None. What am I doing wrong?
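For reference, my suspicion is that the .assign() calls inside __call__ are what disconnect the tape. I would expect a rewrite that avoids Variables there entirely, accumulating the per-step outputs in a Python list and stacking them at the end, to keep everything traceable; here is a sketch of what I mean (the axis=1 in tf.stack, to recover the [batch_size, num_time_steps, n_neurons] layout, is my own choice):

    def __call__(self, X_batch):
        batch_size, num_time_steps, _ = X_batch.get_shape()
        # a plain tensor rather than a Variable, so the tape can trace through it
        y_last = tf.zeros([batch_size, self.n_neurons], dtype=tf.float32)
        outputs = []
        for t in range(num_time_steps):
            Xt = X_batch[:, t, :]
            y_last = tf.tanh(tf.matmul(y_last, self.Wy) +
                             tf.matmul(Xt, self.Wx) +
                             self.b)
            outputs.append(y_last)
        # stack the per-step outputs along the time axis:
        # [batch_size, num_time_steps, n_neurons]
        return tf.stack(outputs, axis=1)

But even if something like that is the right fix, I would still like to understand why the Variable-plus-assign version above yields None gradients.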