
I am trying to manually implement a very simple RNN using TensorFlow 2. I modeled my code on the example for building models manually from the TensorFlow website. The code, stripped to the bare essentials for this purpose, is

import numpy as np
import tensorflow as tf


class ModelSimple(object):
    def __init__(self):
        # Initialize the weight and bias to random scalar values
        self.W = tf.Variable(tf.random.normal([]))
        self.b = tf.Variable(tf.random.normal([]))

    def __call__(self, x):
        return self.W * x + self.b

def loss(predicted_y, target_y):
    return tf.reduce_mean(tf.square(predicted_y - target_y))


NUM_EXAMPLES = 1000

inputs  = tf.random.normal(shape=[NUM_EXAMPLES])
outputs = tf.zeros(NUM_EXAMPLES)
model = ModelSimple()

with tf.GradientTape() as t:
    t.watch([model.W, model.b])
    current_loss = loss(model(inputs), outputs)
dW, db = t.gradient(current_loss, [model.W, model.b])
print(dW, db)

This gives nice tensors for dW and db. Then I try to do the same thing for the RNN:

class ModelRNN(object):
    def __init__(self, n_inputs, n_neurons):
        self.n_inputs = n_inputs
        self.n_neurons = n_neurons

        # weights for new input
        self.Wx = tf.Variable(tf.random.normal(shape=[self.n_inputs, self.n_neurons], dtype=tf.float32))

        # weights for previous output
        self.Wy = tf.Variable(tf.random.normal(shape=[self.n_neurons, self.n_neurons], dtype=tf.float32))

        # bias weights
        self.b = tf.Variable(tf.zeros([1, self.n_neurons], dtype=tf.float32))

    def __call__(self, X_batch):
        # get shape of input
        batch_size, num_time_steps, _ = X_batch.get_shape()

        # we will loop through the time steps and the output of the previous computation feeds into
        # the next one.
        # this variable keeps track of it and is initialized to zero
        y_last = tf.Variable(tf.zeros([batch_size, self.n_neurons], dtype=tf.float32))

        # the outputs will be stored in this tensor
        Ys = tf.Variable(tf.zeros([batch_size, num_time_steps, self.n_neurons], dtype=tf.float32))

        for t in range(num_time_steps):
            Xt = X_batch[:, t, :]
            yt = tf.tanh(tf.matmul(y_last, self.Wy) +
                         tf.matmul(Xt, self.Wx) +
                         self.b)
            y_last.assign(yt)
            Ys[:, t, :].assign(yt)

        return Ys

inputs = tf.convert_to_tensor(np.array([
        # t = 0      t = 1
        [[0, 1, 2], [9, 8, 7]], # instance 1
        [[3, 4, 5], [0, 0, 0]], # instance 2
        [[6, 7, 8], [6, 5, 4]], # instance 3
        [[9, 0, 1], [3, 2, 1]], # instance 4
    ],dtype=np.float32))
outputs = tf.Variable(tf.zeros((4, 2, 5), dtype=np.float32))

model = ModelRNN(3, 5)

with tf.GradientTape() as t:
    t.watch([model.Wx, model.Wy, model.b])
    current_loss = loss(model(inputs), outputs)

dWx, dWy, db = t.gradient(current_loss, [model.Wx, model.Wy, model.b])
print(dWx, dWy, db)

and it turns out dWx, dWy and db are all None. I have tried several things (including explicitly watching them with the GradientTape despite them being variables) and yet I keep getting None. What am I doing wrong?

Borun Chowdhury

1 Answer


It looks like this is related to this issue: Tensorflow cannot get gradient wrt a Variable, but can wrt a Tensor
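
The gist of that issue: assign writes into a Variable through a non-differentiable op, so the tape loses the path from the weights to whatever is later read back out of the Variable. A minimal sketch of the behaviour (hypothetical names, assuming TF2 eager execution):

x = tf.constant([1.0, 2.0])
w = tf.Variable(3.0)

# Writing through a Variable: assign is not differentiated through,
# so the tape cannot trace the loss back to w.
buf = tf.Variable(tf.zeros([2]))
with tf.GradientTape() as tape:
    buf[0].assign(w * x[0])
    buf[1].assign(w * x[1])
    loss_via_variable = tf.reduce_sum(buf)
print(tape.gradient(loss_via_variable, w))   # None

# Building an ordinary tensor instead keeps the chain intact.
with tf.GradientTape() as tape:
    out = tf.stack([w * x[0], w * x[1]])
    loss_via_tensor = tf.reduce_sum(out)
print(tape.gradient(loss_via_tensor, w))     # tf.Tensor(3.0, ...)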

Replacing the assign calls on Ys with a Python list and tf.stack results in gradients being returned:

    Ys = []
    for t in range(num_time_steps):
        Xt = X_batch[:, t, :]
        yt = tf.tanh(tf.matmul(y_last, self.Wy) +
                     tf.matmul(Xt, self.Wx) +
                     self.b)
        y_last.assign(yt)
        Ys.append(yt)

    return tf.stack(Ys, axis=1)
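
With this change, rerunning the GradientTape block from the question should give tensors instead of None (a quick sanity check, reusing the inputs, outputs and loss defined above):

model = ModelRNN(3, 5)

with tf.GradientTape() as t:
    current_loss = loss(model(inputs), outputs)

# Variables are watched automatically, so t.watch is not strictly needed.
dWx, dWy, db = t.gradient(current_loss, [model.Wx, model.Wy, model.b])
print(dWx is None, dWy is None, db is None)  # False False False

Note that y_last is still updated with assign, so if the same reasoning applies, the gradient does not flow through the recurrent connection across time steps; using a plain tensor (y_last = yt) instead of a Variable should restore full backpropagation through time.
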
Colin Torney