
I went over a basic TF 2.0 example containing very simple code:

from __future__ import absolute_import, division, print_function, unicode_literals
import os

import tensorflow as tf

import cProfile

# Fetch and format the mnist data
(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()

dataset = tf.data.Dataset.from_tensor_slices(
  (tf.cast(mnist_images[...,tf.newaxis]/255, tf.float32),
   tf.cast(mnist_labels,tf.int64)))
dataset = dataset.shuffle(1000).batch(32)

# Build the model
mnist_model = tf.keras.Sequential([
  tf.keras.layers.Conv2D(16,[3,3], activation='relu',
                         input_shape=(None, None, 1)),
  tf.keras.layers.Conv2D(16,[3,3], activation='relu'),
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(10)
])

for images,labels in dataset.take(1):
    print("Logits: ", mnist_model(images[0:1]).numpy())

optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

loss_history = []


def train_step(model, images, labels):

    with tf.GradientTape() as tape:
        logits = model(images, training=True)

        # Add asserts to check the shape of the output.
        tf.debugging.assert_equal(logits.shape, (32, 10))

        loss_value = loss_object(labels, logits)

    loss_history.append(loss_value.numpy().mean())
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))


def train(epochs):
  for epoch in range(epochs):
    for (batch, (images, labels)) in enumerate(dataset):
      train_step(mnist_model, images, labels)
    print ('Epoch {} finished'.format(epoch))

I trained it and saved trainable_variables before and after training as follows:


t0 = mnist_model.trainable_variables
train(epochs=3)
t1 = mnist_model.trainable_variables
diff = tf.reduce_mean(tf.abs(t0[0] - t1[0]))
# indexing [0], [1], etc. gives the same outcome for diff
print(diff.numpy())

They are the same! So am I checking something incorrectly? If that is the case, how can I observe the updated variables correctly?

Theron

1 Answer


You aren't creating new arrays of values, just two references to the same objects. Try something like:

import numpy as np

t0 = np.array(mnist_model.trainable_variables)
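To see why the original diff is zero: trainable_variables returns the model's tf.Variable objects themselves, so t0 and t1 end up referring to the very same variables, which the optimizer updates in place. A minimal sketch along the lines of the question's snippet (assuming mnist_model and train() from the question are already defined):

import tensorflow as tf

t0 = mnist_model.trainable_variables
train(epochs=3)
t1 = mnist_model.trainable_variables

# Both names refer to the same tf.Variable object, so any elementwise
# difference between them is exactly zero.
print(t0[0] is t1[0])                                  # True
print(tf.reduce_mean(tf.abs(t0[0] - t1[0])).numpy())   # 0.0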
It is the same. And yet, if I fetch them using optimizer.get_weights() and compare them before and after each training step or epoch, I can see the change. I still do not understand why I cannot use trainable_variables. – Theron Dec 24 '19 at 01:33
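If the goal is to snapshot the values before training and compare them afterwards, one way (a minimal sketch, not from the original post; the names before/after are made up) is to copy the values out of the variables into NumPy arrays, which the optimizer does not update in place:

import numpy as np

# Detached copies of the current values; .copy() guarantees the snapshot
# does not share memory with the variables.
before = [v.numpy().copy() for v in mnist_model.trainable_variables]
train(epochs=3)
after = [v.numpy() for v in mnist_model.trainable_variables]

# Mean absolute change of the first kernel; non-zero after training.
print(np.mean(np.abs(after[0] - before[0])))

Note that np.array(...) over the list, as suggested above, still stores references to the same Variable objects, which is presumably why the comparison in the comment still came out identical.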