I am doing custom training of a neural network in Colab, with and without a GPU, and the training process is faster using the CPU, which makes me think that I am not parallelising the operations or that I am missing something. I do not think it is because the model is small, because I tried more complicated models and the problem persists.
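As a first sanity check (a minimal sketch, assuming TF 2.x on Colab, and separate from the script below), I can confirm that TensorFlow actually sees the GPU and log where each op is placed:

import tensorflow as tf

print(tf.config.list_physical_devices('GPU'))  # should list the Colab GPU
tf.debugging.set_log_device_placement(True)    # logs the device each op runs on

The full script is: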
## Import libraries
import os
# Switch off unnecessary TF warning messages (must be set before TensorFlow is imported to take effect)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import matplotlib
# matplotlib.use('TkAgg') # Required to make it run on both Windows and Mac
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np
from tqdm import trange
###############################################################################
################################## Parameters #################################
###############################################################################
gamma = tf.constant(2.0) # Curvature of the utility function
rho = tf.constant(0.04) # Discount rate
A = tf.constant(0.5) # TFP
alpha = tf.constant(0.36) # Returns to scale
delta = tf.constant(0.05) # Depreciation Rate of Capital
batchSize = 100 # Batch Size
number_epochs = 100000 # Number of epochs
kMin = 0.1 # lower bound of sample interval
kMax = 10.0 # upper bound of sample interval
gridSize = 10000 # Plotting grid
# Set global seed
tf.random.set_seed(1234)
np.random.seed(1234)
# Value function initial guess
initGuess = -60
# Neural network optimizer
optimizer = keras.optimizers.Adam()
###############################################################################
######################## Value Function Neural Network ########################
###############################################################################
def valueFnNeuralNet(nHidden = 3, nNeurons = 8):
    model = keras.models.Sequential()
    # Input layer
    model.add(keras.layers.Dense(nNeurons, activation = "tanh", input_dim = 1))
    # Hidden layers
    for layer in range(nHidden - 1):
        model.add(keras.layers.Dense(nNeurons, activation = "tanh"))
    # Output layer
    model.add(keras.layers.Dense(1, bias_initializer = keras.initializers.Constant(value = initGuess)))
    return model
def HJB(input, V):
    VPrime = tf.gradients(V(input), input)[0]
    VPrimemax = tf.maximum(VPrime, 1E-7) # dV/dk
    Y = A * tf.pow(input, alpha) # Output
    C = tf.pow(VPrimemax, (-1/gamma)) # Consumption
    I = Y - C # Investment
    muK = I - delta * input # Capital drift
    U = tf.pow(C, 1-gamma) / (1-gamma) # Utility
    HJB = U - rho * V(input) + tf.multiply(tf.stop_gradient(VPrimemax), muK)
    return HJB
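# Note added for clarity: the residual above is the HJB equation of the
# deterministic growth model,
#     rho * V(k) = u(c) + V'(k) * (A * k^alpha - delta * k - c),
# with u(c) = c^(1-gamma) / (1-gamma), where the first-order condition
# u'(c) = V'(k) gives the consumption rule c = V'(k)^(-1/gamma) used above.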
def Objective(batchSize):
    input = tf.random.uniform(shape = (batchSize,1), minval = kMin, maxval = kMax)
    error = HJB(input, VF)
    return tf.reduce_mean(tf.square(error))
###############################################################################
################################ Training Step ################################
###############################################################################
# Need decorator to run in graph mode instead of eager execution
@tf.function
def training_step():
    with tf.GradientTape() as tape:
        loss = Objective(batchSize)
    grads = tape.gradient(loss, theta)
    optimizer.apply_gradients(zip(grads, theta))
    return loss
###############################################################################
################################ Training Loop ################################
###############################################################################
def train_model(epochs):
    losses = []
    for epoch in trange(epochs):
        loss = training_step()
        losses.append(loss.numpy())
    return losses
###############################################################################
################################### Running ###################################
###############################################################################
# Set up neural network
VF = valueFnNeuralNet()
# Define trainable network parameters
theta = VF.trainable_variables
# Run Model (and output loss evolution)
results = train_model(number_epochs)
The outputs that I get are the following:
without GPU: 100%|██████████| 100000/100000 [01:30<00:00, 1101.79it/s]
with GPU: 100%|██████████| 100000/100000 [03:36<00:00, 461.47it/s]
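To check whether fixed per-step overhead on the GPU (kernel launches, host-device transfers) rather than the arithmetic itself dominates with such a small network and batch, I also time the compiled step directly. This is a rough sketch; the helper time_steps is my own and not part of the script above:

import time

def time_steps(n = 1000):
    training_step()                      # warm-up: triggers tf.function tracing
    start = time.perf_counter()
    for _ in range(n):
        loss = training_step()
    loss.numpy()                         # sync: wait for any pending GPU work
    return (time.perf_counter() - start) / n

print(f"Average step time: {time_steps():.6f} s")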