I'm working on a reinforcement learning model implemented with Keras and Tensorflow. I have to do frequent calls to model.predict() on single inputs.
While testing inference on a simple pretrained model, I noticed that using Keras' model.predict is WAY slower than just using Numpy on stored weights. Why is it that slow and how can I accelerate it? Using pure Numpy is not viable for complex models.
import timeit
import numpy as np
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
w = np.array([[-1., 1., 0., 0.], [0., 0., -1., 1.]]).T
b = np.array([ 15., -15., -21., 21.])
model = Sequential()
model.add(Dense(4, input_dim=2, activation='linear'))
model.layers[0].set_weights([w.T, b])
model.compile(loss='mse', optimizer='adam')
state = np.array([-23.5, 17.8])
def predict_very_slow():
return model.predict(state[np.newaxis])[0]
def predict_slow():
ws = model.layers[0].get_weights()
return np.matmul(ws[0].T, state) + ws[1]
def predict_fast():
return np.matmul(w, state) + b
print(
timeit.timeit(predict_very_slow, number=10000),
timeit.timeit(predict_slow, number=10000),
timeit.timeit(predict_fast, number=10000)
)
# 5.168972805004538 1.6963867129435828 0.021918574168087623
# 5.461319456664639 1.5491559107269515 0.021502970783442876