I've been experimenting with running inference on a pre-trained Keras model directly in the browser using tensorflowjs, and I'm struggling to get the same kind of performance (i.e. the same predictions) as in Python.
On investigation I noticed that there were small cumulative differences in output across all layers, which become fairly disastrous by the end.
The following simplified example should serve to illustrate. This was tested with tensorflow 2.1.0, tensorflowjs 1.7.2 and Chrome 80.0.3987.163.
First we build a simple deep model in tf.keras, initialise the weights to random values and convert this to tensorflowjs.
import tensorflowjs as tfjs
from tensorflow.keras.models import Model, save_model
from tensorflow.keras import layers as L
from tensorflow.keras import backend as K
from tensorflow.keras import initializers as I
# Reset Keras' global state before building the model.
K.clear_session()
def build_model(depth, size):
    """Build a stack of `depth` Dense layers that exposes every layer's output.

    Each layer has `size` units, float32 dtype, no activation argument, and
    kernel/bias initialised from a seeded uniform distribution on [-1, 1].
    The output of every Dense layer is registered as a model output so the
    intermediate values can be compared layer by layer.

    Args:
        depth: Number of Dense layers to stack.
        size: Width of the input vector and of every Dense layer.

    Returns:
        A `(model, inputs, outputs)` tuple, where `inputs` is the list holding
        the single input tensor and `outputs` is the list of the Dense layers'
        output tensors, in layer order.
    """
    # The shape must be a tuple: `(size)` is just the int `size`, whereas
    # `(size,)` is the one-element tuple that `Input` expects.
    x = L.Input((size,), name='input')
    inputs = [x]
    outputs = []
    uniform_init = I.RandomUniform(minval=-1, maxval=1, seed=123)
    for i in range(depth):
        x = L.Dense(size,
                    dtype='float32',
                    kernel_initializer=uniform_init,
                    bias_initializer=uniform_init,
                    name='dense_' + str(i))(x)
        outputs.append(x)
    model = Model(inputs=inputs, outputs=outputs)
    return (model, inputs, outputs)
# Build a 10-layer model of width 50 and compile it.
model, inputs, outputs = build_model(depth=10, size=50)
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

# Persist the model twice: as a Keras HDF5 file for the Python comparison,
# and in the tensorflowjs layers format for the browser.
save_model(model, './keras_model.h5')
tfjs.converters.save_keras_model(model, './keras_converted')
Output:
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) [(None, 50)] 0
_________________________________________________________________
dense_0 (Dense) (None, 50) 2550
_________________________________________________________________
dense_1 (Dense) (None, 50) 2550
_________________________________________________________________
dense_2 (Dense) (None, 50) 2550
_________________________________________________________________
dense_3 (Dense) (None, 50) 2550
_________________________________________________________________
dense_4 (Dense) (None, 50) 2550
_________________________________________________________________
dense_5 (Dense) (None, 50) 2550
_________________________________________________________________
dense_6 (Dense) (None, 50) 2550
_________________________________________________________________
dense_7 (Dense) (None, 50) 2550
_________________________________________________________________
dense_8 (Dense) (None, 50) 2550
_________________________________________________________________
dense_9 (Dense) (None, 50) 2550
=================================================================
Total params: 25,500
Trainable params: 25,500
Non-trainable params: 0
Next we run the model against a unit vector in tensorflowjs and export the output from each layer as well as the weights to json.
/**
 * Load the converted Keras model, run it on a single all-ones input vector,
 * and download a JSON file containing every output layer's values
 * (`<layer>_output`) and every layer's weights (`<layer>_weights`).
 */
async function run() {
  const model = await tf.loadLayersModel('/keras_converted/model.json')
  model.summary()

  const inputSize = model.inputLayers[0].batchInputShape[1]

  // tf.tidy disposes the input and prediction tensors once their values
  // have been copied out into plain arrays.
  const yhat = tf.tidy(() => {
    const x = tf.ones([1, inputSize])
    const preds = model.predict(x)
    // predict() returns a bare Tensor for single-output models and an array
    // of Tensors otherwise; normalise to an array so both cases work.
    return (Array.isArray(preds) ? preds : [preds]).map(t => t.arraySync())
  })

  const vals = {}
  // Predictions are paired with the output layers by position.
  model.outputLayers.forEach((output, i) => {
    vals[output.name + '_output'] = yhat[i]
  })
  model.layers.forEach(layer => {
    vals[layer.name + '_weights'] = layer.getWeights().map(w => w.arraySync())
  })

  // Trigger a browser download of the collected values.
  const a = document.createElement('a')
  const file = new Blob([JSON.stringify(vals)], { type: 'text/plain' })
  a.href = URL.createObjectURL(file)
  a.download = 'keras_model_tfjs_output.json'
  a.click()
}
// Surface load/inference failures instead of leaving an unhandled rejection.
run().catch(err => console.error(err))
We also run the model in Python against the same input vector and compare the outputs of each layer. We compare the weights as well, to rule out any change in the weights as a possible cause.
import re
import json
import numpy as np
from tensorflow.keras.models import load_model
model = load_model('./keras_model.h5')

# Load the layer outputs and weights exported from the tensorflowjs run.
with open('./keras_model_tfjs_output.json', 'r') as json_file:
    tfjs_vals = json.load(json_file)

# Build a dictionary with the same keys from running inference in Python.
keras_vals = {}

# Run the model on a single vector of ones. The input width is read from the
# loaded model itself rather than from the `inputs` variable of the
# model-building snippet, so this script also works when run on its own.
input_size = model.inputs[0].shape[1]
yhat = model.predict(np.ones((1, input_size)))

# Skip the first layer: it is the input layer, which has no entry in `yhat`.
for i, layer in enumerate(model.layers[1:]):
    keras_vals[layer.name + '_weights'] = layer.get_weights()
    keras_vals[layer.name + '_output'] = yhat[i]
# Compare values in keras_vals and tfjs_vals
def compare_vals(key, a, b):
    """Print, for each entry of `a`, how it differs from the same entry of `b`.

    For every index `i` one line is printed with the number of elements where
    `a[i]` and `b[i]` are not exactly equal and the mean absolute difference
    between them.

    Args:
        key: Label used as the prefix of each printed line.
        a: Sequence of arrays (or an array iterated along its first axis).
        b: Indexable of array-likes, aligned with `a` by position.
    """
    template = '{0}[{1}]: different values = {2}, average difference = {3}'
    for idx, left in enumerate(a):
        right = b[idx]
        mismatches = np.sum(left != right)
        mean_abs_diff = np.sum(np.abs(left - right)) / np.size(left)
        print(template.format(key, idx, mismatches, mean_abs_diff))
# Report, key by key, how the Keras values differ from the tensorflowjs ones.
for key, keras_val in keras_vals.items():
    compare_vals(key, keras_val, tfjs_vals[key])
Output:
dense_0_weights[0]: different values = 0, average difference = 0.0
dense_0_weights[1]: different values = 0, average difference = 0.0
dense_0_output[0]: different values = 0, average difference = 0.0
dense_1_weights[0]: different values = 0, average difference = 0.0
dense_1_weights[1]: different values = 0, average difference = 0.0
dense_1_output[0]: different values = 40, average difference = 1.7833709716796876e-06
dense_2_weights[0]: different values = 0, average difference = 0.0
dense_2_weights[1]: different values = 0, average difference = 0.0
dense_2_output[0]: different values = 43, average difference = 1.2047290802001953e-05
dense_3_weights[0]: different values = 0, average difference = 0.0
dense_3_weights[1]: different values = 0, average difference = 0.0
dense_3_output[0]: different values = 38, average difference = 5.3539276123046876e-05
dense_4_weights[0]: different values = 0, average difference = 0.0
dense_4_weights[1]: different values = 0, average difference = 0.0
dense_4_output[0]: different values = 43, average difference = 0.00024005889892578125
dense_5_weights[0]: different values = 0, average difference = 0.0
dense_5_weights[1]: different values = 0, average difference = 0.0
dense_5_output[0]: different values = 45, average difference = 0.0010586166381835937
dense_6_weights[0]: different values = 0, average difference = 0.0
dense_6_weights[1]: different values = 0, average difference = 0.0
dense_6_output[0]: different values = 46, average difference = 0.00515625
dense_7_weights[0]: different values = 0, average difference = 0.0
dense_7_weights[1]: different values = 0, average difference = 0.0
dense_7_output[0]: different values = 47, average difference = 0.0254345703125
dense_8_weights[0]: different values = 0, average difference = 0.0
dense_8_weights[1]: different values = 0, average difference = 0.0
dense_8_output[0]: different values = 46, average difference = 0.11546875
dense_9_weights[0]: different values = 0, average difference = 0.0
dense_9_weights[1]: different values = 0, average difference = 0.0
dense_9_output[0]: different values = 44, average difference = 0.42203125
The weights are identical after the roundtrip from keras to tensorflowjs and back again. However, the outputs of each layer have diverged quite significantly by the end.
Is there any way to ensure identical outputs between the Python and JS versions of a model? I've tried running this with both the WebGL and CPU backends, and the same problem persists.