I converted a Keras model to TFLite. This is my conversion script:
from keras import backend as K
from keras.models import load_model
from keras.engine.base_layer import Layer
import tensorflow as tf

# This line must be executed before loading the Keras model.
K.set_learning_phase(0)

# custom layer
class Mish(Layer):
    '''
    Mish Activation Function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
          (tuple of integers, does not include the samples axis)
          when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X_input = Input(input_shape)
        >>> X = Mish()(X_input)
    '''

    def __init__(self, **kwargs):
        super(Mish, self).__init__(**kwargs)
        self.supports_masking = True

    def call(self, inputs):
        # return inputs * K.tanh(K.softplus(inputs))
        # return inputs * tf.tanh(tf.log(1 + tf.exp(inputs)))
        return inputs * K.tanh(K.log(1 + K.exp(inputs)))

    def get_config(self):
        config = super(Mish, self).get_config()
        return config

    def compute_output_shape(self, input_shape):
        return input_shape

model = load_model('./keras_model/yolo4.h5', custom_objects={"Mish": Mish})

def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(keep_var_names or []))
        output_names = output_names or []
        output_names += [v.op.name for v in tf.global_variables()]
        # Graph -> GraphDef ProtoBuf
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        return frozen_graph

frozen_graph = freeze_session(K.get_session(),
                              output_names=[out.op.name for out in model.outputs])
tf.train.write_graph(frozen_graph, "frozen", "tf_model_l0.pb", as_text=False)

converter = tf.lite.TFLiteConverter.from_frozen_graph('frozen/tf_model_l0.pb',
                                                      input_arrays=['input_1'],
                                                      output_arrays=["conv2d_110/BiasAdd", "conv2d_102/BiasAdd", "conv2d_94/BiasAdd"])
tfmodel = converter.convert()
open("model5.tflite", "wb").write(tfmodel)
The above is my conversion script. At inference time I apply the same preprocessing that I use for the Keras inference. This is the TFLite inference code:
import cv2
import numpy as np
import tensorflow as tf

model_path = "model5.tflite"

# load tflite model
babynet = tf.lite.Interpreter(model_path=model_path)
# allocate tensors
babynet.allocate_tensors()
input_details = babynet.get_input_details()
output_details = babynet.get_output_details()

# image reading and preprocessing (same as for the Keras model)
img = cv2.imread("test.jpg")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (416, 416))
img = img.astype(np.float32) / 255.
img = np.expand_dims(img, axis=0)

# set input tensor and run the inference
babynet.set_tensor(input_details[0]['index'], img)
babynet.invoke()

# output data
outs = []
outs.append(babynet.get_tensor(output_details[0]['index']))
outs.append(babynet.get_tensor(output_details[1]['index']))
outs.append(babynet.get_tensor(output_details[2]['index']))
I am getting accurate results with TFLite, but it takes a very long time to process one frame. The Keras model's inference time is 1.0110 seconds per frame, but the TFLite inference takes 7.560 seconds per frame.
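For reference, the per-frame time is measured roughly like this (a minimal sketch using only the standard library; babynet, input_details and img come from the code above):

import time

# average only the forward pass over a few runs
n_runs = 10
start = time.perf_counter()
for _ in range(n_runs):
    babynet.set_tensor(input_details[0]['index'], img)
    babynet.invoke()
print("seconds per frame: %.4f" % ((time.perf_counter() - start) / n_runs))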
After that I quantized the model to float16 with this code (reusing the converter from above):
# float16 quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_fp16_model = converter.convert()
tflite_model_fp16_file = "model_quant_f16.tflite"
open(tflite_model_fp16_file, "wb").write(tflite_fp16_model)
Then I checked the inference time again: it is now around 2.100 seconds per frame. The model size is reduced from 256 MB to 128 MB, and the accuracy is the same. But the inference time is still higher than with the Keras model.
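By "accuracy is the same" I mean the raw outputs match; I check that roughly like this (a sketch; model is the loaded Keras model, img and outs come from the inference code above):

import numpy as np

# compare Keras and TFLite predictions on the same preprocessed image
keras_outs = model.predict(img)
for k_out, t_out in zip(keras_outs, outs):
    # note: Keras and TFLite may order the three output heads differently
    print(k_out.shape, t_out.shape, np.abs(k_out - t_out).max())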
I don't understand where I went wrong. My Keras model inference takes 1 second per frame, but the same model converted to TFLite takes 2 seconds per frame. I am using a CPU-only system; the TensorFlow version is 1.15.2 and the Keras version is 2.3.1. I am not gaining any speed at inference time after converting to TFLite.
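In case it helps with diagnosing this, here is how I inspect what the quantized interpreter reports (a sketch; it prints the tensor names, shapes and dtypes of the float16 model):

import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model_quant_f16.tflite")
interpreter.allocate_tensors()
for d in interpreter.get_input_details() + interpreter.get_output_details():
    print(d['name'], d['shape'], d['dtype'])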