I quantized a basic TFLite regression model to int8, but the quantized model's predictions seem to be almost uncorrelated with the outputs of the original model before quantization.
All of the code and steps taken to train and quantize the model are shown below, so the issue should be easy to replicate (just copy and paste it :).
I am working with the famous boston_housing dataset, which can be downloaded here: https://www.kaggle.com/code/prasadperera/the-boston-housing-dataset/input
The steps I took are as follows:
- Trained the linear regression model without quantization (which worked fine)
- Created a new quantization-aware model from the previous model
- Converted the quantization-aware model to TFLite (setting the input and output tensors to int8)
- Converted the validation dataset to int8
- Used the quantized model to make predictions (the predictions are significantly off and seem uncorrelated with the underlying model)
import pandas as pd
import numpy as np
from numpy import loadtxt
# load data
dataset = loadtxt('boston_housing.csv', delimiter=",")
# split into inputs and outputs
dataset_x = dataset[:, :-1]
dataset_y = dataset[:, -1]
dataset_x = np.float32(dataset_x)
dataset_y = np.float32(dataset_y)
from sklearn.model_selection import train_test_split
training_dataset_x, test_dataset_x, training_dataset_y, test_dataset_y = train_test_split(dataset_x, dataset_y, test_size = 0.20)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(100, input_dim = 13, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
hist = model.fit(training_dataset_x, training_dataset_y, batch_size=32, epochs=500, validation_split=0.2)
#Quantize the model
import tensorflow_model_optimization as tfmot
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model)
# 'quantize_model' requires a recompile
q_aware_model.compile(optimizer='rmsprop',
                      loss='mse',
                      metrics=['mae'])
q_aware_model.summary()
history = q_aware_model.fit(training_dataset_x, training_dataset_y, batch_size=32, epochs=500, validation_split=0.2)
print(test_dataset_y[1])
print(q_aware_model.predict(test_dataset_x[1].reshape(1, -1)))
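# (Sketch, not part of the original steps) It may also help to evaluate the
# quantization-aware model on the full test set, to get a float baseline
# to compare the TFLite int8 predictions against later.
print(q_aware_model.evaluate(test_dataset_x, test_dataset_y, verbose=0))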
#Convert the model to TFLite
import tensorflow as tf
# Create a converter
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
# Indicate that you want to perform default optimizations,
# which include quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Define a generator function that provides your test data's numpy arrays
def representative_data_gen():
    for i in range(len(test_dataset_x)):
        yield [test_dataset_x[i:i+1]]
# Use the generator function to guide the quantization process
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to int8
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
# Convert the model
tflite_model = converter.convert()
# Save the model to disk
open("q_aware_model.tflite", "wb").write(tflite_model)
#Testing the quantized model
# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="q_aware_model.tflite")
interpreter.allocate_tensors()
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
print(output_details)
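# (Sketch, not part of the original steps) The interpreter also reports the
# input tensor's quantization parameters, which the manual int8 conversion
# in the next step can be compared against.
input_scale, input_zero_point = input_details[0]['quantization']
print("input scale:", input_scale, "input zero point:", input_zero_point)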
test_x1 = (test_dataset_x-128).astype(np.int8)
print(test_x1)
predictions=[]
for i in range(len(test_x1)):
    test_values = np.expand_dims(test_x1[i].flatten(), axis=0)
    # Set the value for the input tensor
    interpreter.set_tensor(input_details[0]['index'], test_values)
    # Run the inference
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    predictions.append(output)
print(predictions) ### prediction values = 50-127, expected values 0-40
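For what it's worth, the raw int8 outputs can be mapped back to real-valued predictions with the output tensor's quantization parameters (the scale and zero point from output_details); a minimal sketch of that dequantization step, assuming predictions has been filled by the loop above, is:
# Dequantize the int8 outputs: real_value = scale * (int8_value - zero_point)
output_scale, output_zero_point = output_details[0]['quantization']
dequantized = [output_scale * (p.astype(np.float32) - output_zero_point) for p in predictions]
print(dequantized)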