I have trained a fully connected neural network with one hidden layer of 64 nodes. I am testing with the Medical Cost dataset. With the original precision model, the mean absolute error is 0.22063259780406952. With a model quantized to float16
or integer quantization with float fallback
, the difference between the original error and the low precision model's is never more than 0.1. However, if I do full integer quantization
, the error shoots to unreasonable amounts. In this particular case, it jumps to nearly 60. I have no idea if this is a bug in TensorFlow, or if I'm using the APIs incorrectly or if this is a reasonable behavior after quantization. Any help is appreciated. The code showing the conversion and inference is shown below:
- Preprocessing
import math
import pathlib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from sklearn import preprocessing as pr
from sklearn.metrics import mean_absolute_error
url = 'insurance.csv'
column_names = ["age", "sex", "bmi", "children", "smoker", "region", "charges"]
dataset = pd.read_csv(url, names=column_names, header=0, na_values='?')
dataset = dataset.dropna() # Drop rows with missing values
dataset['sex'] = dataset['sex'].map({'female': 2, 'male': 1})
dataset['smoker'] = dataset['smoker'].map({'yes': 1, 'no': 0})
dataset = pd.get_dummies(dataset, prefix='', prefix_sep='', columns=['region'])
# this is a trick to convert a dataframe to 2d array, scale it and
# convert back to dataframe
scaled_np = pr.StandardScaler().fit_transform(dataset.values)
dataset = pd.DataFrame(scaled_np, index=dataset.index, columns=dataset.columns)
- Train and Test split
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
train_features = train_dataset.copy()
test_features = test_dataset.copy()
train_labels = train_features.pop('charges')
test_labels = test_features.pop('charges')
- Original model training
def build_and_compile_model():
model = keras.Sequential([
layers.Dense(64,
activation='relu',
input_shape=(len(dataset.columns) - 1, )),
layers.Dense(1)
])
model.compile(loss='mean_absolute_error',
optimizer=tf.keras.optimizers.Adam(0.001))
return model
dnn_model = build_and_compile_model()
dnn_model.summary()
dnn_model.fit(train_features,
train_labels,
validation_split=0.2,
verbose=0,
epochs=100)
print("Original error = {}".format(
dnn_model.evaluate(test_features, test_labels, verbose=0)))
- Conversion to lower precision model
converter = tf.lite.TFLiteConverter.from_keras_model(dnn_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
def representative_data_gen():
for input_value in tf.data.Dataset.from_tensor_slices(
train_features.astype('float32')).batch(1).take(100):
yield [input_value]
converter.representative_dataset = representative_data_gen
# Full Integer Quantization
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Set the input and output tensors to uint8 (APIs added in r2.3)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
tflite_model_quant = converter.convert()
dir_save = pathlib.Path(".")
file_save = dir_save / "model_16.tflite"
file_save.write_bytes(tflite_model_quant)
- Instantiate the TFLite model
interpreter = tf.lite.Interpreter(model_path=str(file_save))
interpreter.allocate_tensors()
- Evaluate the lower precision model
def evaluate_model(interpreter, test_images, test_labels):
input_details = interpreter.get_input_details()[0]
input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]
# Run predictions on every image in the "test" dataset.
prediction_digits = []
for test_image in test_images:
if input_details['dtype'] == np.uint8:
input_scale, input_zero_point = input_details['quantization']
test_image = test_image / input_scale + input_zero_point
test_image = np.expand_dims(test_image,
axis=0).astype(input_details['dtype'])
interpreter.set_tensor(input_index, test_image)
# Run inference.
interpreter.invoke()
output = interpreter.get_tensor(output_index)
prediction_digits.append(output[0])
filtered_labels, correct_digits = map(
list,
zip(*[(x, y) for x, y in zip(test_labels, prediction_digits)
if not math.isnan(y)]))
return mean_absolute_error(filtered_labels, correct_digits)
print(evaluate_model(interpreter, test_features[:].values, test_labels))