I have recently converted a pre-trained GPT-2 model to TFLite and am trying to use an interpreter to generate text from a prompt.
Please find my code below, which does the following:
- Converting the pre-trained model to TFLite (works fine).
- Creating an interpreter from the saved TFLite model (works fine).
- Using the tokenizer, generating tokens and setting the input tensors.
- Invoking the interpreter.
- Extracting the output details.
Now, how can I accurately convert the output to token IDs that can be passed to the tokenizer's decode method?
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer, TFGPT2LMHeadModel

hub_gpt_model = TFGPT2LMHeadModel.from_pretrained("gpt2")
hub_gpt_tokenizer = AutoTokenizer.from_pretrained("gpt2")
# Wrap the model with fixed-shape (1, 1024) int32 inputs so the converter sees static shapes
input_ids = tf.keras.layers.Input((1024,), batch_size=1, dtype=tf.int32, name='input_ids')
attention_mask = tf.keras.layers.Input((1024,), batch_size=1, dtype=tf.int32, name='attention_mask')
inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
output = hub_gpt_model(inputs)
hub_gpt_model_x = tf.keras.models.Model(inputs=inputs, outputs=output)
# Convert with default (dynamic-range) quantization and write the .tflite file
converter = tf.lite.TFLiteConverter.from_keras_model(hub_gpt_model_x)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.inference_input_type = tf.float32
tflite_quant_model = converter.convert()
with open('models/botGPT_lite/botGPT2_cosine_lite.tflite', 'wb') as f:
    f.write(tflite_quant_model)
interpreter = tf.lite.Interpreter(model_path='models/botGPT_lite/botGPT2_cosine_lite.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
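Since (as I understand it) the converter does not guarantee that input_details comes back in the same order as the Keras inputs, I first print the details to check which index corresponds to input_ids and which to attention_mask:

# Confirm which TFLite input index maps to input_ids vs. attention_mask
for d in input_details:
    print(d['index'], d['name'], d['shape'], d['dtype'])
for d in output_details:
    print(d['index'], d['name'], d['shape'], d['dtype'])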
def pad_up_to(t, max_in_dims, constant_values):
    # Right-pad t with constant_values until it reaches the shape max_in_dims
    s = tf.shape(t)
    paddings = [[0, m - s[i]] for (i, m) in enumerate(max_in_dims)]
    return tf.pad(t, paddings, 'CONSTANT', constant_values=constant_values)
sentence = 'try(errorhandler) opencsvtxt(csvtxt)'
review_token = hub_gpt_tokenizer.encode(sentence, return_tensors='tf')
padded = pad_up_to(review_token, [1, 1024], 0)
# Attention mask: 1 over the real tokens, 0 over the padding
input_mask = pad_up_to(tf.ones_like(review_token), [1, 1024], 0)
print(padded)
# Set input_ids and attention_mask (index mapping checked against input_details above)
interpreter.set_tensor(input_details[0]['index'], padded)
interpreter.set_tensor(input_details[1]['index'], input_mask)
interpreter.invoke()
output_details = interpreter.get_output_details()[0]
tflite_model_predictions = interpreter.get_tensor(output_details['index'])
print("Prediction results shape:", tflite_model_predictions.shape)
# HOW CAN I GET TOKEN IDS THAT CAN BE PASSED TO TOKENIZER.DECODE?
predictions = np.argmax(tflite_model_predictions[0], axis=-1)
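For clarity, this is roughly the decoding step I am after, assuming the first output tensor holds the LM logits with shape (1, 1024, vocab_size): take the logits at the last real (non-padded) position, argmax them to get the predicted next-token ID, and pass that to decode:

# Sketch of the decoding I expect (assumes the output tensor is the LM logits)
seq_len = int(tf.reduce_sum(input_mask))              # number of real (non-padded) tokens
logits = tflite_model_predictions[0]                  # shape (1024, vocab_size)
next_token_id = int(np.argmax(logits[seq_len - 1]))   # greedy choice at the last real position
print(hub_gpt_tokenizer.decode([next_token_id]))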