I am working on a script that runs on an iMX8MP board equipped with an NPU. The script acquires images and processes them on the NPU using tflite_runtime with the NNAPI delegate. Sometimes the script starts threads to perform other tasks in parallel. I noticed that these threaded tasks sometimes cause tflite to stop working: it then always produces the same prediction regardless of the input. This seems to happen when the CPU usage goes to 100%, even for a few seconds. Please check the following working example (I cannot share the model, but it should work with any quantized tflite model):
import multiprocessing
from multiprocessing import Queue, Process
import numpy as np
from threading import Thread
from random import random, randint
import tflite_runtime.interpreter as tflite
import time
import cv2
import os
import sys
import psutil
class ClassificationModel(object):
    """Wrap a TFLite interpreter for single-image inference.

    The interpreter is created from a model file, its tensors are allocated
    once, and the input/output tensor metadata is cached for ``predict``.
    """

    def __init__(self, path, mask_path=None):
        """Load the model and cache its tensor details.

        Args:
            path: Path of the ``.tflite`` model file.
            mask_path: Accepted for interface compatibility; not used here.
        """
        self.interpreter = tflite.Interpreter(model_path=path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.input_shape = self.input_details[0]['shape']

    def _to_input_tensor(self, normalized):
        """Convert a [0, 1]-scaled image to the dtype of the input tensor.

        Integer input tensors that carry quantization parameters receive
        ``round(x / scale + zero_point)`` clipped to the dtype range; every
        other case keeps the float32 conversion.
        """
        details = self.input_details[0]
        dtype = details.get('dtype', np.float32)
        scale, zero_point = details.get('quantization', (0.0, 0))
        if scale and np.issubdtype(dtype, np.integer):
            limits = np.iinfo(dtype)
            quantized = np.round(normalized / scale + zero_point)
            return np.clip(quantized, limits.min, limits.max).astype(dtype)
        return normalized.astype(np.float32)

    def predict(self, img, resize=True):
        """Run the model on one image and return its first output tensor.

        Args:
            img: Image array with pixel values on a 0-255 scale.
            resize: When true, resize the image to the model input size
                first (assumes the input shape is laid out as
                batch, height, width, channels -- TODO confirm for the
                model in use).

        Returns:
            The first output tensor with singleton dimensions removed. The
            values are returned as produced by the interpreter (they are
            not dequantized).
        """
        if resize:
            # cv2.resize expects the target size as (width, height).
            img = cv2.resize(img, (self.input_shape[2], self.input_shape[1]))
        img = self._to_input_tensor(img / 255.0)
        img = np.expand_dims(img, 0)  # add the batch dimension
        self.interpreter.set_tensor(self.input_details[0]['index'], img)
        self.interpreter.invoke()
        output = self.interpreter.get_tensor(self.output_details[0]['index'])
        output = np.squeeze(output)
        return output
def my_thread_1() -> None:
    """Thread target: print a start message, run the CPU-load simulation, print completion."""
    print("Start threaded task 1")
    # Blocks this thread until the load simulation returns.
    simulate_cpu_load()
    print("Task 1 completed")
def worker() -> None:
    """Spin forever in an empty loop; never returns on its own."""
    # Used as a process target by simulate_cpu_load, which terminates it.
    while True:
        pass
def simulate_cpu_load(duration=3, num_procs=None):
    """Run busy-loop worker processes for a while, then stop them.

    Args:
        duration: Seconds to keep the workers running (default 3).
        num_procs: Number of worker processes to start; defaults to the
            value of ``multiprocessing.cpu_count()``.

    The workers are always terminated and joined, even if starting one of
    them or the sleep raises, so no spinning or unreaped child is left
    behind.
    """
    if num_procs is None:
        num_procs = multiprocessing.cpu_count()
    processes = []
    try:
        for _ in range(num_procs):
            # daemon=True: the children are also killed if the parent exits.
            p = multiprocessing.Process(target=worker, daemon=True)
            p.start()
            processes.append(p)
        time.sleep(duration)
    finally:
        for p in processes:
            p.terminate()
        # Join after terminating so each child is reaped.
        for p in processes:
            p.join()
if __name__ == '__main__':
    # Repro driver: classify camera frames in a loop and, at random, start a
    # background thread that launches the CPU-load simulation.
    classifier_1 = ClassificationModel('mymodel.tflite')

    # Try camera indices 0-4 and keep the first one that opens.
    cap = cv2.VideoCapture()
    for i in range(5):
        cap.open(i)
        if cap.isOpened():
            break
    if not cap.isOpened():
        print("Could not open camera")
        sys.exit(1)  # non-zero status: the script could not do its job

    try:
        while True:
            # get image
            ret, img = cap.read()
            if not ret or img is None:
                # A failed grab yields no frame; stop instead of passing
                # None into predict().
                print("Could not read frame from camera")
                break
            # predict
            p1 = classifier_1.predict(img)
            print(p1)
            # threaded task (this breaks tflite inference)
            if random() < 0.1:
                t = Thread(target=my_thread_1)
                t.start()
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the loop.
        pass
    finally:
        cap.release()
This problem is not encountered on my PC running tflite on the CPU. Is there a way I can better investigate it?
UPDATE: after some tests, it seems that the problem occurs when the Thread starts a process, as in this example. I was also able to replicate it using `subprocess.Popen(cmd).wait()`, where `cmd` was an I/O-intensive task, like `mv dir_with_many_files dest`.