
I am working on a script that runs on an i.MX8MP board equipped with an NPU. The script acquires and processes images on the NPU using tflite_runtime with the NNAPI delegate. Sometimes the script starts threads to perform other tasks in parallel. I noticed that these threaded tasks occasionally cause tflite to stop working correctly: from that point on it always produces the same prediction, regardless of the input. This seems to happen when CPU usage hits 100%, even for only a few seconds. Please check the following working example (I cannot share the model, but it should work with any quantized tflite model):

import multiprocessing
import time
from random import random
from threading import Thread

import cv2
import numpy as np
import tflite_runtime.interpreter as tflite

class ClassificationModel(object):
    def __init__(self, path, mask_path=None):
        # Load the tflite model and cache input/output tensor details
        self.interpreter = tflite.Interpreter(model_path=path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.input_shape = self.input_details[0]['shape']

    def predict(self, img, resize=True):
        # Resize to the model input resolution if requested
        if resize:
            img = cv2.resize(img, (self.input_shape[2], self.input_shape[1]))

        # Normalize to [0, 1] and add the batch dimension
        img = (img / 255.0).astype(np.float32)
        img = np.expand_dims(img, 0)

        # Run inference and return the squeezed output tensor
        self.interpreter.set_tensor(self.input_details[0]['index'], img)
        self.interpreter.invoke()
        output = self.interpreter.get_tensor(self.output_details[0]['index'])
        output = np.squeeze(output)

        return output



def my_thread_1():
    print("Start threaded task 1")
    simulate_cpu_load()
    print("Task 1 completed")


def worker():
    # Busy loop that keeps one core at 100%
    while True:
        pass


def simulate_cpu_load():
    # Spawn one busy-looping process per core for ~3 seconds, then stop them
    num_cores = multiprocessing.cpu_count()
    processes = []

    for _ in range(num_cores):
        p = multiprocessing.Process(target=worker)
        p.start()
        processes.append(p)

    time.sleep(3)

    for p in processes:
        p.terminate()


if __name__ == '__main__':
    classifier_1 = ClassificationModel('mymodel.tflite')

    # Try the first few video devices until one opens
    cap = cv2.VideoCapture()
    for i in range(5):
        cap.open(i)
        if cap.isOpened():
            break

    if not cap.isOpened():
        print("Could not open camera")
        exit()

    try:
        while True:
            # get image
            ret, img = cap.read()
            if not ret:
                continue

            # predict
            p1 = classifier_1.predict(img)
            print(p1)

            # threaded task (this breaks tflite inference)
            if random() < 0.1:
                t = Thread(target=my_thread_1)
                t.start()

    except KeyboardInterrupt:
        exit()
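
Note: for brevity the example above creates a plain Interpreter. In the real script the interpreter is constructed with the NPU delegate attached; a rough sketch of that initialization is below (the delegate library path is only a placeholder for whatever delegate library your BSP provides):

import tflite_runtime.interpreter as tflite

# The delegate library path below is a placeholder; on i.MX BSPs the NPU is
# reached through a delegate library shipped with the image.
npu_delegate = tflite.load_delegate('/usr/lib/libvx_delegate.so')
interpreter = tflite.Interpreter(
    model_path='mymodel.tflite',
    experimental_delegates=[npu_delegate],
)
interpreter.allocate_tensors()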

This problem does not occur on my PC, where tflite runs on the CPU. Is there a way I can investigate this further?

UPDATE: after some tests, it seems that the problem occurs whenever the thread starts a process, as in the example above. I was also able to reproduce it using subprocess.Popen(cmd).wait(), where cmd was an I/O-intensive command such as mv dir_with_many_files dest (see the sketch below).
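
For reference, this is roughly the threaded task variant that also reproduces the problem (the function name and directory names are placeholders: a directory containing many files and its destination):

import subprocess

def my_thread_io():
    # Runs an I/O-intensive command from a thread; this also breaks inference.
    # "dir_with_many_files" and "dest" are placeholders.
    print("Start threaded I/O task")
    subprocess.Popen(["mv", "dir_with_many_files", "dest"]).wait()
    print("I/O task completed")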
