Suppose I have 4 processes and 2 GPUs. Running the following code works when `n = 1`. If I set `n = 2`, it blocks at the second loop.
```python
import onnxruntime as ort
from multiprocessing import Process, Queue


class Onnx:
    def __init__(self, model_path, device_id):
        providers = [('CUDAExecutionProvider', {'device_id': device_id}), 'CPUExecutionProvider']
        options = ort.SessionOptions()
        options.intra_op_num_threads = 1
        options.inter_op_num_threads = 1
        self.session = ort.InferenceSession(model_path, options, providers=providers)

    def forward(self, **kwargs):
        # kwargs maps input names to numpy arrays
        return self.session.run(None, kwargs)


def impl(model_path, ctx, num_gpus):
    # each worker builds its own session, pinned to one of the GPUs
    device_id = ctx % num_gpus
    model = Onnx(model_path, device_id)
    # DO SOMETHING AND FORWARD ONNX MODEL


def main():
    model_path = "model.onnx"  # placeholder path
    num_workers = 4
    num_gpus = 2
    n = 2
    for i in range(n):
        print("START LOOP {}".format(i))
        cs = [Process(target=impl, args=(model_path, ctx, num_gpus)) for ctx in range(num_workers)]
        [c.start() for c in cs]
        [c.join() for c in cs]
        print("END LOOP {}".format(i))


if __name__ == "__main__":
    main()
```
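Inside `impl`, the model is exercised roughly like this (the input name `input` and the shape are illustrative; the real ones depend on the model):

```python
import numpy as np

def run_once(model):
    # Illustrative input only; the real input name and shape depend on the model.
    x = np.random.rand(1, 3, 224, 224).astype(np.float32)
    # forward(**kwargs) passes {"input": x} through to session.run(None, {"input": x})
    return model.forward(input=x)
```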
Could someone explain what is happening here and offer some suggestions?
I have also tried creating the ORT session outside the subprocess function and passing it as an argument (so it gets pickled), following https://github.com/microsoft/onnxruntime/issues/7846#issuecomment-850217402.
Unfortunately, that version also blocks at the second loop.
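For reference, that second attempt looked roughly like the sketch below (simplified; `impl_shared`, `main_shared`, and the model path are illustrative, not my exact code). The `Onnx` objects are built once in the parent process and handed to each worker through the `Process` args, so they are pickled when the workers start:

```python
def impl_shared(model, ctx):
    # DO SOMETHING AND FORWARD ONNX MODEL
    pass


def main_shared():
    model_path = "model.onnx"  # placeholder path
    num_workers = 4
    num_gpus = 2
    n = 2
    # one session per worker, built once in the parent process
    models = [Onnx(model_path, ctx % num_gpus) for ctx in range(num_workers)]
    for i in range(n):
        print("START LOOP {}".format(i))
        cs = [Process(target=impl_shared, args=(models[ctx], ctx)) for ctx in range(num_workers)]
        [c.start() for c in cs]
        [c.join() for c in cs]
        print("END LOOP {}".format(i))
```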