I have a processpoolexecutor into which I submit multiple disk read/write calls. I want to create a threadpool inside every process for performance benefits.
below is my attempt to override and modify _process_worker method of concurrent.futures process.py to use with ProcessPoolExecutor. I am trying to run the function in a ThreadPoolExecutor inside -
from concurrent.futures import process as process_futures
class ProcessPoolExecutor(process_futures.ProcessPoolExecutor):
"""Override process creation to use our processes"""
def _adjust_process_count(self):
"""This is copy-pasted from concurrent.futures to override the Process class"""
for _ in range(len(self._processes), self._max_workers):
p = Process(
target=_process_worker,
args=(self._call_queue, self._result_queue, None, None))
p.start()
self._processes[p.pid] = p
def _process_worker(call_queue, result_queue):
with ThreadPoolExecutor(max_workers=8) as executor: # starting a Threadpool
while True:
call_item = call_queue.get(block=True)
if call_item is None:
# Wake up queue management thread
result_queue.put(os.getpid())
return
try:
if 1: # my changes , problem with this code
future = executor.submit(call_item.fn, *call_item.args, **call_item.kwargs)
future.add_done_callback(
functools.partial(_return_result, call_item, result_queue))
else: # original code with only processpool as in futures process.py
r = call_item.fn(*call_item.args, **call_item.kwargs)
except BaseException as e:
result_queue.put(process_futures._ResultItem(call_item.work_id,
exception=e))
else:
result_queue.put(process_futures._ResultItem(call_item.work_id,
result=r))
when I add a threadpoolexecutor inside processpoolexecutor , i get below error
RuntimeError: There is no current event loop in thread '<threadedprocess._ThreadPoolExecutor object at 0x000001C5897B1FA0>_0'.
I understand that eventloop are not created on child threads, so its complaining of no current event loop. and so, even if i add new event loop -
def _process_worker(call_queue, result_queue, a, b):
try:
import asyncio
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
except Exception as e:
logger.info("eexception {} ".format(e))
with ThreadPoolExecutor(max_workers=8) as executor:
while True:
call_item = call_queue.get(block=True)
if call_item is None:
# Wake up queue management thread
result_queue.put(os.getpid())
return
try:
if 1: # my changes , problem with this code
job_func = functools.partial(call_item.fn, *call_item.args, **call_item.kwargs)
try:
loop.run_in_executor(executor, job_func)
except Exception as e:
logger.info("exception recvd {}".format(e))
else: # original code with only processpool as in futures process.py
r = call_item.fn(*call_item.args, **call_item.kwargs)
except BaseException as e:
result_queue.put(process_futures._ResultItem(call_item.work_id,
exception=e))
else:
result_queue.put(process_futures._ResultItem(call_item.work_id,
result=r))
I get a new error -
concurrent.futures.process.BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
how can i change _process_worker to run the work in a threadpool ? Any suggestions please.