I have been trying to use the multiprocessing module from Python to achieve parallelism.
I'm able to execute my code, and it runs in parallel, but after some time only one process finishes its task and the others exit without finishing. I know there is a join() method that waits for all tasks to finish, but it does not work properly. I have been reading the multiprocessing manual page and forums to find out why it isn't working, and I haven't figured it out yet.
I think the problem may be related to something else, such as the database or the Python version. I am using Python 3.10 on an 8-core CPU. Any help would be appreciated.
Here is my code:
I have 5 processes (20,000 items split into batches of 4,000):
def process_batch(self, batch_index, batched_payloads):
    """Import one batch of payloads, committing after each successful document.

    Args:
        batch_index: Zero-based position of this batch; used together with the
            configured batch size to compute the overall progress counter.
        batched_payloads: Iterable of payloads. Each payload exposes ``doc``
            and ``rows`` (each row exposes ``row_number``).

    A failure on one payload rolls back that payload only; the traceback is
    printed and the loop continues with the next payload.
    """
    import traceback

    # NOTE(review): when this method runs in a child process created by
    # multiprocessing, frappe.db is presumably the connection inherited from
    # the parent. Sharing one DB connection between processes is a likely
    # cause of workers stopping mid-batch -- confirm, and open a separate
    # connection in each worker if so.
    total_payload_count = len(self.import_file.get_payloads_for_import())
    # Nothing in this method adds to imported_rows, so as written the skip
    # branch below never triggers.
    imported_rows = set()
    batch_size = frappe.conf.data_import_batch_size or 4000

    for i, payload in enumerate(batched_payloads):
        doc = payload.doc
        row_indexes = [row.row_number for row in payload.rows]
        # 1-based position of this payload across all batches.
        current_index = (i + 1) + (batch_index * batch_size)

        if imported_rows.intersection(row_indexes):
            print("Skipping imported rows", row_indexes)
            if total_payload_count > 5:
                frappe.publish_realtime(
                    "data_import_progress",
                    {
                        "current": current_index,
                        "total": total_payload_count,
                        "skipping": True,
                        "data_import": self.data_import.name,
                    },
                    user=frappe.session.user,
                )
            continue

        try:
            start = timeit.default_timer()
            # Insert the document into the database.
            doc = self.process_doc(doc)
            processing_time = timeit.default_timer() - start
            eta = self.get_eta(current_index, total_payload_count, processing_time)

            if self.console:
                update_progress_bar(
                    f"Importing {total_payload_count} records",
                    current_index,
                    total_payload_count,
                )
            elif total_payload_count > 5:
                frappe.publish_realtime(
                    "data_import_progress",
                    {
                        "current": current_index,
                        "total": total_payload_count,
                        "docname": doc.name,
                        "data_import": self.data_import.name,
                        "success": True,
                        "row_indexes": row_indexes,
                        "eta": eta,
                    },
                    user=frappe.session.user,
                )

            # Commit after every successful import.
            frappe.db.commit()
        except Exception:
            # Undo the partial work for this payload, then report the error
            # instead of discarding it so failing rows can be diagnosed.
            frappe.db.rollback()
            traceback.print_exc()
Here is my code for multiprocessing:
def import_data(self):
    """Split the payloads into batches and import each batch in its own process.

    One ``Process`` is started per batch, each running ``self.process_batch``.
    All workers are joined before returning, and any worker that ended with a
    non-zero exit code is reported.
    """
    payloads = self.import_file.get_payloads_for_import()
    batch_size = frappe.conf.data_import_batch_size or 4000

    workers = []
    for batch_index, batched_payloads in enumerate(
        frappe.utils.create_batch(payloads, batch_size)
    ):
        p = Process(target=self.process_batch, args=(batch_index, batched_payloads))
        p.start()
        workers.append(p)

    for worker in workers:
        worker.join()
        # A non-zero exit code means the worker raised or was killed
        # (negative values are signal numbers); report it rather than
        # letting the worker vanish silently.
        if worker.exitcode != 0:
            print(f"Worker {worker.name} exited with code {worker.exitcode}")
The same thing happens if I do:
# Fan the batches out over a pool with one worker per CPU core.
processes_count = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes_count)
#pool = multiprocessing.Pool(4)
# Keep the AsyncResult handles: apply_async() never raises by itself, so
# dropping them hides every error raised in (or while dispatching to) a worker.
async_results = [
    pool.apply_async(self.process_batch, args=(batch_index, batched_payloads))
    for batch_index, batched_payloads in enumerate(
        frappe.utils.create_batch(payloads, batch_size)
    )
]
pool.close()
pool.join()
# get() re-raises the exception of a failed task in the parent process.
for async_result in async_results:
    async_result.get()
edit:
The same thing happens with concurrent.futures.ProcessPoolExecutor:
# Materialize the (batch_index, batched_payloads) pairs so they can be split
# into the two parallel argument sequences that Executor.map() expects.
batches = list(enumerate(frappe.utils.create_batch(payloads, batch_size)))
with concurrent.futures.ProcessPoolExecutor(max_workers=processes_count) as executor:
    batch_indexes = [batch_index for batch_index, _ in batches]
    batch_payloads = [batched_payloads for _, batched_payloads in batches]
    # map() passes one item from each iterable per call; handing it a single
    # iterable of tuples would call process_batch with one argument only.
    # Iterating over the results re-raises any exception from a worker.
    for _ in executor.map(self.process_batch, batch_indexes, batch_payloads):
        pass
The same thing happens with concurrent.futures.ProcessPoolExecutor and submit():
# Submit one task per batch, then wait for each to finish; result() re-raises
# any exception that occurred inside the worker.
with concurrent.futures.ProcessPoolExecutor(max_workers=processes_count) as executor:
    future_to_batch = []
    for batch_index, batched_payloads in enumerate(
        frappe.utils.create_batch(payloads, batch_size)
    ):
        future_to_batch.append(
            executor.submit(self.process_batch, batch_index, batched_payloads)
        )
    for future in concurrent.futures.as_completed(future_to_batch):
        future.result()