-3

I have the following function, mp_process(), and would like to add a progress bar to it, but I am running into a lot of issues.

I am looking for help on how to add tqdm to mp_process.

from gzip import READ
import http.client
import pandas as pd
import xml.etree.cElementTree as ET
import multiprocessing as mp
from tqdm import tqdm

def mp_rocess(df):
    """Process ``df`` in parallel, one fixed-size chunk of rows per task.

    The frame is cut into consecutive chunks of at most ``N_ROWS`` rows.
    Each chunk is handed to ``process_frame`` in a worker process, and the
    per-chunk results are concatenated in their original order.

    Args:
        df: The pandas DataFrame to process.

    Returns:
        The concatenation of every ``process_frame`` result, re-indexed
        from 0 (``ignore_index=True``).

    Raises:
        ValueError: If ``df`` has no rows, because ``pd.concat`` refuses an
            empty list of objects.
    """
    N_ROWS = 100  # maximum number of rows in each chunk

    with mp.Pool(10) as pool:  # pool of 10 worker processes
        # Slice by position (iloc) rather than by label (loc) so chunking is
        # correct whatever index df carries. iloc clamps the end of the last
        # slice, so a short trailing chunk needs no special case.
        results = [
            pool.apply_async(
                process_frame, args=(df.iloc[start:start + N_ROWS, :],)
            )
            for start in range(0, len(df.index), N_ROWS)
        ]
        # get() blocks until the task finishes and re-raises any exception
        # raised by process_frame in the worker.
        new_dfs = [result.get() for result in results]

    # Reassemble the final dataframe and hand it back to the caller.
    return pd.concat(new_dfs, ignore_index=True)
Jshang
  • 11
  • 2
  • What were your issues and what did you try? – Booboo Jan 22 '22 at 19:56
  • Please clarify your specific problem or provide additional details to highlight exactly what you need. As it's currently written, it's hard to tell exactly what you're asking. – Community Feb 01 '22 at 14:48

1 Answer

0
def mp_rocess(df):
    """Process ``df`` in parallel chunks while showing a tqdm progress bar.

    The frame is cut into consecutive chunks of at most ``N_ROWS`` rows.
    Each chunk is handed to ``process_frame`` in a worker process; when a
    chunk finishes, the progress bar advances by that chunk's row count.
    The per-chunk results are concatenated in their original order.

    Args:
        df: The pandas DataFrame to process.

    Returns:
        The concatenation of every ``process_frame`` result, re-indexed
        from 0 (``ignore_index=True``).

    Raises:
        ValueError: If ``df`` has no rows, because ``pd.concat`` refuses an
            empty list of objects.
    """
    N_ROWS = 2  # maximum number of rows in each chunk
    total_rows = len(df.index)

    # The bar is a context manager so it is closed even if a worker fails.
    with tqdm(total=total_rows) as pbar, mp.Pool(10) as pool:  # 10 workers
        results = []
        for start in range(0, total_rows, N_ROWS):
            # Positional slicing works for any index; iloc clamps the end,
            # so the short trailing chunk needs no special case.
            chunk = df.iloc[start:start + N_ROWS, :]
            results.append(
                pool.apply_async(
                    process_frame,
                    args=(chunk,),
                    # The callback runs in the parent process once the task
                    # succeeds. Advance by this chunk's real size (bound as
                    # a default argument to avoid late binding) so the bar
                    # ends exactly at total_rows.
                    callback=lambda _result, size=len(chunk.index): pbar.update(size),
                )
            )
        # get() blocks until the task finishes and re-raises any exception
        # raised by process_frame in the worker.
        new_dfs = [result.get() for result in results]

    # Reassemble the final dataframe.
    return pd.concat(new_dfs, ignore_index=True)
Jshang
  • 11
  • 2
  • As it’s currently written, your answer is unclear. Please [edit] to add additional details that will help others understand how this addresses the question asked. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Jan 23 '22 at 03:12