In case someone else sees this same problem:
# Each job's parameters are kept together as one tuple and then unpacked at
# submit time: executor.submit(fn, *args) calls fn(*args), so passing the
# tuple itself would hand process_file a single argument instead of four.
data = [(file, loc, symbol, under_df) for file in files]
with concurrent.futures.ProcessPoolExecutor() as executor:
    # A distinct loop name (args) avoids shadowing the `data` list.
    r = [executor.submit(process_file, *args) for args in data]
I want to use `concurrent.futures` to cut down the processing time for a bunch of files. I am very new to `concurrent.futures`.
Here is the code:
def process_file(file, loc, symbol, under_df):
    """Load one futures CSV and derive its expiry and timestamp columns.

    Args:
        file: Name of the CSV file inside the ``<loc><symbol>fut/`` directory.
        loc: Path prefix; it is concatenated directly with ``symbol``, so it
            must carry its own trailing separator.
        symbol: Instrument symbol; its upper-cased form is stripped from the
            ``contractname`` column to isolate the expiry text.
        under_df: Not used in the portion of the function shown here.
    """
    print(file)
    # Load the data
    fut_df = pd.read_csv('{}{}fut/{}'.format(loc, symbol, file), index_col=0)
    # Drop duplicates
    fut_df.drop_duplicates(inplace=True)
    # Create expiry column: strip the symbol prefix and the 'FUT' suffix.
    # regex=False keeps the symbol a literal string even if it contains
    # characters that are special in regular expressions.
    expiry_text = (
        fut_df['contractname']
        .str.replace(symbol.upper(), '', regex=False)
        .str.replace('FUT', '', regex=False)
    )
    fut_df['expiry'] = pd.to_datetime(expiry_text, format='%y%b')
    # Convert timestamp
    fut_df['timestamp'] = pd.to_datetime(fut_df['timestamp'])
    # Get all the timestamps
    times = fut_df['timestamp'].tolist()
    # There is a bunch of code after this
# Submit the first two files to a process pool, one task per file.
# NOTE(review): the four values are wrapped in ONE list, and
# submit(fn, *args) forwards that list as a single positional argument, so
# the four-parameter process_file is called with just one argument. That is
# consistent with the TypeError reported by both futures.
with concurrent.futures.ProcessPoolExecutor() as executor:
r = [executor.submit(process_file, [file, loc, symbol, under_df]) for file in files[0:2]]
[<Future at 0x7fd65a5b1860 state=finished raised TypeError>, <Future at 0x7fd65a5dc6d8 state=finished raised TypeError>]
I tried changing the code to test out the problem:
def process_file(file):
    """Print the given file name (minimal stand-in used to isolate the error)."""
    print(file)


# NOTE(review): ProcessPoolExecutor needs process_file to be importable by
# the worker processes and does not work from an interactive interpreter --
# confirm how this snippet is being run.
with concurrent.futures.ProcessPoolExecutor() as executor:
    # One task per file; each file name is the sole positional argument.
    r = [executor.submit(process_file, file) for file in files[0:2]]
I still get the same error.
What could be the issue here? The `process_file` function works without a problem.