The solutions above weren't working for me because the file wasn't being split properly, which resulted in the last few rows being omitted. In fact, it gave me an error saying "unequal divisions" or something to that effect.
So I wrote the following. This will work for any file size.
# Path of the CSV file to be split (hard-coded; adjust for your machine).
url_1=r'C:/Users/t3734uk/Downloads/ML-GooGLECRASH/amp_ub/df2.csv'
# NOTE(review): target_folder is not referenced by any function shown here;
# the output workbooks are written using bare relative file names.
target_folder=r'C:\Users\t3734uk\Downloads\ML-GooGLECRASH\amp_ub'
# Load the whole CSV into memory as a DataFrame.
df = pd.read_csv(url_1)
# Module-level row/column counts; `rows` is read by calcRowRanges below.
rows,columns=df.shape
def calcRowRanges(_no_of_files, total_rows=None):
    """Partition a row count into contiguous ``(start, stop)`` slice bounds.

    Each range covers ``ceil(total_rows / _no_of_files)`` rows, except that
    the trailing ranges are clamped to ``total_rows``. When there are fewer
    rows than the ceil-sized intervals can fill, the surplus ranges are empty
    (``start == stop == total_rows``) rather than having a start that lies
    beyond the data.

    Args:
        _no_of_files: Number of ranges to produce; must be at least 1.
        total_rows: Number of rows to partition. When omitted, the
            module-level ``rows`` value is used.

    Returns:
        A list of ``_no_of_files`` tuples ``(start, stop)`` with ``stop``
        exclusive, suitable for positional slicing.

    Raises:
        ValueError: If ``_no_of_files`` is less than 1.
    """
    if _no_of_files < 1:
        raise ValueError(
            '_no_of_files must be at least 1, got {}'.format(_no_of_files)
        )
    if total_rows is None:
        # Fall back to the row count of the DataFrame loaded at module level.
        total_rows = rows

    interval_size = math.ceil(total_rows / _no_of_files)
    print('intrval size is ----> ' + '{}'.format(interval_size))

    # Clamp BOTH bounds: clamping only the stop can leave a start that is
    # larger than the stop (e.g. 5 rows in 4 files used to give (6, 5)).
    return [
        (
            min(n * interval_size, total_rows),
            min((n + 1) * interval_size, total_rows),
        )
        for n in range(_no_of_files)
    ]
def splitFile(_df_, _row_ranges):
    """Write one Excel workbook per row range of ``_df_``.

    The workbook for the range at position ``i`` in ``_row_ranges`` is named
    ``FILE_<i>_.xlsx`` and is created relative to the current working
    directory.

    Args:
        _df_: DataFrame whose rows are to be split.
        _row_ranges: Iterable of ``(start, stop)`` positional row bounds,
            ``stop`` exclusive.
    """
    # enumerate() gives each range its own number; looking the range up with
    # list.index() returned the first match, so duplicate ranges collided on
    # the same file name (and the lookup was quadratic).
    for file_no, (start, stop) in enumerate(_row_ranges):
        chunk = _df_.iloc[start:stop]  # explicit positional row slice
        # The context manager closes the writer, which is what actually
        # saves the workbook to disk.
        with pd.ExcelWriter('FILE_' + str(file_no) + '_' + '.xlsx') as writer:
            chunk.to_excel(writer)
def dosplit(num_files):
    """Split the module-level DataFrame ``df`` into ``num_files`` workbooks.

    Computes the row ranges, prints them followed by how many there are,
    then hands them to ``splitFile`` for writing.
    """
    ranges = calcRowRanges(num_files)
    # One print call emitting the list and its length on separate lines.
    print(ranges, len(ranges), sep='\n')
    splitFile(df, ranges)
# NOTE(review): `enter_no_files_to_be_split_in` is not defined anywhere in the
# code shown here; it appears to be a placeholder for the number of output
# files and must be replaced with an integer before this line can run.
dosplit(enter_no_files_to_be_split_in)
On second thought, the following function is more intuitive:
def splitFile2(_df_, no_of_splits):
    """Split ``_df_`` into ``no_of_splits`` Excel workbooks.

    Row bounds are obtained from ``calcRowRanges(no_of_splits)``. The
    workbook for the range at position ``i`` is named ``FILE_<i>_.xlsx`` and
    is created relative to the current working directory.

    Args:
        _df_: DataFrame whose rows are to be split.
        no_of_splits: Number of workbooks to produce.
    """
    _row_ranges = calcRowRanges(no_of_splits)
    # enumerate() numbers the files directly; list.index() returned the first
    # match, so duplicate ranges collided on the same file name.
    for file_no, (start, stop) in enumerate(_row_ranges):
        chunk = _df_.iloc[start:stop]  # explicit positional row slice
        # Leaving the with-block closes the writer, which saves the workbook.
        with pd.ExcelWriter('FILE_' + str(file_no) + '_' + '.xlsx') as writer:
            chunk.to_excel(writer)