I have two datasets, where one DataFrame is a subset of the other:
## Import Modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
## Full dataset: 21 samples of Y over time = 1..21
original_data = {
    'time': list(range(1, 22)),
    'Y': [
        1, 1, 1, 1,
        0.99999992, 0.99999917, 0.99999438, 0.99997155, 0.99988416,
        0.99960035, 0.99879033, 0.99670785, 0.99180077, 0.98107838,
        0.95921261, 0.91757312, 0.84402734, 0.72558066, 0.55704032,
        0.3558886, 0.17003336,
    ],
}
## Wrap the 21 samples in a dataframe
df_full = pd.DataFrame(original_data)
print("df_full = \n", df_full)

## Subset: the first 10 samples of the full dataset (time = 1..10)
subset_data = {
    'time': list(range(1, 11)),
    'Y': [
        1., 1., 1., 1.,
        0.99999992, 0.99999917, 0.99999438, 0.99997155, 0.99988416,
        0.99960035,
    ],
}
## Wrap the 10 samples in a dataframe
df_subset = pd.DataFrame(subset_data)
print("df = \n", df_subset)
## Plot both series on one figure: the full data in blue and,
## drawn on top of it with a thicker line, the subset in black
plt_fig_verify = plt.figure(figsize=(10,5))
for frame, colour, width, legend_name in (
    (df_full, 'b', '4', 'df_full'),
    (df_subset, 'k', '8', 'df_subset'),
):
    plt.plot(frame['time'], frame['Y'], colour, linewidth=width, label=legend_name)
plt.xlabel('time (t)')
plt.ylabel('Amplitude (Y)')
plt.legend()
plt.show()
The above data looks like this:
Now, I wish to extrapolate 'df_subset' (shown in black) so that it matches 'df_full' (shown in blue).
The function for extrapolation is defined as follows:
## Define function for extrapolation
def EXTRAPOL_func(dataframe,            ## dataframe for extrapolation
                  max_cycles_extrapol,  ## Up to which cycle do you want to extrapolate
                  cycle_steps,          ## Cycle steps: Ex 0,0.01,0.02,0.03,...........21
                  ORDER_of_extrapol,    ## order of fit for Y
                  ):
    """Fit a polynomial to ``dataframe`` and evaluate it on a new time grid.

    A least-squares polynomial of degree ``ORDER_of_extrapol`` is fitted to
    ``dataframe['Y']`` as a function of ``dataframe['time']`` and then
    evaluated on the grid ``0, cycle_steps, 2*cycle_steps, ...`` up to and
    including ``max_cycles_extrapol`` (when it is a multiple of the step).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns ``'time'`` and ``'Y'``.
    max_cycles_extrapol : float
        Upper end of the new time grid.
    cycle_steps : float
        Spacing of the new time grid; must be positive.
    ORDER_of_extrapol : int
        Degree of the fitted polynomial.

    Returns
    -------
    pandas.DataFrame
        Columns ``'time_extrapol'`` and ``'Y_extrapol'``.

    Raises
    ------
    ValueError
        If ``cycle_steps`` is not positive.

    Notes
    -----
    A degree of ``len(dataframe)`` or higher is not uniquely determined by
    the data; ``numpy.polyfit`` reports this case with a rank warning.
    """
    if cycle_steps <= 0:
        raise ValueError("cycle_steps must be positive")

    ## Initialize 'polyfit'
    Extrapolate_Y = np.poly1d(np.polyfit(dataframe['time'],        ## Independent data
                                         dataframe['Y'],           ## Dependent data
                                         deg=ORDER_of_extrapol,    ## Order of fit
                                         ))

    ## Create NEW 'time' axis data points for extrapolation.
    ## The grid is built from an integer step count instead of a float-step
    ## np.arange: that keeps the number of points predictable and includes
    ## the end point 'max_cycles_extrapol' itself. The small tolerance
    ## absorbs floating-point error in the division (e.g. 21 / 0.01).
    n_steps = int(np.floor(max_cycles_extrapol / cycle_steps + 1e-9))
    X_new = cycle_steps * np.arange(n_steps + 1)
    print("\n X_new = \n", X_new)

    ## Correspondingly, extrapolate for Y
    Y_extrapol = Extrapolate_Y(X_new)
    print("\n Y_extrapol = \n", Y_extrapol)

    ## Store the extrapolated Y in a dataframe
    Extrapolated_data = pd.DataFrame({'time_extrapol': X_new,
                                      'Y_extrapol': Y_extrapol})
    print("\n Extrapolated_data = \n", Extrapolated_data)

    ## return the extrapolated data
    return Extrapolated_data
Now, for different values of 'ORDER_of_extrapol', I obtain different DataFrames:
## Extrapolate the 'df_subset' dataframe once per polynomial order.
## Every run uses the same grid settings (up to cycle 21 in steps of 0.01);
## only the order of the fit changes. The orders are evaluated left to right,
## so the results are bound to df_extrapol_4, _7, _8, _9, _12, _15 in turn.
(
    df_extrapol_4,
    df_extrapol_7,
    df_extrapol_8,
    df_extrapol_9,
    df_extrapol_12,
    df_extrapol_15,
) = (
    EXTRAPOL_func(
        dataframe=df_subset,          ## dataframe for extrapolation
        max_cycles_extrapol=21,       ## Up to which cycle to extrapolate
        cycle_steps=0.01,             ## Cycle steps of the new time axis
        ORDER_of_extrapol=fit_order,  ## order of fit for Y
    )
    for fit_order in (4, 7, 8, 9, 12, 15)
)
The above DataFrames can be visualized as follows:
## Plot the reference data together with every extrapolated curve
plt_fig_verify = plt.figure(figsize=(10,5))

## Reference series: full data (blue) and the subset used for fitting (black)
for frame, colour, width, legend_name in (
    (df_full, 'b', '4', 'df_full'),
    (df_subset, 'k', '8', 'df_subset'),
):
    plt.plot(frame['time'], frame['Y'], colour, linewidth=width, label=legend_name)

## Extrapolated curves, one thin line per polynomial order
for fitted, colour, legend_name in (
    (df_extrapol_4, 'g', 'extrapolate df_subset: order-4'),
    (df_extrapol_7, 'r', 'extrapolate df_subset: order-7'),
    (df_extrapol_8, 'c', 'extrapolate df_subset: order-8'),
    (df_extrapol_9, 'm', 'extrapolate df_subset: order-9'),
    (df_extrapol_12, 'y', 'extrapolate df_subset: order-12'),
    (df_extrapol_15, 'brown', 'extrapolate df_subset: order-15'),
):
    plt.plot(fitted['time_extrapol'], fitted['Y_extrapol'], colour,
             linewidth='1', label=legend_name)

plt.xlabel('time (t)')
plt.ylabel('Amplitude (Y)')
plt.legend()
plt.show()
Visually, the red (ORDER_of_extrapol = 7) and cyan (ORDER_of_extrapol = 8) curves are the closest to the blue (df_full) curve.
However, is there a mathematical criterion that I can compute in Python to determine which curve (green/red/cyan/magenta/yellow/brown), i.e. which order of extrapolation, is closest to the blue (df_full) curve?
Or,
is there a method to determine the order of the fit in advance, before extrapolating, in Python?
Can somebody please help me with this?