0

I have two data frames, one of which is a subset of the other:

## Import Modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Full reference dataset (21 rows): 'time' runs from 1 to 21 and 'Y' is the
# amplitude recorded at each time step.
original_data = {
    'time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
             12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
    'Y': [1, 1, 1, 1, 0.99999992, 0.99999917, 0.99999438, 0.99997155,
          0.99988416, 0.99960035, 0.99879033, 0.99670785, 0.99180077,
          0.98107838, 0.95921261, 0.91757312, 0.84402734, 0.72558066,
          0.55704032, 0.3558886, 0.17003336],
}

# Convert to a dataframe with 21 rows
df_full = pd.DataFrame(original_data)
print("df_full = \n", df_full)


# Subset of the original dataset: its first 10 rows only
subset_data = {
    'time': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Y': [1., 1., 1., 1., 0.99999992, 0.99999917, 0.99999438, 0.99997155,
          0.99988416, 0.99960035],
}

# Convert to a dataframe with 10 rows
df_subset = pd.DataFrame(subset_data)
# Label the output with the variable it actually shows (was "df = ").
print("df_subset = \n", df_subset)


## Plot the data: full dataset in blue, subset drawn thicker in black so it
## stays visible underneath/around the blue curve.
plt_fig_verify = plt.figure(figsize=(10, 5))
# linewidth is a size in points and is documented as a float, so pass numbers
# rather than strings that only work through implicit conversion.
plt.plot(df_full['time'], df_full['Y'], 'b', linewidth=4, label='df_full')
plt.plot(df_subset['time'], df_subset['Y'], 'k', linewidth=8, label='df_subset')
plt.xlabel('time (t)')
plt.ylabel('Amplitude (Y)')
plt.legend()
plt.show()

The data above looks like this:

enter image description here

Now, I wish to extrapolate 'df_subset' (shown in black) beyond its last time point and compare the result with 'df_full' (shown in blue).

The function for extrapolation is defined as follows:

## Define function for extrapolation
def EXTRAPOL_func(dataframe,               ## dataframe to fit; needs 'time' and 'Y' columns
                  max_cycles_extrapol,     ## upper bound of the new 'time' grid
                  cycle_steps,             ## spacing of the new 'time' grid, e.g. 0.01
                  ORDER_of_extrapol,       ## degree of the polynomial fitted to Y
                  *,
                  include_endpoint=False,  ## also evaluate at 'max_cycles_extrapol' itself
                  ):
    """Fit a polynomial to ``dataframe`` and evaluate it on a new time grid.

    A least-squares polynomial of degree ``ORDER_of_extrapol`` is fitted to
    ``dataframe['Y']`` as a function of ``dataframe['time']`` and then
    evaluated on a regular grid that starts at 0.

    Parameters
    ----------
    dataframe
        Object indexable by ``'time'`` and ``'Y'`` (e.g. a pandas DataFrame).
    max_cycles_extrapol
        Upper bound of the evaluation grid.
    cycle_steps
        Spacing between consecutive grid points.
    ORDER_of_extrapol
        Degree passed to ``np.polyfit``.
    include_endpoint
        The grid is built with ``np.arange``, which is half-open: by default
        (``False``) the grid stops one step *before* ``max_cycles_extrapol``
        (e.g. 0, 0.01, ..., 20.99 for a bound of 21).  Pass ``True`` to append
        ``max_cycles_extrapol`` as a final grid point, unless the grid already
        ends there.

    Returns
    -------
    pandas.DataFrame
        Two float columns, ``'time_extrapol'`` (the grid) and ``'Y_extrapol'``
        (the fitted polynomial evaluated on the grid).

    Notes
    -----
    The grid, the evaluated values and the resulting dataframe are printed to
    stdout.
    """
    ## Least-squares polynomial fit, wrapped so it can be called like a function
    Extrapolate_Y = np.poly1d(np.polyfit(dataframe['time'],
                                         dataframe['Y'],
                                         deg=ORDER_of_extrapol))

    ## Create NEW 'time' axis data points for extrapolation (half-open grid)
    X_new = np.arange(0, max_cycles_extrapol, cycle_steps)
    if include_endpoint:
        ## With a float step np.arange may already land on the bound through
        ## rounding, so compare against a tolerance tied to the step size
        ## instead of appending blindly and duplicating the last point.
        ends_on_bound = X_new.size > 0 and np.isclose(
            X_new[-1], max_cycles_extrapol,
            rtol=0.0, atol=1e-6 * abs(cycle_steps))
        if not ends_on_bound:
            X_new = np.append(X_new, max_cycles_extrapol)
    print("\n X_new = \n", X_new)

    ## Correspondingly, extrapolate for Y
    Y_extrapol = Extrapolate_Y(X_new)
    print("\n Y_extrapol = \n", Y_extrapol)

    ## Store the grid and the extrapolated Y side by side in a dataframe
    Extrapolated_data = pd.DataFrame(np.column_stack((X_new, Y_extrapol)),
                                     columns=['time_extrapol', 'Y_extrapol'])
    print("\n Extrapolated_data = \n", Extrapolated_data)

    ## return the extrapolated data
    return Extrapolated_data

Calling this function with different values of 'ORDER_of_extrapol' gives a different extrapolated dataframe for each order:

## Extrapolate 'df_subset' once per candidate polynomial order.  Every call
## uses the same settings (upper bound 21, step 0.01); only the order changes.
## The calls run in the order listed, one result per name on the left.
## NOTE: 'df_subset' has only 10 rows, so orders 12 and 15 ask for more
## polynomial coefficients than there are data points and the least-squares
## fit is not uniquely determined for them.
(df_extrapol_4,
 df_extrapol_7,
 df_extrapol_8,
 df_extrapol_9,
 df_extrapol_12,
 df_extrapol_15) = (
    EXTRAPOL_func(dataframe=df_subset,          ## dataframe for extrapolation
                  max_cycles_extrapol=21,       ## upper bound of the new time grid
                  cycle_steps=0.01,             ## spacing of the new time grid
                  ORDER_of_extrapol=fit_order,  ## order of fit for Y
                  )
    for fit_order in (4, 7, 8, 9, 12, 15)
)

The above data frames can be visualized graphically as such:

## Plot the reference data together with every extrapolated curve
plt_fig_verify = plt.figure(figsize=(10, 5))

# linewidth is a size in points and is documented as a float, so pass numbers
# rather than strings that only work through implicit conversion.
plt.plot(df_full['time'], df_full['Y'], 'b', linewidth=4, label='df_full')
plt.plot(df_subset['time'], df_subset['Y'], 'k', linewidth=8, label='df_subset')

## One thin line per polynomial order: (extrapolated dataframe, colour, order)
extrapolated_curves = [
    (df_extrapol_4, 'g', 4),
    (df_extrapol_7, 'r', 7),
    (df_extrapol_8, 'c', 8),
    (df_extrapol_9, 'm', 9),
    (df_extrapol_12, 'y', 12),
    (df_extrapol_15, 'brown', 15),
]
for curve, colour, fit_order in extrapolated_curves:
    plt.plot(curve['time_extrapol'], curve['Y_extrapol'], colour,
             linewidth=1, label=f'extrapolate df_subset: order-{fit_order}')

plt.xlabel('time (t)')
plt.ylabel('Amplitude (Y)')
plt.legend()
plt.show()

enter image description here

Visually it can be seen that red (ORDER_of_extrapol = 7) and cyan (ORDER_of_extrapol = 8) are close to the blue (df_full) curve.

However, is there a mathematical criterion, computable in Python, that tells me which of the extrapolated curves (green/red/cyan/magenta/yellow/brown, i.e. which order of extrapolation) is closest to the blue (df_full) curve?

OR,

is there any method by which we can predetermine the order of fit for extrapolation in Python?

Can somebody please help me out with this?

NN_Developer
  • 417
  • 6

0 Answers0