Determine the similarity between an extrapolated curve and the actual curve in Python

Question

I have two data frames, where one data frame is a subset of the other:

## Import Modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


## Full dataset: 21 samples of Y taken at time = 1, 2, ..., 21
original_data = dict(
    time=list(range(1, 22)),
    Y=[
        1, 1, 1, 1,
        0.99999992, 0.99999917, 0.99999438, 0.99997155, 0.99988416,
        0.99960035, 0.99879033, 0.99670785, 0.99180077, 0.98107838,
        0.95921261, 0.91757312, 0.84402734, 0.72558066, 0.55704032,
        0.3558886, 0.17003336,
    ],
)

## Convert to a dataframe (21 rows, columns 'time' and 'Y')
df_full = pd.DataFrame(data=original_data)
print("df_full = \n", df_full)


## Subset dataset: the first 10 samples of the full dataset
subset_data = dict(
    time=list(range(1, 11)),
    Y=[
        1., 1., 1., 1.,
        0.99999992, 0.99999917, 0.99999438, 0.99997155, 0.99988416,
        0.99960035,
    ],
)

## Convert to a dataframe (10 rows, columns 'time' and 'Y')
df_subset = pd.DataFrame(data=subset_data)
print("df = \n", df_subset)


## Plot the full curve (blue) with the subset (black, thicker) drawn over it
plt_fig_verify = plt.figure(figsize=(10, 5))
for curve_df, curve_fmt, curve_lw, curve_label in (
    (df_full, 'b', '4', 'df_full'),
    (df_subset, 'k', '8', 'df_subset'),
):
    plt.plot(curve_df['time'], curve_df['Y'], curve_fmt,
             linewidth=curve_lw, label=curve_label)
plt.xlabel('time (t)')
plt.ylabel('Amplitude (Y)')
plt.legend()
plt.show()

The above data looks as such:

Now, I wish to extrapolate 'df_subset' (shown in black) and compare the extrapolation with 'df_full' (shown in blue).

The function for extrapolation is defined as such:

## Define function for extrapolation
def EXTRAPOL_func(dataframe,               ## dataframe for extrapolation
                  max_cycles_extrapol,     ## Up to which cycle to extrapolate (inclusive)
                  cycle_steps,             ## Cycle steps: Ex 0,0.01,0.02,0.03,...........21
                  ORDER_of_extrapol,       ## order of fit for Y
                  ):
    """Fit a polynomial to ``dataframe`` and evaluate it on a new time grid.

    A polynomial of degree ``ORDER_of_extrapol`` is least-squares fitted to
    ``dataframe['Y']`` as a function of ``dataframe['time']`` and then
    evaluated at ``0, cycle_steps, 2*cycle_steps, ...`` up to and including
    ``max_cycles_extrapol`` (when it is a multiple of ``cycle_steps``).

    Parameters
    ----------
    dataframe : object indexable by ``'time'`` and ``'Y'``
        Data to fit; both columns are passed straight to ``np.polyfit``.
    max_cycles_extrapol : number
        Last value of the new time axis.
    cycle_steps : number
        Spacing of the new time axis.
    ORDER_of_extrapol : int
        Degree of the fitted polynomial.

    Returns
    -------
    pandas.DataFrame
        Columns ``'time_extrapol'`` (the new grid) and ``'Y_extrapol'``
        (the polynomial evaluated on that grid).

    The new grid, the extrapolated values and the resulting dataframe are
    also printed.
    """

    ## Initialize 'polyfit': least-squares polynomial fit of Y against time
    Extrapolate_Y = np.poly1d(np.polyfit(dataframe['time'],        ## Independent data
                                         dataframe['Y'],           ## Dependent data
                                         deg = ORDER_of_extrapol,  ## Order of fit
                                         ))

    ## Create NEW 'time' axis data points for extrapolation.
    ## np.arange(0, stop, step) would leave out 'stop' itself (the grid would
    ## end at 20.99 instead of 21) and its length is unreliable for float
    ## steps, so count the steps as an integer and scale them instead.
    ## The small tolerance absorbs float round-off such as 0.3/0.1 = 2.999...
    n_steps = int(max_cycles_extrapol / cycle_steps + 1e-9)
    X_new = cycle_steps * np.arange(n_steps + 1)
    print("\n X_new = \n", X_new)

    ## Correspondingly, extrapolate for Y
    Y_extrapol = Extrapolate_Y(X_new)
    print("\n Y_extrapol = \n", Y_extrapol)

    ## Store the extrapolated Y in a dataframe (one row per new time point)
    Extrapolated_data = np.vstack((X_new, Y_extrapol)).transpose()
    Extrapolated_data = pd.DataFrame(Extrapolated_data,
                                     columns=['time_extrapol', 'Y_extrapol'])
    print("\n Extrapolated_data = \n", Extrapolated_data)

    ## return the extrapolated data
    return Extrapolated_data

Now for different values of 'ORDER_of_extrapol' I have different dataframes as such:

## Extrapolate the 'df_subset' dataframe once per polynomial order.
## Every call uses the same settings (max_cycles_extrapol = 21,
## cycle_steps = 0.01); only the order of the fit changes.
## NOTE(review): orders 12 and 15 ask for more coefficients than the 10 rows
## in df_subset can determine -- confirm these fits are intended.
extrapol_by_order = {
    fit_order: EXTRAPOL_func(dataframe=df_subset,
                             max_cycles_extrapol=21,
                             cycle_steps=0.01,
                             ORDER_of_extrapol=fit_order)
    for fit_order in (4, 7, 8, 9, 12, 15)
}

## Keep one named dataframe per order
df_extrapol_4 = extrapol_by_order[4]
df_extrapol_7 = extrapol_by_order[7]
df_extrapol_8 = extrapol_by_order[8]
df_extrapol_9 = extrapol_by_order[9]
df_extrapol_12 = extrapol_by_order[12]
df_extrapol_15 = extrapol_by_order[15]

The above data frames can be visualized graphically as such:

## Plot the reference curves together with every extrapolated curve
plt_fig_verify = plt.figure(figsize=(10, 5))

## Reference data: full curve (blue) and the subset used for fitting (black)
plt.plot(df_full['time'], df_full['Y'], 'b', linewidth='4', label='df_full')
plt.plot(df_subset['time'], df_subset['Y'], 'k', linewidth='8', label='df_subset')

## One thin line per polynomial order
for fit_order, fit_df, fit_colour in (
    (4, df_extrapol_4, 'g'),
    (7, df_extrapol_7, 'r'),
    (8, df_extrapol_8, 'c'),
    (9, df_extrapol_9, 'm'),
    (12, df_extrapol_12, 'y'),
    (15, df_extrapol_15, 'brown'),
):
    plt.plot(fit_df['time_extrapol'], fit_df['Y_extrapol'], fit_colour,
             linewidth='1',
             label='extrapolate df_subset: order-{}'.format(fit_order))

plt.xlabel('time (t)')
plt.ylabel('Amplitude (Y)')
plt.legend()
plt.show()

Visually it can be seen that red (ORDER_of_extrapol = 7) and cyan (ORDER_of_extrapol = 8) are close to the blue (df_full) curve.

However, is there a mathematical measure, computable in Python, that tells me which extrapolated curve (green/red/cyan/magenta/yellow/brown), i.e. which order of extrapolation, is closest to the blue (df_full) curve?

OR,

is there any method by which we can predetermine the order of fit for extrapolation in Python?

Can somebody please help me out with this?

Determine the similarity between an extrapolated curve and the actual curve in Python

0 Answers