I am currently trying to do a regression of a function calculated via a RK4 method performed on a non-linear Volterra integral of the second kind. The problem I found is that the code is extremely slow, for 1 call of the curve_fit function (fitt), it takes about 30-40 minute to generate a data. Overall, there will be a lot of calls to fitt before the parameters are determined, this takes more than 6 hours to run. Is there anyway to optimize this code? Thanks in advance!
from scipy.special import gamma
from ml_internal import LTInversion
from scipy.optimize import curve_fit , fsolve
from scipy.misc import derivative
from sklearn.metrics import r2_score
from math import comb , factorial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
# Gets the data
df = pd.read_excel('D:\\CoMat\\Fractional_fit\\optimized\\data_optimized.xlsx')
skipTime = 1
skipIndex = df[df['Time']== skipTime].index.values[0]
xls = pd.read_excel('D:\\CoMat\\Fractional_fit\\optimized\\data_optimized.xlsx',skiprows=np.arange(1,skipIndex+1,1))
timeDF = xls['Time']
tempDF = xls['Temp']
taDF = xls['Ta']
timeDF = timeDF - timeDF[0]
tempDF = tempDF + 273.15
t0 = tempDF[0]
ta = sum(taDF)/len(taDF)
ta = ta + 273.15
###########################################
#Spliting into intervals
h = 0.05
a = 0
b = timeDF[len(timeDF)-1]
N = int(np.round((b-a)/h))
#Each xi
def xidx(index):
return a + h*index
#Function in the image are written here.
def gx(t,lamda,alpha):
return t0 * ml(lamda*(t**alpha),alpha)
gx = np.vectorize(gx)
def kernel(t,s,rad,lamda,alpha,beta):
if t == s:
return 0
return (t-s)**(alpha-1) * ml_(lamda*((t-s)**alpha),alpha,alpha,1) * (beta*(rad**4) - beta*(ta**4) - lamda*ta)
kernel = np.vectorize(kernel)
############################
# The problem is here!!!!!!
def fx(x,n,lamda,alpha,beta):
ans = gx(x,lamda,alpha)
for j in range(n):
ans += (h/6)*(kernel(x,xidx(j),f0[j],lamda,alpha,beta) + 2*kernel(x,xidx(j+1/2),f1[j],lamda,alpha,beta) + 2*kernel(x,xidx(j+1/2),f2[j],lamda,alpha,beta) + kernel(x,xidx(j+1),f3[j],lamda,alpha,beta))
return ans
#########################
f0 = np.zeros(N+1)
f0[0] = t0
f1 = np.zeros(N+1)
f2 = np.zeros(N+1)
f3 = np.zeros(N+1)
F = np.zeros((3,N+1))
def fitt(xvalue,lamda,alpha,beta):
global f0,f1,f2,f3,F
n = int(np.round(xvalue/h))
f1[n] = fx(xidx(n) + 1/2,n,lamda,alpha,beta) + (h/2)*kernel(xidx(n + 1/2),xidx(n),f0[n],lamda,alpha,beta)
f2[n] = fx(xidx(n + 1/2),n,lamda,alpha,beta)
f3[n] = fx(xidx(n+1),n,lamda,alpha,beta) + h*kernel(xidx(n+1),xidx(n+1/2),f2[n],lamda,alpha,beta)
if n+1 <= N:
f0[n+1] = fx(xidx(n+1),n,lamda,alpha,beta) + (h/6)*(kernel(xidx(n+1),xidx(n),f0[n],lamda,alpha,beta) + 2*kernel(xidx(n+1),xidx(n+1/2),f1[n],lamda,alpha,beta) + 2*kernel(xidx(n+1),xidx(n+1/2),f2[n],lamda,alpha,beta) + kernel(xidx(n+1),xidx(n+1),f3[n],lamda,alpha,beta))
if(xvalue == timeDF[len(timeDF) - 1]):
print(f0[n],n)
returnValue = f0[n]
f0 = np.zeros(N+1)
f0[0] = t0
f1 = np.zeros(N+1)
f2 = np.zeros(N+1)
f3 = np.zeros(N+1)
return returnValue
print(f0[n],n)
return f0[n]
fitt = np.vectorize(fitt)
#Fitting, plotting and giving (Adj) R-squared
popt , pcov = curve_fit(fitt,timeDF,tempDF,p0=(-0.1317,0.95,-1e-11),bounds=((-np.inf,0,-np.inf),(0,1,0)))
print(popt)
y_fit = np.array(fitt(timeDF,popt[0],popt[1],popt[2]))
plt.scatter(timeDF,tempDF,color='ORANGE',marker='.',s=0.5)
plt.fill_between(timeDF,tempDF-0.5,tempDF+0.5,color='ORANGE', alpha=0.2)
plt.plot(timeDF,y_fit,color='RED',linewidth=1)
plt.legend(["Experimental data", "Caputo fit"], loc ="upper right")
plt.xlabel("Time (min)")
plt.ylabel("Temperature (Kelvin)")
plt.show()
plt.close()
r2 = r2_score(tempDF,y_fit)
print(r2)
adjr2 = 1 - (1 - r2)*((len(xls)-1)/(len(xls)-3-1))
print(adjr2)
I already tried computing the values f0,f1,f2,f3 all at once, but the thing consuming the most time is Fn(x) which I haven't figured in out how to compute them all at once. If this is possible to compute at once, I think the program will run much faster. PS: ML,ML_ is a function from https://github.com/khinsen/mittag-leffler.
This is the function necesssary. Fn is the only one I haven't figured out yet.