Hi and thanks in advance for your time,
I'm trying to use scipy interpolation/extrapolation from data with 3D coordinates + value(accuracy) The purpose is to use the interpn as the function to later run in a global optimizer to try and speed up a hyperparameter tunning task.
The strategy is:
- inputs are a sparse dataset of 4Dims, 3 parameters + accuracy
- from the min and max of each parameter create boundaries that define rec grid and use mean accuracy as fill_value
- create a values filled grid using scipy.interpolation.griddata
- run the function scipy.interpolation.interpn passing the grid points, values and desired point (which interpn can both interpolate or extrapolate)
here is the documentation from scipy:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.interpn.html
https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html
Problem: everything works up to point 4 but interpn won't accept the format of grid points/values i'm using
#python3
import numpy as np
from scipy.interpolate import griddata,interpn
experimentsString = "3,1,8,0.636;3,5,16,0.741;3,10,32,0.680;20,1,8,0.715;20,5,16,0.719;20,10,32,0.693;40,1,8,0.500;40,5,16,0.504;40,10,32,0.500;3,1,8,0.715;3,1,16,0.746;3,1,32,0.724;3,1,8,0.667;3,1,16,0.662;3,1,32,0.728;3,1,8,0.750;3,1,16,0.711;3,1,32,0.719;3,1,8,0.750;3,1,16,0.750;3,1,32,0.671;3,1,8,0.737;3,1,16,0.680;3,1,32,0.711;3,1,8,0.737;3,1,16,0.724;3,1,32,0.728;3,1,8,0.737;3,1,16,0.728;3,1,32,0.724;3,1,8,0.702;"
experimentsRows = experimentsString.split(";")
print(*experimentsRows, sep= "\n")
sequenceLength=[]
sampleRate=[]
fullyConnected=[]
accuracy=[]
zippedDataPts=[]
for row in experimentsRows:
if len(row) > 1:
values=row.split(",")
sequenceLength.append(int(values[0]))
sampleRate.append(int(values[1]))
fullyConnected.append(int(values[2]))
accuracy.append(float(values[3]))
point=np.array([int(values[0]),int(values[1]),int(values[2])])
zippedDataPts.append(point)
zippedDataPtsCopy=zippedDataPts.copy()
zippedDataPts = np.array(zippedDataPtsCopy,dtype=float)
unZippedDataPts=(np.array(sequenceLength),np.array(sampleRate),np.array(fullyConnected))
minSequenceLength=min(sequenceLength)
maxSequenceLength=max(sequenceLength)
print("sequenceLength Bounds: ",minSequenceLength,maxSequenceLength)
minSampleRate=min(sampleRate)
maxSampleRate=max(sampleRate)
print("sampleRate Bounds: ",minSampleRate,maxSampleRate)
minFullyConnected=min(fullyConnected)
maxFullyConnected=max(fullyConnected)
print("fullyConnected Bounds: ",minFullyConnected,maxFullyConnected)
meanAccuracy=np.mean(accuracy)
print("Mean Accuracy: ",meanAccuracy)
accuracyArr=np.array(accuracy,dtype=float)
print("accuracyArr:",np.shape(accuracyArr))
x=np.linspace(minSequenceLength,maxSequenceLength,num=int(maxSequenceLength-minSequenceLength),dtype=int)
print("LINSPACE x")
print(x)
y=np.linspace(minSampleRate,maxSampleRate,num=int(maxSampleRate-minSampleRate),dtype=int)
print("LINSPACE y")
print(y)
z=np.linspace(minFullyConnected,maxFullyConnected,num=int(maxFullyConnected-minFullyConnected),dtype=int)
print("LINSPACE z")
print(z)
X,Y,Z = np.meshgrid(x,y,z)
X=X.astype(float)
Y=Y.astype(float)
Z=Z.astype(float)
print("X",np.shape(X))
print("Y",np.shape(Y))
print("Z",np.shape(Z))
XX, YY, ZZ = np.array(X.ravel()), np.array(Y.ravel()), np.array(Z.ravel())
print("XX",np.shape(XX))
print("YY",np.shape(YY))
print("ZZ",np.shape(ZZ))
dataGridValues1D = griddata(zippedDataPts,accuracyArr,(XX,YY,ZZ),method='linear',fill_value=meanAccuracy)
dataGridValues3D = griddata(zippedDataPts,accuracyArr,(X,Y,Z),method='linear',fill_value=meanAccuracy)
# dataGridValuesArr = np.array(dataGridValues)
print("dataGridValues1D:",np.shape(dataGridValues1D))
print("dataGridValues3D:",np.shape(dataGridValues3D))
xc=x.copy()
yc=x.copy()
zc=x.copy()
xf = xc.astype(float)
yf = yc.astype(float)
zf = zc.astype(float)
testPoint=np.array([16.0,6.0,32.0],dtype=float)
I conducted the following experiments for the interpn function with the following error messages:
guess = interpn((xf,yf,zf),dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 3 point arrays, but values has 1 dimensions
guess = interpn((xf,yf,zf),dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 37 points and 9 values in dimension 0
guess = interpn((X,Y,Z),dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 3 point arrays, but values has 1 dimensions
guess = interpn((X,Y,Z),dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: The points in dimension 0 must be strictly ascending
guess = interpn((XX,YY,ZZ),dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 3 point arrays, but values has 1 dimensions
guess = interpn((XX,YY,ZZ),dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: The points in dimension 0 must be strictly ascending
guess = interpn(zippedGridPoints,dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 7992 point arrays, but values has 1 dimensions
guess = interpn(zippedGridPoints,dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 7992 point arrays, but values has 3 dimensions