0

Hi and thanks in advance for your time,

I'm trying to use scipy interpolation/extrapolation from data with 3D coordinates + value(accuracy) The purpose is to use the interpn as the function to later run in a global optimizer to try and speed up a hyperparameter tunning task.

The strategy is:

  1. inputs are a sparse dataset of 4Dims, 3 parameters + accuracy
  2. from the min and max of each parameter create boundaries that define rec grid and use mean accuracy as fill_value
  3. create a values filled grid using scipy.interpolation.griddata
  4. run the function scipy.interpolation.interpn passing the grid points, values and desired point (which interpn can both interpolate or extrapolate)

here is the documentation from scipy:

https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html

https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.interpn.html

https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html

Problem: everything works up to point 4 but interpn won't accept the format of grid points/values i'm using

#python3
import numpy as np
from scipy.interpolate import griddata,interpn

experimentsString = "3,1,8,0.636;3,5,16,0.741;3,10,32,0.680;20,1,8,0.715;20,5,16,0.719;20,10,32,0.693;40,1,8,0.500;40,5,16,0.504;40,10,32,0.500;3,1,8,0.715;3,1,16,0.746;3,1,32,0.724;3,1,8,0.667;3,1,16,0.662;3,1,32,0.728;3,1,8,0.750;3,1,16,0.711;3,1,32,0.719;3,1,8,0.750;3,1,16,0.750;3,1,32,0.671;3,1,8,0.737;3,1,16,0.680;3,1,32,0.711;3,1,8,0.737;3,1,16,0.724;3,1,32,0.728;3,1,8,0.737;3,1,16,0.728;3,1,32,0.724;3,1,8,0.702;"

experimentsRows = experimentsString.split(";")
print(*experimentsRows, sep= "\n")

sequenceLength=[]
sampleRate=[]
fullyConnected=[]
accuracy=[]

zippedDataPts=[]
for row in experimentsRows:
    if len(row) > 1:
        values=row.split(",")
        sequenceLength.append(int(values[0]))
        sampleRate.append(int(values[1]))
        fullyConnected.append(int(values[2]))
        accuracy.append(float(values[3]))
        point=np.array([int(values[0]),int(values[1]),int(values[2])])
        zippedDataPts.append(point)

zippedDataPtsCopy=zippedDataPts.copy()
zippedDataPts = np.array(zippedDataPtsCopy,dtype=float)
unZippedDataPts=(np.array(sequenceLength),np.array(sampleRate),np.array(fullyConnected))

minSequenceLength=min(sequenceLength)
maxSequenceLength=max(sequenceLength)
print("sequenceLength Bounds: ",minSequenceLength,maxSequenceLength)

minSampleRate=min(sampleRate)
maxSampleRate=max(sampleRate)
print("sampleRate Bounds: ",minSampleRate,maxSampleRate)

minFullyConnected=min(fullyConnected)
maxFullyConnected=max(fullyConnected)
print("fullyConnected Bounds: ",minFullyConnected,maxFullyConnected)

meanAccuracy=np.mean(accuracy)
print("Mean Accuracy: ",meanAccuracy)
accuracyArr=np.array(accuracy,dtype=float)
print("accuracyArr:",np.shape(accuracyArr))

x=np.linspace(minSequenceLength,maxSequenceLength,num=int(maxSequenceLength-minSequenceLength),dtype=int)
print("LINSPACE x")
print(x)
y=np.linspace(minSampleRate,maxSampleRate,num=int(maxSampleRate-minSampleRate),dtype=int)
print("LINSPACE y")
print(y)
z=np.linspace(minFullyConnected,maxFullyConnected,num=int(maxFullyConnected-minFullyConnected),dtype=int)
print("LINSPACE z")
print(z)

X,Y,Z = np.meshgrid(x,y,z)
X=X.astype(float)
Y=Y.astype(float)
Z=Z.astype(float)

print("X",np.shape(X))
print("Y",np.shape(Y))
print("Z",np.shape(Z))

XX, YY, ZZ = np.array(X.ravel()), np.array(Y.ravel()), np.array(Z.ravel())
print("XX",np.shape(XX))
print("YY",np.shape(YY))
print("ZZ",np.shape(ZZ))

dataGridValues1D = griddata(zippedDataPts,accuracyArr,(XX,YY,ZZ),method='linear',fill_value=meanAccuracy)
dataGridValues3D = griddata(zippedDataPts,accuracyArr,(X,Y,Z),method='linear',fill_value=meanAccuracy)
# dataGridValuesArr = np.array(dataGridValues)
print("dataGridValues1D:",np.shape(dataGridValues1D))
print("dataGridValues3D:",np.shape(dataGridValues3D))

xc=x.copy()
yc=x.copy()
zc=x.copy()
xf = xc.astype(float)
yf = yc.astype(float)
zf = zc.astype(float)

testPoint=np.array([16.0,6.0,32.0],dtype=float)

I conducted the following experiments for the interpn function with the following error messages:

guess = interpn((xf,yf,zf),dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 3 point arrays, but values has 1 dimensions

guess = interpn((xf,yf,zf),dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 37 points and 9 values in dimension 0

guess = interpn((X,Y,Z),dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 3 point arrays, but values has 1 dimensions

guess = interpn((X,Y,Z),dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: The points in dimension 0 must be strictly ascending

guess = interpn((XX,YY,ZZ),dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 3 point arrays, but values has 1 dimensions

guess = interpn((XX,YY,ZZ),dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: The points in dimension 0 must be strictly ascending

guess = interpn(zippedGridPoints,dataGridValues1D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 7992 point arrays, but values has 1 dimensions

guess = interpn(zippedGridPoints,dataGridValues3D,testPoint,method='linear',fill_value=None,bounds_error=False)
#ValueError: There are 7992 point arrays, but values has 3 dimensions
Nuno Bártolo
  • 75
  • 2
  • 6

0 Answers0