Please see the code below. For some reason, when I split the data using scikit-learn's KFold, the length of the test sample changes after the second split.
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score, cross_val_predict
from sklearn import datasets, ensemble
from scipy.interpolate import make_interp_spline
import random
import math
import statistics
random.seed(1)
from numpy import array
from sklearn.model_selection import KFold
from statistics import mean
# Load the auto-mpg table and discard its 'carname' column in one chained step.
df = pd.read_csv('auto-mpg.csv').drop(columns=['carname'])

# Feature matrix: the columns at positions 1..6 of the remaining frame.
# NOTE(review): presumably 'mpg' sits at position 0 and is skipped by this
# slice - confirm against the CSV header.
X = df.iloc[:, 1:7].to_numpy(copy=True)

# Regression target: the 'mpg' column as a plain NumPy array.
y = df['mpg'].to_numpy(copy=True)

# 10-fold splitter with default settings, shared by the evaluation loop.
kf = KFold(n_splits=10)
def prop_reg(X_test, X_train, y_train, y_test, kappa):
    """Predict targets with inverse-distance-weighted averaging.

    For every test sample ``x_i`` the prediction is the weighted mean of the
    training targets::

        pred_i = sum_j(w_ij * y_j) / sum_j(w_ij)
        w_ij   = (1 + ||x_i - x_j||) ** (-kappa)

    where ``||.||`` is the Euclidean norm of the difference of the two samples.

    Args:
        X_test: Array-like of test samples, one sample per leading index.
        X_train: Array-like of training samples, one sample per leading index.
        y_train: One-dimensional array-like of training targets, one per
            training sample.
        y_test: Unused. Kept only so existing call sites keep working; the
            predictions never depend on the test targets.
        kappa: Exponent of the weight; larger values concentrate the weight
            on the nearest training samples.

    Returns:
        A one-dimensional ``numpy.ndarray`` with one prediction per test
        sample (empty if ``X_test`` is empty).

    Raises:
        ValueError: If the training set is empty or ``y_train`` does not have
            exactly one target per training sample.
    """
    X_test = np.asarray(X_test, dtype=float)
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    n_test, n_train = len(X_test), len(X_train)
    if n_train == 0:
        raise ValueError("prop_reg needs at least one training sample")
    if len(y_train) != n_train:
        raise ValueError("X_train and y_train must have the same length")
    if n_test == 0:
        return np.zeros(0)

    # Flatten each sample to a vector so one broadcasted subtraction yields
    # every test/train difference; this needs O(n_test * n_train * n_features)
    # memory in exchange for removing the Python-level double loop.
    flat_test = X_test.reshape(n_test, -1)
    flat_train = X_train.reshape(n_train, -1)
    diff = flat_test[:, np.newaxis, :] - flat_train[np.newaxis, :, :]
    dist = np.linalg.norm(diff, axis=-1)

    # Work with log-weights and shift each row by its maximum before
    # exponentiating. The shift cancels in the ratio below, but it prevents
    # (1 + d) ** kappa from overflowing (which would turn every weight into
    # 0 and the prediction into 0 / 0 = NaN) for large kappa or distances.
    log_w = -kappa * np.log1p(dist)
    log_w -= log_w.max(axis=1, keepdims=True)
    weights = np.exp(log_w)

    return weights @ y_train / weights.sum(axis=1)
# Cross-validate prop_reg for each candidate kappa and report the mean of the
# per-fold mean squared errors.
n_folds = kf.get_n_splits()  # taken from the splitter instead of hard-coding 10
for kappa in [10, 20, 30, 40, 50]:
    mse = 0.0  # restarted for every kappa so the folds of one kappa are averaged together
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Fold sizes differing by one are expected: KFold gives the first
        # n_samples % n_splits folds n_samples // n_splits + 1 samples and
        # every remaining fold n_samples // n_splits samples.
        print(len(y[test_index]))
        pr = prop_reg(X_test, X_train, y_train, y_test, kappa)
        # Score the predictions that were just computed for this fold.
        mse += mean_squared_error(y_test, pr) / n_folds
    print("mse {} kappa {}".format(mse, kappa))
    print("======================================")
The output of this code (the test-fold size printed for each of the 10 splits, repeated for each of the 5 kappa values) is:
40 40 39 39 39 39 39 39 39 39 40 40 39 39 39 39 39 39 39 39 40 40 39 39 39 39 39 39 39 39 40 40 39 39 39 39 39 39 39 39 40 40 39 39 39 39 39 39 39 39