I am trying to add k-fold cross-validation to my code because overfitting is an issue. Previously I split my data into train and test sets, but I am confused about where and how to apply k-fold, since my data is already split.
# Scale each feature column (axis=0) to unit norm; `x` is rebound to the result.
# NOTE(review): the column norms are computed over all rows, before the
# train/test split below, so held-out rows contribute to the scaling --
# confirm this is acceptable, or normalise after splitting.
x = x_norm = preprocessing.normalize(x, axis=0)

# Hold out 20% of the samples as the test set, stratified on df["label"].
# y_trainval holds the labels of the remaining 80%.
x_trainval, x_test, y_trainval, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
    stratify=df["label"],
)

# Carve the validation set (10% of the train/val pool) out of the remainder.
# NOTE(review): unlike the split above, this one is not stratified.
x_train, x_val, y_train, y_val = train_test_split(
    x_trainval,
    y_trainval,
    test_size=0.1,
    random_state=0,
)
class classifierdataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        x_data: Indexable collection of feature rows.
        y_data: Indexable collection of labels, aligned with ``x_data``
            by position.
    """

    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples, taken from the length of ``x_data``."""
        return len(self.x_data)
# Wrap each split as a dataset of (float feature tensor, long label tensor).
# torch.from_numpy requires NumPy arrays -- assumes the x_*/y_* splits are
# ndarrays; TODO confirm for the labels.
train_dataset = classifierdataset(
    torch.from_numpy(x_train).float(),
    torch.from_numpy(y_train).long(),
)
val_dataset = classifierdataset(
    torch.from_numpy(x_val).float(),
    torch.from_numpy(y_val).long(),
)
test_dataset = classifierdataset(
    torch.from_numpy(x_test).float(),
    torch.from_numpy(y_test).long(),
)

# Training hyperparameters.
EPOCHS = 10
BATCH_SIZE = 16
LEARNING_RATE = 0.0007
# 0.0009, 0.0007

# Reshuffle the training samples every epoch so the model does not see the
# same batches in the same order each time. Call torch.manual_seed(...) before
# training if runs must be reproducible.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Evaluation loaders keep their fixed order and yield one sample per batch.
val_loader = DataLoader(dataset=val_dataset, batch_size=1)
test_loader = DataLoader(dataset=test_dataset, batch_size=1)