i have this error below from my code for a K-Fold cross validation
"None of [Int64Index([ 0, 1, 3, 4, 5, 6, 7,\n 8, 9, 10,\n ...\n 1048565, 1048566, 1048567, 1048568, 1048569, 1048570, 1048571,\n 1048572, 1048573, 1048574],\n dtype='int64', length=943717)] are in the [columns]"
the model is logistic regression model and i need to evaluate it using K-fold however if you have another code in python i would be thankful
this is my code
y3_data = data['DEATH']
#dependant variable y_data:
x3_data = df14
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=0)
x3_resampled,y3_resampled = rus.fit_resample(x3_data,y3_data)
#training and test sample :
x3_training_data, x3_test_data, y3_training_data, y3_test_data = train_test_split(x3_data, y3_data, test_size = 0.3)
# Estimation result:
logit_model=sm.Logit(y3_training_data,x3_training_data)
result3=logit_model.fit()
print(result3.summary2())
# Model Evaluation :
logreg=LogisticRegression()
logreg.fit(x3_training_data,y3_training_data)
y_pred=logreg.predict(x3_test_data)
print('Logistic regression model accuracy:{:.2f}'.format(logreg.score(x3_test_data,y3_test_data)))
print("Logistic Regression F1 Score :",f1_score(y3_test_data,logreg.predict(x3_test_data),average=None))
sns.heatmap(confusion_matrix(y3_test_data, logreg.predict(x3_test_data)), annot=True, fmt=".0f")
plt.title("Logistic Regression Confusion Matrix",fontsize=18, color="red");
num_splits = 10
kfold = StratifiedKFold(num_splits, shuffle= True, random_state = 1)
train_accs, test_accs = [], [] #create empty lists to store accurcy values
for train_index, test_index in kfold.split(x3_data, y3_data): #Generate indices to split data into training and test set.
x3_train, x3_test = x3_data[train_index], x3_data[test_index]
y3_train, y3_test = y3_data[train_index], y3_data[test_index]
logreg.fit(x3_train, y3_train)
y3_pred_train = logreg.predict(x3_train)
y3_pred_test = logreg.predict(x3_test)
train_accs.append(metrics.accuracy_score(y3_train, y3_pred_train) * 100)
test_accs.append(metrics.accuracy_score(y3_test, y3_pred_test) * 100)
ave_train_acc = 0
ave_test_acc = 0
print("\t","Training_Acc","\t","\t", "Testing_Acc")
for i in range(num_splits):
print(i,"\t", train_accs[i],"\t", test_accs[i])
ave_train_acc+= train_accs[i]/num_splits
ave_test_acc+= test_accs[i]/num_splits
print("Av", "\t", ave_train_acc,"\t", ave_test_acc)
the error is only on the k fold part
and i want the output to be like this one below
Training_Acc Testing_Acc
0 76.0586319218241 75.32467532467533
1 75.2442996742671 74.67532467532467
2 73.9413680781759 81.16883116883116
3 77.72357723577235 71.89542483660131
4 76.7479674796748 73.20261437908496
Av 75.94316887794285 75.25337407690348
PLEASE help
THIS IS the full error trace track
RF Regression F1 Score : [0.96507625 0.47731195]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-19-955883a0e9f9> in <module>
62 train_accs, test_accs = [], [] #create empty lists to store accurcy values
63 for train_index, test_index in kfold.split(x_data, y_data): #Generate indices to split data into training and test set.
---> 64 x_train_data, x_test_data = x_data[train_index], x_data[test_index]
65 y_train, y_test = y_data[train_index], y_data[test_index]
66
2 frames
/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis)
1372 if use_interval_msg:
1373 key = list(key)
-> 1374 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
1375
1376 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
KeyError: "None of [Int64Index([ 0, 1, 3, 4, 5, 6, 7,\n 8, 9, 10,\n ...\n 1048564, 1048565, 1048566, 1048567, 1048568, 1048569, 1048570,\n 1048572, 1048573, 1048574],\n dtype='int64', length=838860)] are in the [columns]"
new error
> ---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
4 frames
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.8/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans: