0

I am trying to create a classification model with categorical_features=['sex','cp','fbs','restecg','exang','thal']. My pandas DataFrame has the following columns:

df.columns
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

When I run the following (after `from pycaret.classification import *`):

predict_model(best_model, data=df.drop('age',axis=1).tail())#not working

I get the following error:

KeyError: "['age'] not in index"

But as you can see in the output of df.columns, 'age' is present. I don't know why this error is raised for every column except 'target', which is the only column I can drop with axis=1 without an error:

predict_model(best_model, data=df.drop('target',axis=1).tail())#working

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/tmp/ipykernel_63/493668063.py in <module>
----> 1 predict_model(best_model, data=df.drop('age',axis=1).tail())  #while creating predict_model

/opt/conda/lib/python3.7/site-packages/pycaret/classification.py in predict_model(estimator, data, probability_threshold, encoded_labels, raw_score, round, verbose)
   1952         round=round,
   1953         verbose=verbose,
-> 1954         ml_usecase=MLUsecase.CLASSIFICATION,
   1955     )
   1956 

/opt/conda/lib/python3.7/site-packages/pycaret/internal/tabular.py in predict_model(estimator, data, probability_threshold, encoded_labels, raw_score, round, verbose, ml_usecase, display)
   8665     # prediction starts here
   8666 
-> 8667     pred = np.nan_to_num(estimator.predict(X_test_))
   8668 
   8669     try:

/opt/conda/lib/python3.7/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
    117 
    118         # lambda, but not partial, allows help() to work with update_wrapper
--> 119         out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
    120         # update the docstring of the returned function
    121         update_wrapper(out, self.fn)

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
    405         Xt = X
    406         for _, name, transform in self._iter(with_final=False):
--> 407             Xt = transform.transform(Xt)
    408         return self.steps[-1][-1].predict(Xt, **predict_params)
    409 

/opt/conda/lib/python3.7/site-packages/pycaret/internal/preprocess.py in transform(self, dataset, y)
    362         # drop any columns that were asked to drop
    363         data.drop(columns=self.features_todrop, errors="ignore", inplace=True)
--> 364         data = data[self.final_training_columns]
    365 
    366         # also make sure that all the column names are string

/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   3459             if is_iterator(key):
   3460                 key = list(key)
-> 3461             indexer = self.loc._get_listlike_indexer(key, axis=1)[1]
   3462 
   3463         # take() does not accept boolean indexers

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis)
   1312             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1313 
-> 1314         self._validate_read_indexer(keyarr, indexer, axis)
   1315 
   1316         if needs_i8_conversion(ax.dtype) or isinstance(

/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis)
   1375 
   1376             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 1377             raise KeyError(f"{not_found} not in index")
   1378 
   1379 

KeyError: "['age'] not in index"


PlutoSenthil
  • 332
  • 6
  • 13
  • 1
    That is weird. One idea: what does `print (df.columns.tolist())` show? – jezrael Oct 04 '21 at 06:07
  • 2
    Can you try `df.drop('age',axis=1)` on its own? I guess as the error occurs later, *age* is dropped twice. Perhaps because *best_model* does not contain *age* as a feature. – Michael Szczesny Oct 04 '21 at 06:11

1 Answer

1

The column name contains a trailing space: it is "age " rather than "age". We need to get rid of that extra space.

df= pd.read_csv(fileName)

df=df.rename({"age ":"age"},axis=1)

Hope this will solve your problem.