I am getting a KeyError when calling predict_model on the test data with a PyCaret anomaly-detection model (SOD):
CODE:
from pycaret.anomaly import *
anom_exp = setup(train,session_id = 125,
categorical_features=['date', 'hours', 'weekNumber', 'DayName', 'isWeekday'],
numeric_features=['cpu_avg'],
ignore_features = ['Timestamp', 'time'])
sod = create_model('sod',fraction = 0.1)
sod_test= predict_model(model = sod, data = test)
ERROR:
KeyError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py:3629, in Index.get_loc(self, key, method, tolerance)
3628 try:
-> 3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
File ~/.local/lib/python3.10/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File ~/.local/lib/python3.10/site-packages/pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[20], line 1
----> 1 sod_test= predict_model(model = sod, data = test)
File ~/.local/lib/python3.10/site-packages/pycaret/anomaly/functional.py:941, in predict_model(model, data)
938 if experiment is None:
939 experiment = _EXPERIMENT_CLASS()
--> 941 return experiment.predict_model(estimator=model, data=data)
File ~/.local/lib/python3.10/site-packages/pycaret/anomaly/oop.py:87, in AnomalyExperiment.predict_model(self, estimator, data, ml_usecase)
48 def predict_model(
49 self, estimator, data: pd.DataFrame, ml_usecase: Optional[MLUsecase] = None
50 ) -> pd.DataFrame:
51 """
52 This function generates anomaly labels on using a trained model.
53
(...)
85
86 """
---> 87 return super().predict_model(estimator, data, ml_usecase)
File ~/.local/lib/python3.10/site-packages/pycaret/internal/pycaret_experiment/unsupervised_experiment.py:1354, in _UnsupervisedExperiment.predict_model(self, estimator, data, ml_usecase)
1351 else:
1352 raise TypeError("Model doesn't support predict parameter.")
-> 1354 pred = estimator.predict(data_transformed)
1355 if ml_usecase == MLUsecase.CLUSTERING:
1356 data_transformed["Cluster"] = [f"Cluster {i}" for i in pred]
File ~/.local/lib/python3.10/site-packages/pyod/models/base.py:165, in BaseDetector.predict(self, X, return_confidence)
144 """Predict if a particular sample is an outlier or not.
145
146 Parameters
(...)
161 Only if return_confidence is set to True.
162 """
164 check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
--> 165 pred_score = self.decision_function(X)
166 prediction = (pred_score > self.threshold_).astype('int').ravel()
168 if return_confidence:
File ~/.local/lib/python3.10/site-packages/pyod/models/sod.py:157, in SOD.decision_function(self, X)
140 def decision_function(self, X):
141 """Predict raw anomaly score of X using the fitted detector.
142 The anomaly score of an input sample is computed based on different
143 detector algorithms. For consistency, outliers are assigned with
(...)
155 The anomaly score of the input samples.
156 """
--> 157 return self._sod(X)
File ~/.local/lib/python3.10/site-packages/pyod/models/sod.py:187, in SOD._sod(self, X)
185 anomaly_scores = np.zeros(shape=(X.shape[0],))
186 for i in range(X.shape[0]):
--> 187 obs = X[i]
188 ref = X[ref_inds[i,],]
189 means = np.mean(ref, axis=0) # mean of each column
File ~/.local/lib/python3.10/site-packages/pandas/core/frame.py:3505, in DataFrame.__getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File ~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py:3631, in Index.get_loc(self, key, method, tolerance)
3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
-> 3631 raise KeyError(key) from err
3632 except TypeError:
3633 # If we have a listlike key, _check_indexing_error will raise
3634 # InvalidIndexError. Otherwise we fall through and re-raise
3635 # the TypeError.
3636 self._check_indexing_error(key)
KeyError: 0
I looked at the source code and I think I know where the problem is, but when I edited it myself the model still did not predict anomalies correctly.
source code: https://pyod.readthedocs.io/en/latest/_modules/pyod/models/sod.html
The SOD class has a decision_function method, which is defined as:
def decision_function(self, X):
return self._sod(X)
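What I think the problem is: X reaches _sod as a pandas DataFrame, so the row access `obs = X[i]` goes through DataFrame.__getitem__, which looks for a column labelled 0 instead of row 0 and raises `KeyError: 0`. X should be converted to a NumPy array with check_array(X) in decision_function before it is passed to _sod.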