I have a pipeline that consists of two custom column transformers, one of them is working while on another one it gives NotFittedError. Here is the ppl code:
class SkipSimpleImputer(SimpleImputer):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def transform(self, X, y=None):
if 'MARITAL_STATUS' in X.columns:
print('\t---- MARITAL STATUS found in skipsimpleimpute, all cols are: ', X.columns)
transformed_X = super().transform(X['MARITAL_STATUS'])
X['MARITAL_STATUS'] = transformed_X
return X
def fit(self, X, y=None):
return self
drop_cols = Pipeline(steps=[
("columnDropper", columnDropperTransformer(id_cols))
])
feature_remover = Pipeline(steps=[
("columnDropper", missingRemover())
])
class CustomZeroImputer(SimpleImputer):
def __init__(self, columns, **kwargs):
self.columns = columns
print('---- Got zeroImp request: ', columns)
super().__init__(**kwargs)
def fit(self, X, y=None):
return self
def transform(self, X, y=None):
cols_to_imp = [col for col in self.columns if col in X.columns]
print('---- Imputing zero cols: ', cols_to_imp)
X[cols_to_imp] = super().transform(X[cols_to_imp])
return X
fill_na_zero_transformer = Pipeline(steps=[
('zero_imputer', CustomZeroImputer(fill_zero_cols, strategy='constant', fill_value=0))
])
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy = "constant", fill_value=-1, add_indicator=True)),
('scaler', StandardScaler())
])
categorical_transformer = Pipeline(steps=[
('categorical_imputer', SkipSimpleImputer(strategy="constant", fill_value='Unknown')),
('encoder', OneHotEncoder(handle_unknown='ignore'))
])
preprocess_ppl = ColumnTransformer(
transformers=[
('encode', categorical_transformer, make_column_selector(dtype_include=object)),
('zero_impute', fill_na_zero_transformer, make_column_selector(dtype_include=np.number)),
('numeric', numeric_transformer, make_column_selector(dtype_include=np.number))
]
)
pipeline2 = Pipeline(
steps=[
('dropper', drop_cols),
('remover',feature_remover),
("preprocessor", preprocess_ppl),
("estimator", customOLS(sm.OLS))
]
)
print('---- Fitting pipeline')
pipeline2.fit(X_train,y_train)
The CustomZeroImputer only tries to impute the column exisitng in the intermediate data, coz otherwise it gives column not found error. The error I receive is:
NotFittedError: This CustomZeroImputer instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.