I am having the following problem while trying to implement a pipeline. For the preprocessor, I want to combine adding a new column with processing all the other columns. On its own, the preprocessor works as it should:
# Input columns used by the model, and the label column.
features = ['Pclass', 'Sex', 'Age', 'Parch', 'SibSp', 'Embarked']
target = ['Survived']

# Training inputs and labels, both taken from `data` (defined elsewhere;
# used here as a pandas DataFrame).
X_train = data[features]
y_train = data['Survived']

# Names of the selected columns, split by dtype: integer/float columns vs.
# object (string) columns.
num_features = X_train.select_dtypes(include=['int64', 'float64']).columns
cat_features = X_train.select_dtypes(include=['object']).columns
class Add_family(BaseEstimator, TransformerMixin):
    """Replace the ``Parch``/``SibSp`` columns with family-size features.

    When ``add_family`` is true, ``transform`` adds two columns and drops
    ``Parch`` and ``SibSp``:

    * ``Family_size`` -- ``Parch + SibSp + 1``
    * ``FamilyType``  -- ``'alone'`` (size 1), ``'small'`` (size <= 4) or
      ``'big'`` (anything else)

    When ``add_family`` is false, ``transform`` returns an unmodified copy
    of its input.
    """

    def __init__(self, add_family=True):
        # scikit-learn reads every constructor argument back through an
        # attribute with the SAME name (get_params / clone / repr), so the
        # value must be stored as `add_family`, not under another name.
        self.add_family = add_family

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` for chaining."""
        return self

    @staticmethod
    def _family_type(size):
        """Map a family size to its ``'alone'``/``'small'``/``'big'`` label."""
        if size == 1:
            return 'alone'
        if size <= 4:
            return 'small'
        return 'big'

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the family features applied.

        ``X`` must provide ``Parch`` and ``SibSp`` columns when
        ``add_family`` is true. The input object is never modified.
        """
        df = pd.DataFrame(X).copy()
        if self.add_family:
            # Column arithmetic instead of a row-wise apply: same values,
            # and it also behaves on an empty frame.
            df['Family_size'] = df['Parch'] + df['SibSp'] + 1
            df['FamilyType'] = df['Family_size'].map(self._family_type)
            df = df.drop(columns=['Parch', 'SibSp'])
        return df
# Numeric columns: standardise, then fill missing values with the column mean.
# NOTE(review): scaling runs before imputation here; imputing first is the
# more common order -- confirm this ordering is intentional.
num_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('imputer', SimpleImputer(strategy='mean')),
])

# Categorical columns: one-hot encode, with handle_unknown='ignore' so that
# unknown categories at transform time do not raise.
cat_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column to a sub-pipeline according to its dtype.
col_transform = ColumnTransformer(transformers=[
    ('cat', cat_transformer, make_column_selector(dtype_include=object)),
    ('num', num_transformer, make_column_selector(dtype_include=np.number)),
])

# Full preprocessing: add the family features first, then transform by dtype.
preprocessor = Pipeline(steps=[
    ('Adder_features', Add_family(add_family=True)),
    ('transform', col_transform),
])

# Fit the preprocessor on the training inputs and display the result.
data_f = preprocessor.fit_transform(X_train)
pd.DataFrame(data_f)
But when I try to train the model, I get the following error:
# Complete model: the preprocessing pipeline followed by a
# logistic-regression classifier, fitted on the training data.
lr = Pipeline(steps=[
    ('prep', preprocessor),
    ('clf', LogisticRegression()),
])
lr.fit(X_train, y_train)
TypeError: cannot unpack non-iterable NoneType object