0

I am having the following problem while trying to implement a pipeline. For the preprocessor, I want to combine adding a new column with processing all the other columns. On its own, the preprocessor works as it should:

# Input columns used by the model, and the name of the label column.
features = ['Pclass', 'Sex', 'Age', 'Parch', 'SibSp', 'Embarked']
target = ['Survived']

# Training matrix and label vector taken from the already-loaded `data` frame.
X_train = data[features]
y_train = data['Survived']

# Column names of the selected features, split by dtype.
num_features = X_train.select_dtypes(include=['int64', 'float64']).columns
cat_features = X_train.select_dtypes(include=['object']).columns

class Add_family(BaseEstimator, TransformerMixin):
  """Transformer that derives family features from ``Parch`` and ``SibSp``.

  When enabled, ``transform`` adds a ``Family_size`` column
  (``Parch + SibSp + 1``) and a ``FamilyType`` column holding one of
  'alone', 'small' or 'big', then drops ``Parch`` and ``SibSp``.

  Parameters
  ----------
  add_family : bool, default=True
      If falsy, ``transform`` returns an unmodified copy of the input.
  """

  def __init__(self, add_family=True):
    # The attribute must carry exactly the same name as the constructor
    # argument: BaseEstimator.get_params() (and therefore clone(), repr()
    # and set_params()) looks each __init__ parameter up via getattr.
    self.add_family = add_family

  def fit(self, X, y=None):
    """Nothing is estimated from the data; return ``self`` unchanged."""
    return self

  @staticmethod
  def _family_type(size):
    """Map a family size to its 'alone' / 'small' / 'big' label."""
    if size == 1:
      return 'alone'
    if size <= 4:
      return 'small'
    # Everything else, including NaN (all comparisons above are False).
    return 'big'

  def transform(self, X, y=None):
    """Return a copy of ``X`` with the family features added.

    ``X`` is wrapped in ``pd.DataFrame`` and copied, so the caller's
    object is never modified. ``y`` is accepted for API compatibility
    and ignored.
    """
    df = pd.DataFrame(X).copy()
    if self.add_family:
      # Column-wise arithmetic instead of a row-wise apply: same values,
      # and it also works when the frame has no rows.
      family_size = df['Parch'] + df['SibSp'] + 1
      df['Family_size'] = family_size
      df['FamilyType'] = family_size.map(self._family_type)
      df = df.drop(columns=['Parch', 'SibSp'])
    return df

# Numeric branch: standardise first, then mean-impute what is still missing.
num_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('imputer', SimpleImputer(strategy='mean')),
])

# Categorical branch: one-hot encode, with handle_unknown='ignore'.
cat_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Columns are routed to a branch by dtype: object -> 'cat', numeric -> 'num'.
col_transform = ColumnTransformer(transformers=[
    ('cat', cat_transformer, make_column_selector(dtype_include=object)),
    ('num', num_transformer, make_column_selector(dtype_include=np.number)),
])

# Add_family runs first, so the dtype selectors see its output columns.
preprocessor = Pipeline(steps=[
    ('Adder_features', Add_family(add_family=True)),
    ('transform', col_transform),
])

# Fit the preprocessor on the training features and inspect the result.
data_f = preprocessor.fit_transform(X_train)
pd.DataFrame(data_f)

But when I try to train the model, I get the following error:

# Full model: the preprocessing pipeline followed by a logistic regression.
lr = Pipeline(steps=[
    ('prep', preprocessor),
    ('clf', LogisticRegression()),
])
lr.fit(X_train, y_train)

TypeError: cannot unpack non-iterable NoneType object

enter image description here

0 Answers0