0

i don't know how to use LabelEncoder in pipeline! i repeatedly get an error

OK! this is my code: import pandas as pd import numpy as np from sklearn.model_selection import train_test_split

bank = pd.read_csv('bankmarketing.csv')

bank.columns = bank.columns.astype('str') bank.columns = bank.columns.str.strip()

train_set, test_set = train_test_split(bank, test_size = 0.3, random_state = 100) train_set.shape

from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import FeatureUnion




class DataFrameSelector(BaseEstimator, TransformerMixin):
    def __init__(self, attribute_names):
        self.attribute_names = attribute_names
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X[self.attribute_names].values




df = train_set.copy()
df_label = df['y'].copy()
df = df.drop('y', axis = 1)

df_num_needless = df[['age', 'day', 'compaign', 'previous']].copy()
df_num = df.drop(['age', 'day', 'compaign', 'previous'], axis = 1)
df_num = df[['balance', 'duration', 'pdays']].copy()
df_cat = df[['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']].copy()




df_cat1 = df['job']
df_cat2 = df['marital']
df_cat3 = df['education']
df_cat4 = df['default']
df_cat5 = df['housing']
df_cat6 = df['loan']
df_cat7 = df['contact']
df_cat8 = df['month']
df_cat9 = df['poutcome']


num_attrs = list(df_num)

cat_attr1 = list(df_cat1)
cat_attr2 = list(df_cat2)
cat_attr3 = list(df_cat3)
cat_attr4 = list(df_cat4)
cat_attr5 = list(df_cat5)
cat_attr6 = list(df_cat6)
cat_attr7 = list(df_cat7)
cat_attr8 = list(df_cat8)
cat_attr9 = list(df_cat9)



num_pipeline = Pipeline([
    ('selector', DataFrameSelector(num_attrs)),
    ('scaler', StandardScaler())
    ])

*: i got error when i wanted to use LabelEncoder so i had to make different pipeline for categorical data:

cat_pipeline1 = Pipeline([
    ('selector', DataFrameSelector(cat_attr1)),
    ('encoder', LabelEncoder())
    ])


cat_pipeline2 = Pipeline([
    ('selector', DataFrameSelector(cat_attr2)),
    ('encoder', LabelEncoder())
    ])



cat_pipeline3 = Pipeline([
    ('selector', DataFrameSelector(cat_attr3)),
    ('encoder', LabelEncoder())
    ])




cat_pipeline4 = Pipeline([
    ('selector', DataFrameSelector(cat_attr4)),
    ('encoder', LabelEncoder())
    ])


cat_pipeline5 = Pipeline([
    ('selector', DataFrameSelector(cat_attr5)),
    ('encoder', LabelEncoder())
    ])



cat_pipeline6 = Pipeline([
    ('selector', DataFrameSelector(cat_attr6)),
    ('encoder', LabelEncoder())
    ])


cat_pipeline7 = Pipeline([
    ('selector', DataFrameSelector(cat_attr7)),
    ('encoder', LabelEncoder())
    ])



cat_pipeline8 = Pipeline([
    ('selector', DataFrameSelector(cat_attr8)),
    ('encoder', LabelEncoder())
    ])


cat_pipeline9 = Pipeline([
    ('selector', DataFrameSelector(cat_attr9)),
    ('encoder', LabelEncoder())
    ])



*: but now i don't know how can i use these pipelines????



full_pipeline = FeatureUnion(transformer_list = [
    ('num_pipeline', num_pipeline),
    ('cat_pipeline', ????????????)
    ])






bank_prepared_df = pd.DataFrame(full_pipeline.fit_transform(df), columns = ['age', 'job', 'marital', 'education', 'default', 'balance',
                                                            'housing', 'loan', 'contact', 'day', 'month', 'duration',
                                                            'compaign', 'pdays', 'previous', 'poutcome'])
bank_prepared_df.head()

0 Answers0