I'm trying to use classifierChain for a multilabel classification problem and following this tutorial:
from pmlb import fetch_data
from sklearn.multioutput import ClassifierChain
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
# load dataset and descriptive statistics
dataset_Name = 'yeast'; dataset = fetch_data(dataset_Name)
print(); print(dataset.head())
print(); print(dataset.columns)
cols = ['mcg', 'gvh', 'alm', 'mit', 'erl', 'pox', 'vac', 'nuc']
print(); print(dataset[cols].info())
print(); print(dataset[cols].describe())
print(); print(dataset[cols].corr())
# load features and target from dataset
X, y = fetch_data(dataset_Name, return_X_y=True)
# Split Train and Test Datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
chains = [ClassifierChain(LogisticRegression(), order=[1,0,2,4,3,5,6,7,8], random_state=i)
for i in range(9)]
print(chains)
for chain in chains:
chain.fit(X_train, Y_train)
I' m getting the error: 'tuple index out of range'. Can any one guide me about this error as I'm unable to understand this error? Full trace of error is below:
IndexError Traceback (most recent call last)
<ipython-input-41-d020752b05d2> in <module>
5 print(chains)
6 for chain in chains:
----> 7 chain.fit(X_train, Y_train)
~\Anaconda3\envs\tensorflow\lib\site-packages\sklearn\multioutput.py fit (self, X, Y)
465 if self.order_ == 'random':
466 self.order_ = random_state.permutation(Y.shape[1])
--> 467 elif sorted(self.order_) != list(range(Y.shape[1])):
468 raise ValueError("invalid order")
469
IndexError: tuple index out of range