i have recently used scikit-learn for sentiment analysis, so after i have trained my labeled data then tried to test them on unlabeled set of data, this error comes up 'ValueError: Can't handle mix of continuous-multioutput and binary'
i think that what i am doing wrong is that i give the (y_pred) the wrong assumption.
the error comes from this: accuracy = classifier.score(test_matrix,ALL_test)
but when i change the ALL_test to ALL_train (trained and labeled data) it brings the accuracy of 0.971251409245; which is absolutely wrong
what should i do?
# -*- coding:utf-8 -*-
import sklearn.cross_validation
import sklearn.feature_extraction.text
import sklearn.metrics
import sklearn.naive_bayes
from sklearn import svm
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
name = ['Tweet','Label']
name2 =['Tweet','Label']
data_train = pd.read_table('unstemmedtrain.csv',sep = ';',names = name)
data_test = pd.read_table('unstemmedtest.csv',names=name2)
train_data =pd.DataFrame(data_test,columns=name2)
test_data=pd.DataFrame(data_train,columns=name)
vectorizer = sklearn.feature_extraction.text.CountVectorizer()
train_matrix = vectorizer.fit_transform(train_data['Tweet'])
test_matrix = vectorizer.transform(test_data['Tweet'])
#print train_matrix
positive_train = (train_data['Label']=='positive')
negative_train= (train_data['Label']=='negative')
neutral_train=(train_data['Label']=='neutral')
#print negative_cases_train
ALL_train = positive_train +negative_train +neutral_train
#print positive_cases_train
ALL_test = (test_data['Tweet'])
positive_test =(test_data['Label']=='positive')
negative_test =(test_data['Label']=='negative')
neutral_test = (test_data['Label']=='neutral')
ALL_Test = positive_test + negative_test + neutral_test
#print positive_cases_test
classifier=sklearn.naive_bayes.MultinomialNB()
classifier2 = classifier.fit(train_matrix,ALL_train)
p_sentiment = classifier.predict(test_matrix)
p_prob = classifier.predict_proba(test_matrix)
#print predicted_prob
accuracy = classifier.score(test_matrix,ALL_test)
print accuracy