I applied predict_proba on classfication problem. I have some experience with building classification models in R but it's my first time with Python's sklearn.
So the the problem is: In sklearn after fitting I can't find a way to access probabilities. Is it possible? There is a method predict_proba(), but...as the name suggests, it is prediction. This is my code:
import pandas as pd
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
import pickle
from nltk.tokenize import sent_tokenize
from Model import SkripsiPipeline
def konten(kata, model):
item = []
loaded_model = pickle.load(open(model, 'rb'))
for v in kata.itertuples(index = False):
sentiment = []
variabel1 = v[0]
variabel2 = v[1]
kalimat = variabel1 + variabel2
hasil_tokenize = sent_tokenize(kalimat)
preds = loaded_model.predict(hasil_tokenize)
if preds == 1:
proba = loaded_model.predict_proba(hasil_tokenize)
proba = proba.reshape(-1, 1).tolist()
sentiment.append('Positif')
sentiment.append(proba[0])
elif preds == 0:
proba = loaded_model.predict_proba(hasil_tokenize)
proba = proba.reshape(-1, 1).tolist()
sentiment.append('Netral')
sentiment.append(proba[1])
elif preds == -1:
proba = loaded_model.predict_proba(hasil_tokenize)
proba = proba.reshape(-1,1).tolist()
sentiment.append('Negatif')
sentiment.append(proba[2])
item.append(sentiment)
return item
but I got this error :
AttributeError: 'SkripsiPipeline' object has no attribute 'predict_proba'
and this is the SkripsiPipeline code :
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle
class SkripsiPipeline():
def __init__ (self, predictor):
self.predictor = predictor
def fit(self,X,y):
vectorizer = CountVectorizer()
tfidf_transformer = TfidfTransformer()
svm_predictor = self.predictor
X = vectorizer.fit_transform(X)
X = tfidf_transformer.fit_transform(X)
svm_predictor.fit(X,y)
self.vectorizer = vectorizer
self.tfidf_transformer = tfidf_transformer
self.svm_predictor = svm_predictor
def predict (self, X):
X = self.vectorizer.transform(X)
X = self.tfidf_transformer.transform(X)
prediction = self.svm_predictor.predict(X)
return prediction
I am new to Python Sklearn package. Can anyone tell me what is wrong with my Python code. I've googled it, but could not understand properly.