I am trying to Deploy my sentimental analysis model using Django but getting the following error.
views.py file
from django.shortcuts import render
from django.http import HttpResponse
from django.contrib.auth import authenticate
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from string import punctuation
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
# Create your views here.
badfood=pickle.load(open('mymodel/pkl/BadFood.pickle','rb'))
def index(request):
return render(request,'mymodel/index.html')
def remove_non_ascii_1(text):
return ''.join(i for i in text if ord(i)<128)
def clean_text(input_str):
lemmatizer= WordNetLemmatizer()
input_str=input_str.lower()
remove_num=re.sub(r'\d+','',input_str)
remove_punc=remove_num.translate(str.maketrans("","",punctuation))
remove_white=remove_punc.strip()
stop_words=set(stopwords.words('english'))
tokens=word_tokenize(remove_white)
result=[i for i in tokens if not i in stop_words]
lemmatized_words=[lemmatizer.lemmatize(word) for word in result]
review=' '.join(lemmatized_words)
return review
def predict(request):
# Grabbing data from user
if request.method=="POST":
review=request.POST.get('review','')
city=request.POST.get('city','')
input_data=[{'review':review,'city':city}]
dataset=pd.DataFrame(input_data)
dataset = dataset.replace(r'\r',' ', regex=True)
dataset['review']=dataset['review'].apply(lambda x:remove_non_ascii_1(x))
dataset['review']=dataset['review'].apply(lambda x:clean_text(x))
# Bag of words
vectorizer=CountVectorizer()
features_data=
pd.DataFrame(vectorizer.fit_transform(dataset.review).toarray())
features_data.columns=vectorizer.get_feature_names()
features_data.insert(0,'city_x',dataset['city'])
# Label Encoding the city column
labelencoder=LabelEncoder()
features_data['city_x']=labelencoder.fit_transform(features_data.city_x)
features_data['city_x']=features_data['city_x'].astype('category')
regressor=badfood.predict(features_data)
return render(request,'mymodel/result.html',{'res':regressor})
ValueError at /predict/ Number of features of the model must match the input. Model n_features is 7397 and input n_features is 12
I have designed a user interface in which I am giving a text box field in which the user can enter the review and a dropdown list from which user can select the city. The above is my views.py file which is containing all the necessary code for prediction. Now how can I make my input dataframe features equal to the model features that is 7397 on which is was trained because every time different review will be given and all the columns formed after word tokenization will be different.I donot know what columns will be formed from the words.My aim is to create the exactly the same dataframe from the users input on which it was trained so that it can make the right prediction.The indentation of the code is disturbed while pasting the code Please Do not consider it as error.
Thank you very Much