Import code. I've tried importing precision and all the related libraries, but I still can't get it to work.
import collections
import os
import string
from random import shuffle

import nltk
import nltk.metrics
import pandas as pd
from nltk import NaiveBayesClassifier
from nltk import classify
from nltk import precision
from nltk import precision
from nltk.corpus import stopwords
from nltk.metrics.scores import (accuracy, precision, recall, f_measure,log_likelihood, approxrand)
from nltk.metrics.scores import (precision, recall)
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
The code below is what I use to load the data, preprocess it, and train the classifier.
# Load the labelled reviews. Column 0 holds the review text (it is what gets
# tokenized below); column 1 holds the label compared against 'Positive'.
path = os.path.join('c:' + os.sep, 'Users', 'User', 'Documents', 'Reviews_Labeled.csv')
df = pd.read_csv(path)

# Select columns by position and split with a boolean mask instead of chained
# ``df.iloc[i][1]`` lookups: an integer key on a row Series is label-based, so
# the chained form only works through a deprecated positional fallback.
review_texts = df.iloc[:, 0]
is_positive = df.iloc[:, 1] == 'Positive'

# Every review whose label is not exactly 'Positive' is treated as negative.
positive = review_texts[is_positive].tolist()
negative = review_texts[~is_positive].tolist()

# Tokenize each review into a list of word tokens.
positive_tokens = [word_tokenize(review) for review in positive]
negative_tokens = [word_tokenize(review) for review in negative]
_ENGLISH_STOPWORDS = None  # filled in lazily by _english_stopwords()


def _english_stopwords():
    """Return the English stop words as a frozenset, loading them only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def is_clean(word: str):
    """Return True if *word* is worth keeping as a feature token.

    A token is rejected when it is empty or made up entirely of punctuation
    characters, when it is numeric, or when it is an English stop word
    (compared case-insensitively).

    Args:
        word: A single token.

    Returns:
        False for punctuation-only, numeric and stop-word tokens, else True.
    """
    # Check character by character: ``word in string.punctuation`` is a
    # substring test and lets multi-character tokens such as "..." or "''"
    # through. ``all`` over an empty string is True, so "" is rejected too.
    if all(char in string.punctuation for char in word):
        return False
    if word.isnumeric():
        return False
    # Lower-case before the lookup so capitalised stop words ("The") are
    # also filtered; the set is built once instead of once per token.
    if word.lower() in _english_stopwords():
        return False
    return True
def clean_tokens(tokens: list):
    """Return the lower-cased tokens from *tokens* that pass ``is_clean``.

    Args:
        tokens: Tokens of a single review.

    Returns:
        A new list; the input list is not modified.
    """
    kept = []
    for token in tokens:
        # The filter sees the token as-is; only kept tokens are lower-cased.
        if is_clean(token):
            kept.append(token.lower())
    return kept
# Run clean_tokens over every tokenized review, one result list per review.
positive_tokens_cleaned = list(map(clean_tokens, positive_tokens))
negative_tokens_cleaned = list(map(clean_tokens, negative_tokens))

# Single WordNet lemmatizer instance, used by lemmatize().
lemmatizer = WordNetLemmatizer()
def lemmatize(word: str, tag: str):
    """Lemmatize *word* with a WordNet POS chosen from its tag.

    Tags starting with 'NN' map to 'n', tags starting with 'VB' map to 'v',
    and every other tag maps to 'a'.

    Args:
        word: The token to lemmatize.
        tag: The token's part-of-speech tag, as produced by ``pos_tag``.

    Returns:
        Whatever ``lemmatizer.lemmatize`` returns for the word and POS.
    """
    for prefix, wordnet_pos in (('NN', 'n'), ('VB', 'v')):
        if tag.startswith(prefix):
            return lemmatizer.lemmatize(word, wordnet_pos)
    # Fallback for every tag that is neither a noun nor a verb tag.
    return lemmatizer.lemmatize(word, 'a')
def lemmatize_tokens(tokens: list):
    """POS-tag *tokens* and return the lemma of each one, in order.

    Args:
        tokens: Tokens of a single review.

    Returns:
        A new list with one lemma per tagged token.
    """
    lemmas = []
    for token, pos in pos_tag(tokens):
        lemmas.append(lemmatize(token, pos))
    return lemmas
# Lemmatize every cleaned review.
positive_tokens_normalized = [lemmatize_tokens(tokens) for tokens in positive_tokens_cleaned]
negative_tokens_normalized = [lemmatize_tokens(tokens) for tokens in negative_tokens_cleaned]


def _to_featuresets(token_lists, label):
    """Pair each review's bag-of-words feature dict with *label*."""
    return [({token: True for token in tokens}, label) for tokens in token_lists]


# ``positive_dataset`` / ``negative_dataset`` were used without ever being
# defined. Build them as (features, label) pairs, the input format of
# NaiveBayesClassifier.train. The labels are 'pos' / 'neg' so that they match
# the refsets['pos'] / testsets['pos'] lookups used for the metrics.
positive_dataset = _to_featuresets(positive_tokens_normalized, 'pos')
negative_dataset = _to_featuresets(negative_tokens_normalized, 'neg')

dataset = positive_dataset + negative_dataset
shuffle(dataset)

# The first 5126 shuffled examples train the model; the remainder is held out.
# NOTE(review): with 5126 or fewer reviews the test split is empty -- confirm
# the split size against the real dataset size.
train_ds = dataset[:5126]
test_ds = dataset[5126:]

classifier = NaiveBayesClassifier.train(train_ds)
# Print the accuracy; as a bare expression its value was silently discarded.
print(classify.accuracy(classifier, test_ds))
While searching through other people's questions, I found that this is how people compute the precision value and other metric values:
# Group the indices of the test examples twice: by their reference label
# (refsets) and by the label the classifier predicts (testsets).
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for index, (features, gold_label) in enumerate(test_ds):
    refsets[gold_label].add(index)
    testsets[classifier.classify(features)].add(index)
This is the code I use to try to print the precision value:
# Precision for the 'pos' label: reference index set vs. predicted index set.
pos_reference, pos_predicted = refsets['pos'], testsets['pos']
print(nltk.metrics.scores.precision(pos_reference, pos_predicted))
This is the error I got:
# Same 'pos' precision, reached through the ``nltk.metrics`` package namespace.
pos_precision = nltk.metrics.precision(refsets['pos'], testsets['pos'])
print(pos_precision)
This also gives the same error.
So how do I solve the error message? What did I do wrong?