Hello, my programmer friends. I'm working on my first NLP project, which computes and displays the TF-IDF values for 5 documents. Here is part of the code:
def IDF(corpus, unique_words):
    """Return a smoothed inverse document frequency for each given word.

    For every word ``w`` in ``unique_words`` the value is
    ``log((1 + N) / (1 + df(w))) + 1``, where ``N`` is ``len(corpus)`` and
    ``df(w)`` is the number of documents whose whitespace-split tokens
    contain ``w``.

    Args:
        corpus: Sized collection of document strings.
        unique_words: Iterable of words to score.

    Returns:
        A dict mapping each word of ``unique_words`` to its IDF (a float),
        in the iteration order of ``unique_words``.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import math`` that is not visible alongside this definition.
    import math

    n_docs = len(corpus)

    # Count, in a single pass, how many documents contain each token.
    # Tokenising each document once (instead of once per vocabulary word)
    # avoids re-splitting the whole corpus for every word.
    doc_freq = {}
    for document in corpus:
        for token in set(document.split()):
            doc_freq[token] = doc_freq.get(token, 0) + 1

    # The "+ 1" terms keep the ratio finite for words that occur in no
    # document and keep the result strictly positive.
    return {
        word: math.log((1 + n_docs) / (doc_freq.get(word, 0) + 1)) + 1
        for word in unique_words
    }
def fit(whole_data):
    """Build the vocabulary index and the IDF table for a corpus.

    Args:
        whole_data: List of document strings. Each document is tokenised
            by splitting on whitespace.

    Returns:
        A ``(vocab, idf_values)`` tuple. ``vocab`` maps every distinct
        token of length 2 or more to its position in the sorted token
        list; ``idf_values`` is whatever ``IDF(whole_data, sorted_tokens)``
        returns for those same tokens.

    Raises:
        TypeError: If ``whole_data`` is not a list.
    """
    # Fail loudly on the wrong input type rather than continuing with
    # locals that were never meaningfully populated.
    if not isinstance(whole_data, list):
        raise TypeError(
            "fit() expects a list of document strings, got "
            + type(whole_data).__name__
        )

    # Single-character tokens are skipped; sorting gives every remaining
    # word a stable, reproducible index.
    unique_words = sorted(
        {
            token
            for document in whole_data
            for token in document.split()
            if len(token) >= 2
        }
    )
    vocab = {word: index for index, word in enumerate(unique_words)}
    idf_values = IDF(whole_data, unique_words)
    return vocab, idf_values
# Fit on the corpus (expected to be defined elsewhere in the file) to obtain
# the word -> index vocabulary and the per-word IDF values.
vocabulary, idf_of_vocabulary = fit(whole_data=corpus)
The name IDF on line 22 raises a NameError. Is this caused by where the function is defined?