I'm new to Python and I'm having trouble with a function that has been discussed many times already, most recently here: Extract Topic Scores for Documents LDA Gensim Python problem of sorting tuples
I've done what's suggested in the answer:
def format_topics_sentences(ldamodel=lda_model, corpus=corpus, texts=data1):
# Init output
final = []
# Get main topic in each document
for i, row_list in enumerate(ldamodel[corpus]):
row = row_list[0] if ldamodel.per_word_topics else row_list
row = sorted(row, key=lambda x: (x[1]),reverse=True)
# Get the Dominant topic, Perc Contribution and Keywords for each document
for j, (topic_num, prop_topic) in enumerate(row):
if j == 0: # => dominant topic
wp = ldamodel.show_topic(topic_num)
topic_keywords = ", ".join([word for word, prop in wp])
lists1 = int(topic_num), round(prop_topic,4),topic_keywords
final.append(lists1)
else:
break
sent_topics_df = pd.DataFrame(final, columns=['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords'])
contents = pd.Series(texts)
sent_topics_df = pd.concat([sent_topics_df,contents], axis=1)
return(sent_topics_df)
df_topic_sents_keywords = format_topics_sentences(ldamodel=optimal_model, corpus=corpus, texts=texts)
# Format
df_dominant_topic = df_topic_sents_keywords.reset_index()
df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']
# Show
df_dominant_topic.head(10)
but I get AttributeError: 'LdaMallet' object has no attribute 'per_word_topics'
Can someone suggest a correction to the code?