I have a DataFrame with text in the first column and predefined aspects in the second column; however, no aspects are defined for some of the texts (for example, row 2).
# Sample reviews, each paired with its predefined aspect terms.
# An empty list marks a review that has no predefined aspects.
records = [
    ("The camera quality of this phone is amazing.", ["camera", "phone"]),
    ("The belt is poor quality", []),
    ("The battery life could be improved.", ["battery", "life"]),
    ("The display is sharp and vibrant.", ["display"]),
    ("The customer service was disappointing.", ["customer service"]),
]

# Split the records back into the two column lists the DataFrame expects.
texts, aspect_lists = zip(*records)
data = {
    'text': list(texts),
    'aspects': list(aspect_lists),
}
df = pd.DataFrame(data)
I want to generate two things:
- using the predefined aspects for each text, generate a sentiment score
- using only the text, generate the aspects and also their sentiment scores with the package
Note: the package is yangheng/deberta-v3-base-absa-v1.1
1) Generate sentiment scores based on the predefined aspects
2) Generate both the aspects and their respective sentiments
Note: row 2 does not have any predefined aspects
I tried the following and am getting an error:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
# Load the ABSA model and tokenizer
model_name = "yangheng/deberta-v3-base-absa-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Classify the sentiment of every predefined aspect, one review at a time.
# NOTE(review): as used here the model only labels an aspect that is passed
# in alongside the text, so a review without predefined aspects ends up with
# empty results; proposing aspects from raw text would need a separate
# extraction step -- confirm against the model card.
aspects = []
sentiments = []
# Walk the two columns directly: the row index is never used, and this avoids
# building a Series per row as iterrows() does.
for text, row_aspects in zip(df['text'], df['aspects']):
    # A missing cell (None/NaN) is not iterable and would raise a TypeError
    # in the inner loop; treat it the same as "no predefined aspects".
    if not pd.api.types.is_list_like(row_aspects) and pd.isna(row_aspects):
        row_aspects = []

    aspect_sentiments = []
    for aspect in row_aspects:
        # Encode the review and the aspect together as a sentence pair.
        inputs = tokenizer(text, aspect, return_tensors="pt")
        with torch.inference_mode():
            outputs = model(**inputs)
        # Pick the best class along the label axis explicitly instead of
        # over the flattened logits; with a single input pair .item() is valid.
        predicted_sentiment = outputs.logits.argmax(dim=-1).item()
        sentiment_label = model.config.id2label[predicted_sentiment]
        aspect_sentiments.append(f"{aspect}: {sentiment_label}")

    aspects.append(row_aspects)
    sentiments.append(aspect_sentiments)

# Add the generated aspects and sentiments to the DataFrame
df['generated_aspects'] = aspects
df['generated_sentiments'] = sentiments

# Print the updated DataFrame
print(df)
Generic example of how to use the package:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Minimal usage example: label the sentiment of each listed aspect in a
# single sentence and keep the probability of the chosen label.
model_name = "yangheng/deberta-v3-base-absa-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

aspects = ["food", "service"]
text = "The food was great but the service was terrible."

# Maps aspect -> (label, probability of that label).
sentiment_aspect = {}
for aspect in aspects:
    # The sentence and the aspect are encoded together as a pair.
    encoded = tokenizer(text, aspect, return_tensors="pt")
    with torch.inference_mode():
        logits = model(**encoded).logits
    # Only one pair is encoded per call, so take the first row of the logits
    # and normalise it into probabilities.
    probabilities = F.softmax(logits[0], dim=-1)
    best = torch.argmax(probabilities).item()
    label = model.config.id2label[best]
    confidence = probabilities[best].item()
    sentiment_aspect[aspect] = (label, confidence)
print(sentiment_aspect)
Desired Output