I am trying to run sentiment analysis on a dataset of millions of tweets on the server. I am calling a API prediction function that takes a list of 100 tweets and iterate over the test of each tweet to return the huggingface sentiment value, and writes that sentiment to a solr database. However, after the process of few hundred tweets, I get the below error, any suggestions?
API code:
from transformers import pipeline
model = pipeline(task = 'sentiment-analysis',model="finiteautomata/bertweet-base-sentiment-analysis")
# huggingface sentiment analyser
def huggingface_sent(sentence):
text=preprocess(sentence)
if (len(text)>0):
predicted_dic = {'NEG': 'Negative','NEU':'Neutral', 'POS':'Positive'}
return predicted_dic[model(text)[0]['label']]
else:
return 'Neutral'
def predict_list(tweets):
print('Data Processing\n')
predictions={}
for t_id in tweets.keys():
if(tweets[t_id]['language']=='en'):
predictions[t_id] = huggingface_sent(str(tweets[t_id]['full_text']))
else:
predictions[t_id]='NoneEnglish'
print('processed ', len(tweets.keys()))
print('\n first element is ', predictions[t_id])
return predictions
print('Running analyser ....\n')
Error log:
Token indices sequence length is longer than the specified maximum sequence length for this model (211 > 128). Running this sequence through the model will result in indexing errors [2021-11-01 12:24:20,649] ERROR in app: Exception on /api/predict [POST] Traceback (most recent call last): File "/myusername/anaconda3/lib/python3.8/site-packages/flask/app.py", line 2447, in wsgi_app response = self.full_dispatch_request() File "/myusername/anaconda3/lib/python3.8/site-packages/flask/app.py", line 1952, in full_dispatch_request rv = self.handle_user_exception(e) File "/myusername/anaconda3/lib/python3.8/site-packages/flask/app.py", line 1821, in handle_user_exception reraise(exc_type, exc_value, tb) File "/myusername/anaconda3/lib/python3.8/site-packages/flask/_compat.py", line 39, in reraise raise value File "/myusername/anaconda3/lib/python3.8/site-packages/flask/app.py", line 1950, in full_dispatch_request rv = self.dispatch_request() File "/myusername/anaconda3/lib/python3.8/site-packages/flask/app.py", line 1936, in dispatch_request return self.view_functionsrule.endpoint File "/mnt/raid1/diil/sentiment_api/analyser_main.py", line 11, in api_predict_list predictions = predict_list(tweets) File "/mnt/raid1/diil/sentiment_api/analyser_core.py", line 84, in predict_list predictions[t_id] = huggingface_sent(str(tweets[t_id]['full_text'])) File "/mnt/raid1/diil/sentiment_api/analyser_core.py", line 70, in huggingface_sent if model(text): File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/pipelines/text_classification.py", line 126, in call return super().call(*args, **kwargs) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/pipelines/base.py", line 915, in call return self.run_single(inputs, preprocess_params, forward_params, postprocess_params) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/pipelines/text_classification.py", line 172, in run_single return [super().run_single(inputs, preprocess_params, forward_params, postprocess_params)] File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/pipelines/base.py", line 922, in run_single model_outputs = self.forward(model_inputs, **forward_params) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/pipelines/base.py", line 871, in forward model_outputs = self._forward(model_inputs, **forward_params) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/pipelines/text_classification.py", line 133, in _forward return self.model(**model_inputs) File "/myusername/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, **kwargs) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py", line 1198, in forward outputs = self.roberta( File "/myusername/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, **kwargs) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py", line 841, in forward embedding_output = self.embeddings( File "/myusername/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, **kwargs) File "/myusername/anaconda3/lib/python3.8/site-packages/transformers/models/roberta/modeling_roberta.py", line 136, in forward position_embeddings = self.position_embeddings(position_ids) File "/myusername/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, **kwargs) File "/myusername/anaconda3/lib/python3.8/site-packages/tousername/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 2043, in embedding return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) IndexError: index out of range in selfusername/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 2043, in embedding return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) IndexError: index out of range in self