I am training a BiLSTM-CRF model and I get the following error on every platform I have tried: Jupyter Notebook, Google Colab, PyCharm, and even VS Code on Ubuntu 22.04.
MemoryError Traceback (most recent call last)
Cell In[37], line 2
1 if __name__ == '__main__':
----> 2 main()
Cell In[34], line 50, in main()
47 test_data = pd.read_csv(test_path, encoding='utf-8').dropna()
49 # Get dataset
---> 50 train_dataset = CTIDatasetForBiLSTMCRF(sentences=train_data['Sentences'].values,
51 tags=train_data['Tags'].values,
52 sequence_len=SEQ_LEN,
53 transforms=Data2Idx,
54 target_transforms=Data2Idx)
55 test_dataset = CTIDatasetForBiLSTMCRF(sentences=test_data['Sentences'].values,
56 tags=test_data['Tags'].values,
57 sequence_len=SEQ_LEN,
58 transforms=Data2Idx,
59 target_transforms=Data2Idx)
61 # Get dataloader
File ~threat intel/IOC-Detect/utils/datasets.py:26, in CTIDatasetForBiLSTMCRF.__init__(self, sentences, tags, sequence_len, transforms, target_transforms)
23 self.vocab_size, self.vocab2idx, self.idx2vocab = self.__get_vocabs(sentences, True)
24 self.tag_size, self.tag2idx, self.idx2tag = self.__get_vocabs(tags)
---> 26 self.sentences, self.tags = self.__data_preprocess(sentences, tags, sequence_len)
28 self.transforms = transforms(self.vocab2idx) if transforms else None
29 self.target_transforms = target_transforms(self.tag2idx) if target_transforms else None
File /threat intel//IOC-Detect/utils/datasets.py:49, in CTIDatasetForBiLSTMCRF.__data_preprocess(self, sentences, tags, sequence_len)
48 def __data_preprocess(self, sentences: np.array, tags: np.array, sequence_len: int) -> tuple[np.array, np.array]:
---> 49 sentences = self.__padding(self.__data_std(sentences, True), sequence_len)
50 tags = self.__padding(self.__data_std(tags), sequence_len)
52 return sentences, tags
File IOC-Detect/utils/datasets.py:70, in CTIDatasetForBiLSTMCRF.__padding(self, datas, sequence_len)
67 data = np.append(data, ['<PAD>'] * (sequence_len - len(data)))
68 outputs.extend(data[i:i+sequence_len] for i in range(len(data)-sequence_len+1))
---> 70 return np.array(outputs)
I used these libraries:
import os
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from model.bilstm_crf import BiLSTM_CRF
from utils.datasets import CTIDatasetForBiLSTMCRFWithNgram, CTIDatasetForBiLSTMCRF, Data2Idx
When I reduce the array size from 21714 to 1000, the MemoryError goes away, but a new problem appears and I get this error instead:
ZeroDivisionError Traceback (most recent call last)
Cell In[7], line 2
1 if __name__ == '__main__':
----> 2 main()
---> 81 train(model=model,
---> 31 print(f'Epoch: {epoch}, Train Loss: {train_loss /
ZeroDivisionError: division by zero