I've been trying to solve this error for some days now and I just can't manage to find the cause, I tried changing the cod from embedding_matrix[ i ] = model[vocab[i]] to embedding_matrix[ i ] = model.wv[vocab[i]] in which case I don't get the deprecated error but I still get the erorr with that line(25), can someone tell me what is wrong please
from gensim.models import Word2Vec
import re
vocab = []
for word in tokenizer.word_index:
vocab.append( word )
def tokenize( sentences ):
tokens_list = []
vocabulary = []
for sentence in sentences:
sentence = sentence.lower()
sentence = re.sub( '[^a-zA-Z]', ' ', sentence )
tokens = sentence.split()
vocabulary += tokens
tokens_list.append( tokens )
return tokens_list , vocabulary
p = tokenize( questions + answers )
model = Word2Vec( p[ 0 ] )
embedding_matrix = np.zeros( ( VOCAB_SIZE , 100 ) )
for i in range( len( tokenizer.word_index ) ):
embedding_matrix[ i ] = model[vocab[i]]
# encoder_input_data
tokenized_questions = tokenizer.texts_to_sequences( questions )
maxlen_questions = max( [ len(x) for x in tokenized_questions ] )
padded_questions = preprocessing.sequence.pad_sequences( tokenized_questions , maxlen=maxlen_questions , padding='post' )
encoder_input_data = np.array( padded_questions )
print( encoder_input_data.shape , maxlen_questions )
I am getting the following error:
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:25: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-11-29183abd3d2d> in <module>()
23 embedding_matrix = np.zeros( ( VOCAB_SIZE , 100 ) )
24 for i in range( len( tokenizer.word_index ) ):
---> 25 embedding_matrix[ i ] = model[vocab[i]]
26
27 # encoder_input_data
Thank you in advance !