I am trying to build a hierarchical BiLSTM-CRF model for a multi class classification problem. I am using CRFModel from Keras_CRF. Here is the code for the model:
from keras_crf import CRFModel
input_size=max_sen_length
embedding_size=100
lstm_size=128
learn_rate=0.01
drop_out=0.1
output_size=len(unique_tag_set)
#-----------------------------------Model -----------------------------------------
current_input=Input(shape=(sequence_length,input_size,))
embeddings = Embedding(vocab_size, embedding_size, weights=[embedding_matrix],input_length=max_sen_length, name='current_embed',trainable=False,dtype=tf.float32)(current_input)
hidden_vectors=TimeDistributed(Bidirectional(LSTM(units=lstm_size, return_sequences=False)))(embeddings )
final_vectors=Bidirectional(LSTM(units=lstm_size, return_sequences=True, ))(hidden_vectors )
base_model = Model(inputs=current_input, outputs=final_vectors)
model = CRFModel(base_model, output_size)
opt = tf.keras.optimizers.Adam(learning_rate=learn_rate)
model.compile(optimizer=opt,metrics=['acc'])
#model.compile(optimizer=opt, loss=crf.loss_function, metrics=[crf.accuracy])
print('-------Building a Conversational level DA tagger for longer context--------------')
print(model.summary())
early_stopping_cb=tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=3,restore_best_weights=True)
history=model.fit(np.array(x_train_split),np.array(y_train_numb),
epochs=15,batch_size=16,
shuffle = False, verbose = 1,
#validation_data=[x_valid,y_valid],
validation_split=0.2,
sample_weight=np.array(sequence_sample_weight),
callbacks=[early_stopping_cb])
And I am getting this error:
Traceback (most recent call last):
File "/homes/sn313/mLSTM/preprocess/DA_taggers/conversational_tagger_CRF.py", line 501, in <module>
history=model.fit(np.array(x_train_split),np.array(y_train_numb),
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 888, in _call
return self._stateless_fn(*args, **kwds)
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2942, in __call__
return graph_function._call_flat(
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 1918, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 555, in call
outputs = execute.execute(
File "/homes/sn313/.conda/envs/qm-gpu/lib/python3.8/site-packages/tensorflow/python/eager/execute.py", line 59, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [16,3] vs. [16]
[[node mul (defined at /.conda/envs/qm-gpu/lib/python3.8/site-packages/keras_crf/crf_model.py:51) ]] [Op:__inference_train_function_850088]
Errors may have originated from an input operation.
Input Source operations connected to node mul:
IteratorGetNext (defined at /mLSTM/preprocess/DA_taggers/conversational_tagger_CRF.py:501)
Neg (defined at /.conda/envs/qm-gpu/lib/python3.8/site-packages/keras_crf/crf_model.py:49)
Function call stack:
train_function
Can someone help me in solving this issue?