I am trying to improve the result by the LSTM. in part of my project I did the following for RNN:
Below is a quick method used to train the models:
def threshold_search(y_true, y_proba, average = None):
best_threshold = 0
best_score = 0
for threshold in [i * 0.01 for i in range(100)]:
score = f1_score(y_true=y_true, y_pred=y_proba > threshold, average=average)
if score > best_score:
best_threshold = threshold
best_score = score
search_result = {'threshold': best_threshold, 'f1': best_score}
return search_result
def train(model,
X_train, y_train, X_test, y_test,
checkpoint_path='model.hdf5',
epcohs = 25,
batch_size = DEFAULT_BATCH_SIZE,
class_weights = None,
fit_verbose=2,
print_summary = True
):
m = model()
if print_summary:
print(m.summary())
m.fit(
X_train,
y_train,
#this is bad practice using test data for validation, in a real case would use a seperate validation set
validation_data=(X_test, y_test),
epochs=epcohs,
batch_size=batch_size,
class_weight=class_weights,
#saves the most accurate model, usually you would save the one with the lowest loss
callbacks= [
ModelCheckpoint(checkpoint_path, monitor='val_acc', verbose=1, save_best_only=True),
EarlyStopping(patience = 2)
],
verbose=fit_verbose
)
print("\n\n****************************\n\n")
print('Loading Best Model...')
m.load_weights(checkpoint_path)
predictions = m.predict(X_test, verbose=1)
print('Validation Loss:', log_loss(y_test, predictions))
print('Test Accuracy', (predictions.argmax(axis = 1) == y_test.argmax(axis = 1)).mean())
print('F1 Score:', f1_score(y_test.argmax(axis = 1), predictions.argmax(axis = 1), average='weighted'))
plot_confusion_matrix(y_test.argmax(axis = 1), predictions.argmax(axis = 1), classes=encoder.classes_)
plt.show()
return m #returns best performing model
and then I used the simple implementation of an LSTM. Where The layers are as follows:
- Embedding: Matrix of Word Vectors, where each vector store the "meaning" of the word. These can be trained on the fly or by existing pre-trained vector.
- LSTM: RNN that allows for the "building" of state over time
- Dense(64): Feed Forward Neural Network used to interpret the LSTM Output
- Dense(3): This it the output of the model, 3 nodes corresponding to each class. The softmax output will ensure that the sum of values = 1.0 for each output.
def model_1():
model = Sequential()
model.add(Embedding(input_dim = (len(tokenizer.word_counts) + 1), output_dim = 128, input_length = MAX_SEQ_LEN))
model.add(LSTM(128))
model.add(Dense(64, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
m1 = train(model_1,
train_text_vec,
y_train,
test_text_vec,
y_test,
checkpoint_path='model_1.h5',
class_weights= model.any(cws))
But I got the following output and error:
As you can see in the screenshot, the error is:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Would you please help me to solve this error?