I am implementing code to generate labeled data for Natural Language Understanding (NLU), following the article "Labeled Data Generation with Encoder-decoder LSTM for Semantic Slot Filling" (https://pdfs.semanticscholar.org/7ffe/83d7dd3a474e15ccc2aef412009f100a5802.pdf). My architecture is a simple encoder-decoder LSTM, but since the sentences it generates (both words and labels) are not correct, I am first trying to make it reproduce exactly the same sentence (words only) that I give as input. Unfortunately, even this is not working.
I am using word2vec for the word embeddings, with the embedding dimension set to 64 (as suggested in the article). The encoder LSTM receives the sequence in reversed order and uses a dropout rate of 0.5. The decoder LSTM also uses a dropout rate of 0.5, with a softmax layer applied at each output of the sequence to pick the most probable word. The inputs are exactly the same as the targets (same sentences), since for now I just want to reproduce the input sentence.
For training, I used the Adam optimizer with categorical_crossentropy as the loss. For inference, I use beam search (B=3) to generate sequences.
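Since the targets are the input sentences themselves and the loss is categorical_crossentropy, the target tensors are one-hot encoded over the word vocabulary. As a minimal sketch (assuming train_w and val_w are integer matrices of shape (num_sentences, maxlen) and vocab_w_size is the word vocabulary size), they can be built like this:

from keras.utils import to_categorical

# One-hot targets of shape (num_sentences, maxlen, vocab_w_size), matching categorical_crossentropy.
train_lab_w = to_categorical(train_w, num_classes=vocab_w_size)
val_lab_w = to_categorical(val_w, num_classes=vocab_w_size)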
My training code:
import numpy as np
from math import log
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding
from keras import backend as K

def pretrained_embedding_layer(emb):
    # Frozen Embedding layer initialised with the pretrained word2vec matrix.
    vocab_len = len(emb)
    emb_dim = len(emb[0])
    emb_layer = Embedding(vocab_len, emb_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb])
    return emb_layer

LSTM_encoder = LSTM(1024, dropout=0.5, return_state=True, go_backwards=True, name='lstm_encoder')
LSTM_decoder = LSTM(1024, dropout=0.5, return_sequences=True, return_state=True, name='lstm_decoder')
dense_w = Dense(vocab_w_size, activation='softmax', name="word_output")

K.set_learning_phase(1)

def model1_enc_dec(input_shape, w_emb):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb_layer = pretrained_embedding_layer(w_emb)
    wemb = wemb_layer(words_indices)
    # The encoder reads the (reversed) sequence; its final states initialise the decoder.
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_states = [enc_state_h, enc_state_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb, initial_state=encoder_states)
    dec_out = dense_w(dec_out)
    model1 = Model(inputs=[words_indices], outputs=[dec_out])
    return model1

model = model1_enc_dec((maxlen,), w_emb)
model.summary()
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
model.fit(train_w, train_lab_w, validation_data=(val_w, val_lab_w), epochs=epochs, verbose=1, shuffle=True)
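As a quick sanity check of the trained model itself (before any step-by-step decoding), I also look at its argmax predictions on one training sentence. A rough sketch, reusing the i2w index-to-word dictionary from the inference code below:

# Feed one training sentence through the trained model and decode the argmax at each time step.
probs = model.predict(train_w[:1])         # shape: (1, maxlen, vocab_w_size)
pred_ids = np.argmax(probs, axis=-1)[0]    # most probable word index per position
print(' '.join(i2w[idx] for idx in pred_ids))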
My inference code:
wemb_layer = Embedding(len(w_emb), len(w_emb[0]), trainable=False)
wemb_layer.build((None,))
LSTM_encoder = LSTM(1024, return_state=True, go_backwards=True, name='lstm_encoder')
LSTM_decoder = LSTM(1024, return_sequences=True, return_state=True, name='lstm_decoder')
dense_w = Dense(vocab_w_size, activation='softmax', name="word_output")

def target_model(input_shape):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_states = [enc_state_h, enc_state_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb, initial_state=encoder_states)
    dec_out = dense_w(dec_out)
    model = Model(inputs=[words_indices], outputs=[dec_out])
    return model

target_model = target_model((maxlen,))

# Copy the trained weights into the inference layers (layer 0 of the trained model is the Input).
wemb_layer.set_weights(model.layers[1].get_weights())
LSTM_encoder.set_weights(model.layers[2].get_weights())
LSTM_decoder.set_weights(model.layers[3].get_weights())
dense_w.set_weights(model.layers[4].get_weights())
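As an alternative to the index-based copy above, the same weights can be fetched by layer name, which is less sensitive to layer ordering (this assumes the names 'lstm_encoder', 'lstm_decoder' and 'word_output' given when the layers were created):

# Fetch the trained weights by layer name instead of positional index.
LSTM_encoder.set_weights(model.get_layer('lstm_encoder').get_weights())
LSTM_decoder.set_weights(model.get_layer('lstm_decoder').get_weights())
dense_w.set_weights(model.get_layer('word_output').get_weights())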
def model1_enco_infe(input_shape):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_model = Model(inputs=[words_indices], outputs=[enc_state_h, enc_state_c])
    return encoder_model
def model1_deco_infe(input_shape):
    dec_word_input = Input(shape=input_shape, dtype='int32')
    dec_state_input_h = Input(shape=(1024,))
    dec_state_input_c = Input(shape=(1024,))
    wemb = wemb_layer(dec_word_input)
    dec_states_input = [dec_state_input_h, dec_state_input_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb, initial_state=dec_states_input)
    dec_states_output = [dec_state_h, dec_state_c]
    deco_out = dense_w(dec_out)
    decoder_model = Model(inputs=[dec_word_input] + dec_states_input,
                          outputs=[deco_out] + dec_states_output)
    return decoder_model
encoder_model = model1_enco_infe((maxlen,))
decoder_model = model1_deco_infe((1,))
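Before running the beam search, I also check the two inference models with a plain greedy decoding loop. This is only a rough sketch; it assumes the same w2i / i2w dictionaries used below and stops at 'EOS':

def greedy_decode(word_seq, encoder_model, decoder_model, w2i, i2w, max_len=50):
    # Encode the input sentence, then decode it back one word at a time, always taking the argmax.
    state_h, state_c = encoder_model.predict(word_seq)
    states = [state_h, state_c]
    cur_word = np.array([[w2i['BOS']]])
    decoded = []
    for _ in range(max_len):
        dec_w_out, state_h, state_c = decoder_model.predict([cur_word] + states)
        idx = int(np.argmax(dec_w_out[0, -1, :]))
        word = i2w[idx]
        if word == 'EOS':
            break
        decoded.append(word)
        states = [state_h, state_c]
        cur_word = np.array([[idx]])
    return ' '.join(decoded)

print(greedy_decode(np.reshape(train_w[0], (1, maxlen)), encoder_model, decoder_model, w2i, i2w))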
def beamsearch_B(deco_w_out, beam):
    # Return the indices of the `beam` most probable words in the decoder softmax output.
    words_index = []
    dw = deco_w_out.copy()
    for i in range(beam):
        word_index = np.argmax(dw, axis=-1)
        dw[0][0][word_index[0][0]] = 0
        words_index.append(word_index[0][0])
    return words_index
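As a quick illustration of what this helper returns (dec_w_out here stands for one decoder softmax output of shape (1, 1, vocab_w_size)):

# Indices of the three most probable words for the current step, most probable first.
candidates = beamsearch_B(dec_w_out, 3)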
def generate_model1_add(word_seq, encoder_model, decoder_model, dec_word_input, id2word, beam):
    # Encode the input sentence; the encoder states initialise the decoder.
    [enc_state_h, enc_state_c] = encoder_model.predict(word_seq)
    states = [enc_state_h, enc_state_c]
    word_sentence = ''
    probs_word = []
    word_sentences = []
    dec_word_inputs = []
    states_beam = []
    stop_condition = False
    # First decoding step: expand the BOS token into `beam` candidate words.
    [dec_w_out, dec_state_h, dec_state_c] = decoder_model.predict([dec_word_input] + states)
    words_index = beamsearch_B(dec_w_out, beam)
    for i in range(beam):
        probs_word.append(-log(dec_w_out[0][0][words_index[i]]))
        word_sentences.append(id2word[words_index[i]])
        dec_word_inputs.append([words_index[i]])
        states_beam.append([dec_state_h, dec_state_c])
    n_words = 1
    endgame = []
    while not stop_condition:
        words_indexes, words_sentences, probs_words, states_b = [], [], [], []
        # Expand each of the `beam` hypotheses with its `beam` best next words.
        for k in range(beam):
            [dec_w_out, dec_state_h, dec_state_c] = decoder_model.predict([dec_word_inputs[k]] + states_beam[k])
            words_index = beamsearch_B(dec_w_out, beam)
            states = [dec_state_h, dec_state_c]
            for j in range(beam):
                words_indexes.append(words_index[j])
                probs_words.append(probs_word[k] * -log(dec_w_out[0][0][words_index[j]]) + 1e-7)
                words_sentences.append(word_sentences[k] + ' ' + id2word[words_index[j]])
                states_b.append(states)
        # Keep the `beam` candidate hypotheses with the best accumulated scores.
        probs = []
        for i in range(len(probs_words)):
            probs.append(1 / (probs_words[i]))
        indexes = []
        for i in range(beam):
            index = np.argmax(probs, axis=-1)
            probs[index] = 0
            indexes.append(index)
        for i in range(beam):
            probs_word[i] = probs_words[indexes[i]]
            word_sentences[i] = words_sentences[indexes[i]]
            dec_word_inputs[i] = [words_indexes[indexes[i]]]
            states_beam[i] = states_b[indexes[i]]
            if id2word[words_indexes[indexes[i]]] == 'EOS':
                endgame.append(i)
        if len(endgame) == 1:
            word_sentence = word_sentences[endgame[0]]
            stop_condition = True
        elif len(endgame) > 1:
            word_sentence = word_sentences[np.min(endgame)]
            stop_condition = True
        n_words += 1
        if n_words > 50:
            word_sentence = word_sentences[0]
            stop_condition = True
    return word_sentence
word_sentence = generate_model1_add(np.reshape(train_w[i], (1, maxlen)),
                                    encoder_model, decoder_model, [w2i['BOS']], i2w, 3)
An example of my generated sequences:
Input sentence: BOS i 'm fourth in flying from boston to atlanta EOS PAD PAD PAD ...
Generated sentence: BOS from from from from from from from from from from from from from from from from from from from ...
It seems that the trained weights are not correct, yet during training I got loss: 0.0032 - acc: 0.9990 - val_loss: 0.0794 - val_acc: 0.9888.
What I want is simply to generate exactly the same sentence as the input. I hope you can help me. Thank you in advance!