slot-filling intent-detection joint model

Question

Hi everybody, I have developed two RNN models for a chatbot. Let's say that the user says: "Tell me how the weather will be tomorrow in Paris". The first model will be able to recognize the user's intent, WEATHER_INFO, while the second one will be able to extract meaningful information from the phrase, like LOC:Paris and DATE:tomorrow. Of course, there are many other intent categories, for example MUSIC_PLAY and so on. Since the two models are not linked in any way, for the same phrase we could get, for example, the results intent:WEATHER_INFO and TITLE:Paris, where the slot TITLE is instead linked to the MUSIC_PLAY intent. Many researchers have tried to improve performance by creating a joint model in which each of the two models uses the other in order to avoid this kind of error. Here is my code for the slot-filling NN:

from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
import keras as k
from keras_contrib.layers import CRF


# Slot-filling tagger: Embedding -> BiLSTM -> LSTM -> per-timestep Dense -> CRF.
# Relies on max_len, token_ids and n_tags being defined earlier (not shown here).

# Model input: max_len values per sample.
# presumably padded sequences of integer token ids — confirm against preprocessing.
# NOTE(review): the name `input` shadows the Python builtin of the same name.
input = Input(shape=(max_len,))

# Embedding width; also reused below as the base LSTM unit count.
word_embedding_size = 150
# Vocabulary size for the embedding table, taken from token_ids.
n_words = len(token_ids)


# Embedding Layer
model = Embedding(input_dim=n_words, output_dim=word_embedding_size, input_length=max_len)(input)

# BI-LSTM Layer
# return_sequences=True keeps one output per timestep, which the tagging head needs.
model = Bidirectional(LSTM(units=word_embedding_size,
                           return_sequences=True,
                           dropout=0.5,
                           recurrent_dropout=0.5,
                           kernel_initializer=k.initializers.he_normal()))(model)
# Second, unidirectional LSTM with twice the unit count of each BiLSTM direction.
model = LSTM(units=word_embedding_size * 2,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5,
             kernel_initializer=k.initializers.he_normal())(model)

# TimeDistributed Layer
# Applies the same Dense(n_tags) projection to every timestep.
model = TimeDistributed(Dense(n_tags, activation="relu"))(model)

# CRF Layer
# Kept in its own variable because the compile step reads crf.loss_function
# and crf.accuracy.
crf = CRF(n_tags)

out = crf(model)  # output
# From here on `model` is the assembled Keras Model, no longer a layer tensor.
model = Model(input, out)

# In[]

# model compile and fit

from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt

# Optimiser
# Adam with an explicit learning rate of 0.0005; beta_1 and beta_2 are set explicitly.
adam = k.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999)

# Compile model
# The loss comes from the CRF layer; both the CRF accuracy and the generic
# 'accuracy' metric are tracked.
model.compile(optimizer=adam, loss=crf.loss_function, metrics=[crf.accuracy, 'accuracy'])

model.summary()

# Saving the best model only
# The checkpoint file name embeds val_accuracy rounded to two decimals.
filepath = "ner-bi-lstm-td-model-{val_accuracy:.2f}.hdf5"
# save_best_only with mode='max': keep a checkpoint only when the monitored
# val_accuracy improves.
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# Fit the model
# validation_split=0.1 holds out 10% of the training data, which provides the
# val_accuracy value that the checkpoint monitors.
history = model.fit(X_train, y_train, batch_size=256, epochs=10, validation_split=0.1, verbose=1,
                    callbacks=callbacks_list)

And here is the code for the intent-detection NN:

#CNN architecture    

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras import layers

# Training hyper-parameters for the intent classifier.
batch_size = 128

epochs = 12
if nn_architecture == 'CNN':
    # Intent classifier: frozen pre-trained embeddings -> Conv1D ->
    # global max pooling -> dense hidden layer -> softmax over intents.
    model_CNN = Sequential()
    # `Embedding` is not imported by name in this snippet, so reach it through
    # the imported `layers` module. trainable=False keeps the weights supplied
    # in embedding_matrix fixed during training.
    e = layers.Embedding(vocab_size, 300, weights=[embedding_matrix], input_length=max_length, trainable=False)
    model_CNN.add(e)
    model_CNN.add(Dropout(0.2))
    # we add a Convolution1D, which will learn filters
    # word group filters of size filter_length:
    filters = 50
    kernel_size = 3
    hidden_dims = 250
    model_CNN.add(layers.Conv1D(filters,
                                kernel_size,
                                padding='valid',
                                activation='relu',
                                strides=1))
    # we use max pooling:
    model_CNN.add(layers.GlobalMaxPooling1D())

    # We add a vanilla hidden layer:
    model_CNN.add(Dense(hidden_dims))
    model_CNN.add(Dropout(0.2))
    model_CNN.add(layers.Activation('relu'))

    # We project onto one unit per intent class and normalise with softmax,
    # so the outputs form a probability distribution over mutually exclusive
    # intents.
    # NOTE(review): assumes Y_train_c is one-hot encoded with exactly one
    # intent per utterance — confirm. If the labels are integer class ids use
    # 'sparse_categorical_crossentropy' instead.
    model_CNN.add(Dense(nbClasses))  # no_of_categories
    model_CNN.add(layers.Activation('softmax'))

    # categorical_crossentropy pairs with softmax; with the previous
    # sigmoid + binary_crossentropy setup Keras reported per-unit binary
    # accuracy, which overstates multi-class performance.
    model_CNN.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
    history_CNN = model_CNN.fit(X_train, Y_train_c,
                                batch_size=batch_size,
                                epochs=epochs,
                                # validation_split=0.2
                                )
    # Log recorded with the earlier sigmoid/binary_crossentropy configuration
    # (binary accuracy, not comparable with categorical accuracy):
    # Epoch 12/12
    # 38771/38771 [==============================] - 11s 276us/step -
    # loss: 0.0046 - accuracy: 0.9985

What I would like to do is merge these two architectures in order to obtain this:

Please help me ... thanks in advance

score 0 · Answer 1 · answered Apr 29 '21 at 13:54

It's hardly a research problem nowadays, but nevertheless... Here is an article on github.io that does exactly what you want: it combines the intent classification and slot filling tasks in one model. https://chsasank.github.io/spoken-language-understanding.html

Since link-only answers are frowned upon, here is the model architecture as well. I modified it slightly, but in general here is the Keras code:

def build_model(self):
    """Build and compile the joint intent-detection / slot-filling model.

    A single embedded and convolved token sequence feeds two heads:

    * ``slot_output`` - a per-token softmax over ``n_slots`` produced by the
      (optionally bidirectional) recurrent layer selected by
      ``self.rnn_type``;
    * ``intent_output`` - a per-utterance softmax over ``n_classes`` produced
      by a GRU that returns only its final state.

    Reads ``self.embedding_dimension``, ``self.dropout_parameter``,
    ``self.rnn_type``, ``self.rnn_units``, ``self.bidirectional``,
    ``self.maxPooling`` and ``self.averagePooling``, plus the names
    ``n_vocab``, ``n_slots``, ``n_classes`` and ``kerasModel`` from the
    enclosing scope. The compiled model is stored in ``self.model``.

    Returns:
        int: always ``0``.
    """
    # Input length hard-coded by this architecture.
    sequence_length = 15

    # shape must be a tuple: (15,) rather than the plain int (15).
    main_input = Input(shape=(sequence_length,), dtype='int32', name='main_input')
    x = Embedding(output_dim=self.embedding_dimension, input_dim=n_vocab,
                  input_length=sequence_length)(main_input)
    x = Convolution1D(64, 5, padding='same', activation='relu')(x)

    if self.dropout_parameter > 0.0:
        x = Dropout(self.dropout_parameter)(x)

    # Compare strings by value; `is` tests object identity and only works by
    # accident of string interning.
    if self.rnn_type == 'GRU':
        rnn = GRU(self.rnn_units, return_sequences=True)
    elif self.rnn_type == 'LSTM':
        rnn = LSTM(self.rnn_units, return_sequences=True)
    else:
        # The slot head needs one output per token, so the fallback layer
        # must return the full sequence like the other two.
        rnn = SimpleRNN(self.rnn_units, return_sequences=True)

    if self.bidirectional:
        rnn_slot = Bidirectional(rnn)(x)
    else:
        rnn_slot = rnn(x)

    # The intent head only needs the final state of the sequence.
    rnn_intent = GRU(self.rnn_units, return_sequences=False)(x)

    # NOTE(review): the pooled tensor below is not consumed by either head,
    # so pooling currently has no effect on the compiled model. If pooling is
    # meant to be part of the network it has to be applied before the
    # recurrent layers above — confirm the intended architecture.
    if self.maxPooling:
        x = MaxPooling1D(strides=1, padding='same')(x)
        print("Using MaxPooling")
    elif self.averagePooling:
        x = AveragePooling1D(strides=1, padding='same')(x)
        print("Using AveragePooling")

    slot_output = Dense(n_slots, activation='softmax', name='slot_output')(rnn_slot)
    intent_output = Dense(n_classes, activation='softmax', name='intent_output')(rnn_intent)
    model = kerasModel(inputs=[main_input], outputs=[intent_output, slot_output])

    # rmsprop is recommended for RNNs https://stats.stackexchange.com/questions/315743/rmsprop-and-adam-vs-sgd
    model.compile(
        optimizer='rmsprop',
        loss={
            'intent_output': 'categorical_crossentropy',
            'slot_output': 'categorical_crossentropy',
        },
        # Passed as a list: not every Keras version accepts a bare string.
        metrics=['accuracy'],
    )
    model.summary()
    self.model = model

    return 0

slot-filling intent-detection joint model

1 Answers1