Decoder in Convolutional LSTM

Question

I am trying to implement the convolution lstm network based on this paper: https://arxiv.org/abs/1506.04214, I have implemented the encoder like this:

def new_convLSTM_layer(input,              # The previous layer.
                       num_input_channels, # Num. channels in prev. layer.
                       filter_size,        # Width and height of each filter.
                       num_filters,        # Number of filters.
                       img_size,           #size of image (width or height)
                       hid_st,             #Hidden state of previous level  
                       Cell,
                       use_pooling):            #Cell

    # Shape of the filter-weights for the convolution.

    shape = [filter_size, filter_size, num_input_channels, num_filters]
    shape_2 = [filter_size, filter_size, num_filters, num_filters]

    # Create new weights aka. filters with the given shape.
#filters or weights for input gate
    W_xi = new_weights(shape=shape)
    W_hi = new_weights(shape=shape_2)
#filters or weights for forget gate
    W_xf = new_weights(shape=shape)
    W_hf = new_weights(shape=shape_2)
#filters or weights for cell    
    W_xc = new_weights(shape=shape)
    W_hc = new_weights(shape=shape_2)
#filters or weights for output gate
    W_xo = new_weights(shape=shape)
    W_ho = new_weights(shape=shape_2)

#weights from the cell to gate vectors
#these weights should be the size of input
#The weight matrices from the cell to gate vectors (e.g. Wci) are diagonal, so element m in each gate vector only
#receives input from element m of the cell vector.
#one for each filter
    W_ci = new_weights_dia(num_filters=num_filters, img_size=img_size)
    W_cf = new_weights_dia(num_filters=num_filters, img_size=img_size)
    W_co = new_weights_dia(num_filters=num_filters, img_size=img_size)

    # Create new biases, one for each filter.
    biases_input = new_biases(length=num_filters)
    biases_forget = new_biases(length=num_filters)
    biases_cell = new_biases(length=num_filters)
    biases_output = new_biases(length=num_filters)




#weights * x(t)  

    Wxi_x = tf.nn.conv2d(input=input,
                         filter=W_xi,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    Wxf_x = tf.nn.conv2d(input=input,
                         filter=W_xf,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    Wxc_x = tf.nn.conv2d(input=input,
                         filter=W_xc,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    Wxo_x = tf.nn.conv2d(input=input,
                         filter=W_xo,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
#weight * h(t-1)  

    Whi_h = tf.nn.conv2d(input=hid_st,
                         filter=W_hi,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    Whf_h = tf.nn.conv2d(input=hid_st,
                         filter=W_hf,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    Whc_h = tf.nn.conv2d(input=hid_st,
                         filter=W_hc,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    Who_h = tf.nn.conv2d(input=hid_st,
                         filter=W_ho,
                         strides=[1, 1, 1, 1],
                         padding='SAME')

    #Hadamard products
    Wci_c = tf.multiply(W_ci,Cell)
    Wcf_c = tf.multiply(W_cf,Cell)

    #Gates
    I = tf.sigmoid(Wxi_x + Whi_h + Wci_c + biases_input)
    F = tf.sigmoid(Wxf_x + Whf_h + Wcf_c + biases_forget)
    Cell = (tf.multiply(F, Cell) + tf.multiply(I, tf.tanh(Wxc_x + Whc_h + biases_cell)))
    Wco_c = tf.multiply(W_co,Cell)
    O = tf.sigmoid(Wxo_x + Who_h + Wco_c + biases_output)
    hid_st = tf.multiply(O,tf.tanh(Cell))


    if use_pooling:

        hid_st = tf.nn.max_pool(value=hid_st,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        O = tf.nn.max_pool(value=O,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 1, 1, 1],
                           padding='SAME')



    layer = tf.nn.relu(hid_st)
    layer_Output = tf.nn.relu(O)

    return layer, layer_Output, Cell

for decoder what I did is just using the above function and fed a tensor of zeros and hidden state and cell state of encoder (after the last input was read), and after that I fed the predicted output as input of next time. But when I plot the cell state and hidden state of decoder at each time step I see that it does not learn and has the same values as the hidden and cell state of the decoder.

I would be really thankful if someone tell me where I am wrong! This is my decoder:

def deconvLSTM(input,              # Output of the previous layer.
                     num_input_channels, # Num. channels in prev. layer.
                     filter_size,        # Width and height of each filter.
                     num_filters,        # Number of filters.
                     img_size,           #size of image (width or height)
                     hid_st,             #Hidden state of previous level 
                     Cell,               # Cell-state of previous layer     
                     use_pooling): 
    De_INPUT = input
    De_OUTPUT = tf.zeros([0, 64, 64, 16])
    De_Hidden = tf.zeros([0, 64, 64, 16])
    De_CELL = tf.zeros([0, 64, 64, 16])

    De_Hidden = tf.concat([De_Hidden, hid_st], 0)
    De_CELL = tf.concat([De_CELL, Cell], 0)


    for i in range(10):
        deconv_Hidden_State_1, deconv_Output_1, deconv_Cell_1 =  new_convLSTM_layer(input=De_INPUT,                         # The previous layer.
                                                                                num_input_channels=num_input_channels,    # Num. channels in prev. layer.
                                                                                filter_size=filter_size,                        # Width and height of each filter.
                                                                                num_filters=num_filters,                        # Number of filters.
                                                                                img_size=img_size,                           #size of image (width or height)
                                                                                hid_st=hid_st,                             #Hidden state of previous level  
                                                                                Cell=Cell,
                                                                                use_pooling=True)
        De_OUTPUT = tf.concat([De_OUTPUT, deconv_Output_1], 0)
        De_Hidden = tf.concat([De_Hidden, deconv_Hidden_State_1], 0)
        De_CELL = tf.concat([De_CELL, deconv_Cell_1], 0)

        De_INPUT = deconv_Output_1



    return De_OUTPUT, De_Hidden, De_CELL

Decoder in Convolutional LSTM

0 Answers0