I am implementing self-attention in TensorFlow Keras with a small modification (a residual/add connection).
I have the following input shape:
my_input: KerasTensor(type_spec=TensorSpec(shape=(None, 8, 6, 64), dtype=tf.float32, name=None), name='multiply/mul:0', description="created by layer 'multiply'")
My goal is to take the tensor of shape (None, 8, 6, 64), run each of its 8 time stamps (each a 6 x 64 slice) through self-attention one by one, get a self-attention feature map for every time stamp, and then concatenate them back into an output tensor of shape (None, 8, 6, 64).
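For clarity, here is a minimal sketch of the shape flow I am after (the random input and batch size of 2 are illustrative only, and no attention is applied yet):

import tensorflow as tf

x = tf.random.normal((2, 8, 6, 64))                  # toy batch: 2 samples, 8 time stamps
slices = [x[:, t, :, :] for t in range(x.shape[1])]  # 8 tensors of shape (2, 6, 64)
# ... self-attention would be applied to each (2, 6, 64) slice here ...
out = tf.stack(slices, axis=1)                       # back to (2, 8, 6, 64)
print(out.shape)                                     # (2, 8, 6, 64)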
Implemented Code:
import tensorflow as tf
from tensorflow.keras.layers import Permute

def conv1d(channels, ks=1, strides=1, padding='same'):
    # 1x1 convolution projecting the input into the query/key/value spaces
    conv = tf.keras.layers.Conv1D(channels, ks, strides, padding, activation='relu',
                                  use_bias=False, kernel_initializer='HeNormal')
    return conv

class my_self_attention(tf.keras.layers.Layer):
    def __init__(self, channels):
        super(my_self_attention, self).__init__()
        self.query = conv1d(channels)
        self.key = conv1d(channels)
        self.value = conv1d(channels)
        # learnable scale for the residual (add) connection, initialized to 0
        self.gamma = tf.compat.v1.get_variable("gamma", [1], initializer=tf.constant_initializer(0.0))

    def call(self, x):
        # drop the singleton time axis: (None, 1, 6, 64) -> (None, 6, 64)
        x = tf.reshape(x, shape=[-1, x.shape[2], x.shape[3]])
        f = self.query(x),
        g = self.key(x)
        h = self.value(x)
        # multiply query with key, then apply softmax to the result
        attention_weights = tf.keras.activations.softmax(tf.matmul(g, Permute((2, 1))(f)))
        sensor_att_fm = tf.matmul(attention_weights, h)
        # residual (add) connection scaled by gamma
        o = self.gamma * sensor_att_fm + x
        # restore the singleton time axis: (None, 6, 64) -> (None, 1, 6, 64)
        return tf.reshape(o, shape=[-1, 1, x.shape[1], x.shape[2]])

channels = 64  # matches the channel dimension of my_input
sa = my_self_attention(channels)
refined_fm = tf.concat([sa(tf.expand_dims(my_input[:, t, :, :], 1)) for t in range(my_input.shape[1])], 1)
I get the following error:
ValueError: Dimension must be 4 but is 3 for '{{node my_self_attention/permute/transpose}} = Transpose[T=DT_FLOAT, Tperm=DT_INT32](my_self_attention/permute/transpose/a, my_self_attention/permute/transpose/perm)' with input shapes: [1,?,6,64], [3].
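One detail that looks suspicious to me (an assumption on my part, not a verified diagnosis): the trailing comma in f = self.query(x), makes f a one-element tuple rather than a tensor, and converting that tuple back to a tensor would prepend a dimension of 1, which would match the [1,?,6,64] shape in the error message. A minimal sketch of the call body without that comma:

    def call(self, x):
        # (None, 1, 6, 64) -> (None, 6, 64)
        x = tf.reshape(x, shape=[-1, x.shape[2], x.shape[3]])
        f = self.query(x)  # no trailing comma, so f stays a (None, 6, 64) tensor
        g = self.key(x)
        h = self.value(x)
        attention_weights = tf.keras.activations.softmax(tf.matmul(g, Permute((2, 1))(f)))
        sensor_att_fm = tf.matmul(attention_weights, h)
        o = self.gamma * sensor_att_fm + x
        return tf.reshape(o, shape=[-1, 1, x.shape[1], x.shape[2]])

As a side note, inside a tf.keras.layers.Layer the usual way to create gamma would be self.add_weight(name='gamma', shape=(1,), initializer='zeros') rather than tf.compat.v1.get_variable, though I don't think that is what triggers this particular error.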
How should I fix this issue?