I'm new to deep learning. May I ask if the code below uses soft attention or hard attention?
import torch
import torch.nn as nn


class AttentionBlock(nn.Module):
    """Attention gate from Attention U-Net: weights the skip-connection
    features x using a gating signal g from the coarser decoder level."""

    def __init__(self, f_g, f_l, f_int):
        super().__init__()
        # 1x1 conv projecting the gating signal g to the intermediate channel size
        self.w_g = nn.Sequential(
            nn.Conv2d(f_g, f_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(f_int),
        )
        # 1x1 conv projecting the skip-connection features x to the same size
        self.w_x = nn.Sequential(
            nn.Conv2d(f_l, f_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(f_int),
        )
        # 1x1 conv collapsing to a single-channel map, squashed to (0, 1)
        self.psi = nn.Sequential(
            nn.Conv2d(f_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        g1 = self.w_g(g)
        x1 = self.w_x(x)
        psi = self.relu(g1 + x1)   # additive attention
        psi = self.psi(psi)        # continuous attention weights in (0, 1)
        return psi * x             # rescale the skip-connection features
Source: https://www.kaggle.com/code/truthisneverlinear/attention-u-net-pytorch/notebook#Attention-U-Net
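For context, here is a minimal sketch of how I'm calling it; the channel counts and spatial sizes are just example values I made up, not taken from the notebook.

    import torch

    # Hypothetical shapes: 256-channel gating signal (decoder) and
    # 128-channel skip features (encoder), both 64x64 after upsampling.
    block = AttentionBlock(f_g=256, f_l=128, f_int=64)
    g = torch.randn(1, 256, 64, 64)   # gating signal from the decoder path
    x = torch.randn(1, 128, 64, 64)   # skip-connection features from the encoder
    out = block(g, x)
    print(out.shape)                  # torch.Size([1, 128, 64, 64])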
I don't know the difference between soft attention and hard attention.