I am using a neural network for a regression task. My input is a grayscale image of size 100x70x1.
The gray area has a single constant pixel value of 60.
The input goes through a preprocessing layer, which multiplies every pixel value by 1./255.
My target output is just three doubles: [0.87077969, 0.98989031, 0.98888382].
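In Keras terms, the preprocessing step is equivalent to a Rescaling layer (a minimal sketch, assuming tf.keras.layers.Rescaling; in older TF versions it lives under tf.keras.layers.experimental.preprocessing):

import tensorflow as tf

# Scale raw pixel values from [0, 255] into [0, 1]
rescale = tf.keras.layers.Rescaling(1. / 255)

# The uniform gray input: every pixel is 60, so after rescaling every value is ~0.2353
image = tf.fill([1, 100, 70, 1], 60.0)
scaled = rescale(image)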
I used a ResNet152 model, as shown below:
import tensorflow as tf

class Bottleneck(tf.keras.Model):
    expansion = 4

    def __init__(self, in_channels, out_channels, strides=1):
        super(Bottleneck, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(out_channels, 1, 1, use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, strides, padding="same", use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv2D(out_channels * self.expansion, 1, 1, use_bias=False)
        self.bn3 = tf.keras.layers.BatchNormalization()
        # Projection shortcut when the spatial size or channel count changes
        if strides != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = tf.keras.Sequential([
                tf.keras.layers.Conv2D(self.expansion * out_channels, kernel_size=1,
                                       strides=strides, use_bias=False),
                tf.keras.layers.BatchNormalization()])
        else:
            self.shortcut = lambda x, _: x  # identity shortcut

    def call(self, x, training=False):
        out = tf.nn.elu(self.bn1(self.conv1(x), training))
        out = tf.nn.elu(self.bn2(self.conv2(out), training))
        out = self.bn3(self.conv3(out), training)
        out += self.shortcut(x, training)
        return tf.nn.elu(out)
class ResNet(tf.keras.Model):
    def __init__(self, block, num_blocks):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = tf.keras.layers.Conv2D(64, 7, 2, padding="same", use_bias=False)  # 100x70 -> 50x35
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2, padding='same')  # 50x35 -> 25x18
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()  # unused; replaced by flatten below
        self.flatten = tf.keras.layers.Flatten()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        # Only the first block of each stage may downsample; the rest use stride 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return tf.keras.Sequential(layers)

    def call(self, x, training=False):
        out = self.pool1(tf.nn.elu(self.bn1(self.conv1(x), training)))
        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)
        # Flatten the final feature map (the original classification head is removed;
        # the Dense layers are stacked externally)
        out = self.flatten(out)
        return out

    def model(self):
        x = tf.keras.layers.Input(shape=(100, 70, 1))
        return tf.keras.Model(inputs=[x], outputs=self.call(x))


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])
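As a quick sanity check on the backbone output (by my count, a 100x70 input ends up as a 4x3x2048 feature map, so the flattened vector should have 24576 entries):

backbone = ResNet152()
features = backbone(tf.zeros([1, 100, 70, 1]))
print(features.shape)  # expected (1, 24576): 4x3 spatial positions x 2048 channels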
I used ELU as the activation function and replaced the GlobalAveragePooling layer at the end of the ResNet with a Flatten layer.
Before the output I stacked two Dense layers (2048 units and 3 units) on top of the ResNet model.
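Roughly, the full model is assembled like this (a sketch, not my exact code; whether the 2048-unit Dense also uses ELU is incidental):

inputs = tf.keras.layers.Input(shape=(100, 70, 1))
x = tf.keras.layers.Rescaling(1. / 255)(inputs)   # preprocessing layer
x = ResNet152()(x)                                # flattened backbone features
x = tf.keras.layers.Dense(2048, activation='elu')(x)
outputs = tf.keras.layers.Dense(3)(x)             # linear output for regression
model = tf.keras.Model(inputs, outputs)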
For training I used the Adam optimizer with an initial learning rate of 1e-4, which decreases by a factor of 10 whenever val_loss has not improved for 3 epochs.
The loss is just MSE.
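The training setup, as a sketch (the EarlyStopping patience and the single-sample data are illustrative stand-ins):

import numpy as np

# The single training sample: a uniform gray image and its three targets
x_train = np.full((1, 100, 70, 1), 60.0, dtype=np.float32)
y_train = np.array([[0.87077969, 0.98989031, 0.98888382]], dtype=np.float32)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='mse')

callbacks = [
    # Divide the learning rate by 10 when val_loss stalls for 3 epochs
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                         patience=3, min_lr=1e-8),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
]

model.fit(x_train, y_train, validation_data=(x_train, y_train),
          epochs=1000, callbacks=callbacks)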
After early stopping, with the learning rate down to 1e-8, the MSE loss is still very high: 8.6225.
The prediction is [2.92318237, 5.53124916, 3.00686643], which is far from the ground truth [0.87077969, 0.98989031, 0.98888382].
I don't understand why such a deep network cannot overfit even a single sample.
Could the reason be that my input image carries too little information? Could someone help me?